Use RDataFrame with THnSparse

Okay, I’ve found a solution. Thank you for your advice and for pointing me to the tutorial. My code follows below for anyone interested down the line.


def declare():
    'declares class SparseHelper to enable RDataFrame to fill a THnSparse'
    'it is based on $ROOTSYS/tutorials/dataframe/df018_customActions.C'
    import ROOT
    
    ROOT.gROOT.ProcessLine(r'''
        // This is a custom action which respects a well defined interface. It supports parallelism,
        // in the sense that it behaves correctly if implicit multi threading is enabled.
        // We template it on:
        // - The type of the internal THnSparseT(s)
        // - The dimension of the internal THnSparseT(s)
        // Note the plural: in presence of a MT execution, internally more than a single THnSparseT is created.
        // Note also that when booking, the last column will be understood as the weight
        template <typename T, unsigned int NDIM>
        class SparseHelper : public ROOT::Detail::RDF::RActionImpl<SparseHelper<T, NDIM>> {
        public:
            /// This is a handy, expressive shortcut.
            using THn_t = THnSparseT<T>;
            /// This type is a requirement for every helper.
            using Result_t = THn_t;
        
        private:
            std::vector<std::shared_ptr<THn_t>> fHistos; // one per data processing slot
        
        public:
            /// This constructor takes all the parameters necessary to build the THnSparseTs. In addition, it requires the names of
            /// the columns which will be used.
            SparseHelper(std::string_view name, std::string_view title, std::array<int, NDIM> nbins, std::array<double, NDIM> xmins,
                      std::array<double, NDIM> xmax)
            {
                const auto nSlots = ROOT::IsImplicitMTEnabled() ? ROOT::GetImplicitMTPoolSize() : 1;
                for (auto i : ROOT::TSeqU(nSlots)) {
                    fHistos.emplace_back(std::make_shared<THn_t>(std::string(name).c_str(), std::string(title).c_str(),
                                                                 NDIM, nbins.data(), xmins.data(), xmax.data()));
                    (void)i;
               }
            }
            SparseHelper(SparseHelper &&) = default;
            SparseHelper(const SparseHelper &) = delete;
            std::shared_ptr<THn_t> GetResultPtr() const { return fHistos[0]; }
            void Initialize() {}
            void InitTask(TTreeReader *, unsigned int) {}
            /// This is a method executed at every entry
            template <typename... ColumnTypes>
            void Exec(unsigned int slot, ColumnTypes... values)
            {
                // Since THnSparseT<T>::Fill expects a double*, we build it passing through a std::array.
                std::array<double, sizeof...(ColumnTypes)> _valuesArr{static_cast<double>(values)...};
                // We understand the last item in the list as a weight
                std::array<double, sizeof(_valuesArr) - 1> valuesArr;
                for(int i=0; i<sizeof(_valuesArr) - 1; i++){
                    valuesArr[i] = _valuesArr[i];
                }
                auto w = _valuesArr.back();
                fHistos[slot]->Fill(valuesArr.data(), w);
            }
            /// This method is called at the end of the event loop. It is used to merge all the internal THnSparseTs which
            /// were used in each of the data processing slots.
            void Finalize()
            {
                auto &res = fHistos[0];
                for (auto slot : ROOT::TSeqU(1, fHistos.size())) {
                    res->Add(fHistos[slot].get());
                }
            }
            
            std::string GetActionName(){
                return "SparseHelper";
            }
        };
        ''')


def fill(df, hnm, htit, brlist, weightbr):
    'create THnSparse(hnm, htit, ...) wrapped in RResultPtr'
    
    import ROOT
    
    br_and_weight_list = brlist + [weightbr]
    
    ROOT.gROOT.ProcessLine('''
        ROOT::RDF::RResultPtr<THnSparseT<TArrayD> > CppFunctionThatCallsFillFor_{hnm}(ROOT::RDF::{dfclassnm} df)
        {{
            // Our Helper type: templated on the internal THnSparseT type, the size, the types of the columns
            // we'll use to fill.
            using Helper_t = SparseHelper<TArrayD, {ndims}>;
            
            Helper_t helper{{"{hnm}",
                            "{htit}",
                            {{{nbins}}},
                            {{{xmins}}},
                            {{{xmaxs}}}}};
            
            // We book the action: it will be treated during the event loop.
            auto myTHnSparseT = df.Book<{dattyps}>(std::move(helper), {{{br_and_weight_nms}}});
            
            return myTHnSparseT;
        }}
    '''.format(
        ndims=len(brlist),
        dfclassnm=df.__class__.__name__,
        hnm=hnm,
        htit=htit,
        nbins=','.join(str(x.nBins) for x in brlist),
        xmins=','.join(str(x.loBin) for x in brlist),
        xmaxs=','.join(str(x.hiBin) for x in brlist),
        dattyps=','.join(x.datatype for x in br_and_weight_list),
        br_and_weight_nms=','.join('"{}"'.format(x.branch) for x in br_and_weight_list))
    )
    return getattr(ROOT, 'CppFunctionThatCallsFillFor_' + hnm)(df)


def test():
    import ROOT
    
    ROOT.gROOT.SetBatch(True)
    ROOT.ROOT.EnableImplicitMT()
    declare()
    df = ROOT.RDataFrame(10000).Define('x0', 'gRandom->Uniform(-5, 5)').Define('x1', '(float)gRandom->Uniform(0, 10)').Define('x2', '(int)gRandom->Uniform(-100, 100)').Define('weight', '(double) 1.')
    
    class mybranch():
        def __init__(self, branch, nBins=0, loBin=0, hiBin=0, datatype=None):
            self.branch = branch
            self.nBins = nBins
            self.loBin = loBin
            self.hiBin = hiBin
            self.datatype = datatype
    
    brlist = [
        mybranch('x0', 10, -5, 5, datatype='double'),
        mybranch('x1', 10, 0, 10, datatype='float'),
        mybranch('x2', 10, -100, 100, datatype='int'),
    ]
    wbr = mybranch('weight', datatype='double')
    myhist1 = fill(df, 'myhist1', 'myhist1', brlist[:1], wbr)
    myhist2 = fill(df, 'myhist2', 'myhist2', brlist[:2], wbr)
    myhist3 = fill(df, 'myhist3', 'myhist3', brlist[:3], wbr)
    c = ROOT.TCanvas()
    hs = ROOT.THStack()
    hs.Add(myhist1.Projection(0))
    hs.Add(myhist2.Projection(1))
    hs.Add(myhist3.Projection(2))
    hs.Draw('nostack plc pmc')
    c.BuildLegend()
    c.SaveAs('test.png')


if __name__ == '__main__':
    test()
1 Like