Okay, I’ve found a solution — thank you for your advice and for pointing me to the tutorial. My code is below for anyone who comes across this later.
def declare():
    """Declare the C++ class SparseHelper so RDataFrame can fill a THnSparse.

    Based on $ROOTSYS/tutorials/dataframe/df018_customActions.C. Call once,
    before the first call to fill(); re-declaring the class would fail.
    """
    import ROOT
    ROOT.gROOT.ProcessLine(r'''
// This is a custom action which respects a well defined interface. It supports parallelism,
// in the sense that it behaves correctly if implicit multi threading is enabled.
// We template it on:
// - The type of the internal THnSparseT(s)
// - The dimension of the internal THnSparseT(s)
// Note the plural: in presence of a MT execution, internally more than a single THnSparseT is created.
// Note also that when booking, the last column will be understood as the weight
template <typename T, unsigned int NDIM>
class SparseHelper : public ROOT::Detail::RDF::RActionImpl<SparseHelper<T, NDIM>> {
public:
   /// This is a handy, expressive shortcut.
   using THn_t = THnSparseT<T>;
   /// This type is a requirement for every helper.
   using Result_t = THn_t;
private:
   std::vector<std::shared_ptr<THn_t>> fHistos; // one per data processing slot
public:
   /// This constructor takes all the parameters necessary to build the THnSparseTs.
   SparseHelper(std::string_view name, std::string_view title, std::array<int, NDIM> nbins,
                std::array<double, NDIM> xmins, std::array<double, NDIM> xmax)
   {
      const auto nSlots = ROOT::IsImplicitMTEnabled() ? ROOT::GetImplicitMTPoolSize() : 1;
      for (auto i : ROOT::TSeqU(nSlots)) {
         fHistos.emplace_back(std::make_shared<THn_t>(std::string(name).c_str(), std::string(title).c_str(),
                                                      NDIM, nbins.data(), xmins.data(), xmax.data()));
         (void)i;
      }
   }
   SparseHelper(SparseHelper &&) = default;
   SparseHelper(const SparseHelper &) = delete;
   std::shared_ptr<THn_t> GetResultPtr() const { return fHistos[0]; }
   void Initialize() {}
   void InitTask(TTreeReader *, unsigned int) {}
   /// This is a method executed at every entry.
   template <typename... ColumnTypes>
   void Exec(unsigned int slot, ColumnTypes... values)
   {
      // Since THnSparseT<T>::Fill expects a double*, we stage all values in a std::array.
      std::array<double, sizeof...(ColumnTypes)> allValues{static_cast<double>(values)...};
      // We understand the last item in the list as a weight; the first NDIM are coordinates.
      // BUGFIX: the previous version sized this array (and bounded the loop) with
      // sizeof(_valuesArr), which counts BYTES (8 * N for doubles), not elements,
      // so the copy loop read far past the end of the staging array (UB).
      std::array<double, sizeof...(ColumnTypes) - 1> coords;
      for (std::size_t i = 0; i < coords.size(); ++i) {
         coords[i] = allValues[i];
      }
      fHistos[slot]->Fill(coords.data(), allValues.back());
   }
   /// This method is called at the end of the event loop. It is used to merge all the internal
   /// THnSparseTs which were used in each of the data processing slots.
   void Finalize()
   {
      auto &res = fHistos[0];
      for (auto slot : ROOT::TSeqU(1, fHistos.size())) {
         res->Add(fHistos[slot].get());
      }
   }
   std::string GetActionName() { return "SparseHelper"; }
};
''')
def fill(df, hnm, htit, brlist, weightbr):
    """Book a THnSparse(hnm, htit, ...) action on df and return its RResultPtr.

    brlist entries describe the axes (branch, nBins, loBin, hiBin, datatype);
    weightbr describes the branch used as the per-entry weight. Requires
    declare() to have been called first so SparseHelper exists.
    """
    import ROOT
    columns = brlist + [weightbr]
    wrapper_name = 'CppFunctionThatCallsFillFor_' + hnm
    # Values substituted into the JIT-ed C++ wrapper below.
    substitutions = dict(
        ndims=len(brlist),
        dfclassnm=df.__class__.__name__,
        hnm=hnm,
        htit=htit,
        nbins=','.join(str(b.nBins) for b in brlist),
        xmins=','.join(str(b.loBin) for b in brlist),
        xmaxs=','.join(str(b.hiBin) for b in brlist),
        dattyps=','.join(b.datatype for b in columns),
        br_and_weight_nms=','.join('"{}"'.format(b.branch) for b in columns),
    )
    ROOT.gROOT.ProcessLine('''
ROOT::RDF::RResultPtr<THnSparseT<TArrayD> > CppFunctionThatCallsFillFor_{hnm}(ROOT::RDF::{dfclassnm} df)
{{
// Our Helper type: templated on the internal THnSparseT type, the size, the types of the columns
// we'll use to fill.
using Helper_t = SparseHelper<TArrayD, {ndims}>;
Helper_t helper{{"{hnm}",
"{htit}",
{{{nbins}}},
{{{xmins}}},
{{{xmaxs}}}}};
// We book the action: it will be treated during the event loop.
auto myTHnSparseT = df.Book<{dattyps}>(std::move(helper), {{{br_and_weight_nms}}});
return myTHnSparseT;
}}
'''.format(**substitutions))
    return getattr(ROOT, wrapper_name)(df)
def test():
    """Smoke test: book 1D/2D/3D sparse histograms on one RDataFrame and draw projections."""
    import ROOT
    ROOT.gROOT.SetBatch(True)
    ROOT.ROOT.EnableImplicitMT()
    declare()
    df = (ROOT.RDataFrame(10000)
          .Define('x0', 'gRandom->Uniform(-5, 5)')
          .Define('x1', '(float)gRandom->Uniform(0, 10)')
          .Define('x2', '(int)gRandom->Uniform(-100, 100)')
          .Define('weight', '(double) 1.'))

    class mybranch:
        # Lightweight axis/column descriptor consumed by fill().
        def __init__(self, branch, nBins=0, loBin=0, hiBin=0, datatype=None):
            self.branch = branch
            self.nBins = nBins
            self.loBin = loBin
            self.hiBin = hiBin
            self.datatype = datatype

    axes = [
        mybranch('x0', 10, -5, 5, datatype='double'),
        mybranch('x1', 10, 0, 10, datatype='float'),
        mybranch('x2', 10, -100, 100, datatype='int'),
    ]
    weight = mybranch('weight', datatype='double')
    # Book a 1D, a 2D and a 3D histogram; the event loop runs once for all three.
    hists = [fill(df, 'myhist%d' % ndim, 'myhist%d' % ndim, axes[:ndim], weight)
             for ndim in (1, 2, 3)]
    canvas = ROOT.TCanvas()
    stack = ROOT.THStack()
    for axis_index, hist in enumerate(hists):
        stack.Add(hist.Projection(axis_index))
    stack.Draw('nostack plc pmc')
    canvas.BuildLegend()
    canvas.SaveAs('test.png')
if __name__ == '__main__':
    # Run the smoke test when executed as a script (no-op on import).
    test()