RDataFrame snapshot automatic type deduction performance

OK, I found a working solution.
Thanks a lot for pointing me to gInterpreter->Declare.

#include <ROOT/RDataFrame.hxx>
#include "ROOT/RDF/RInterface.hxx"
#include <chrono>
#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Chains `ncols` Define() calls onto `node` and returns the resulting node.
// The columns are named "x<ncols>", ..., "x1" (registered in that order) and
// each one yields its own index as an int. For ncols <= 0 the node is returned
// untouched.
ROOT::RDF::RNode defines(ROOT::RDF::RNode node, int ncols){
    // Count downwards so the registration order matches x<ncols> first, x1 last.
    for(int idx = ncols; idx > 0; --idx){
        const std::string colname = "x" + std::to_string(idx);
        node = node.Define(colname, [idx](){return idx;});
    }
    return node;
}

int snapshotperf(){
    ROOT::RDataFrame df_orig(10);
    auto df = defines(df_orig, 3);
    std::time_t start = std::time(0);

    ROOT::RDF::RSnapshotOptions opts;
    opts.fLazy = true;
    using SnapRet_t = ROOT::RDF::RResultPtr<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>>;
    std::vector<SnapRet_t> rets;

    std::vector<std::string> columnnames = {"x1", "x2", "x3"};
    std::vector<std::string> columntypes = {"int", "int", "int"};
    std::string template_expr("<");
    for(int i = 0; i < columntypes.size(); i++){
        template_expr+=columntypes[i];
        if(i!= columntypes.size()-1)
            template_expr+=",";
    }
    template_expr+=">";

    start = std::time(0);
    std::string declare_expr(
        "ROOT::RDF::RResultPtr<ROOT::RDF::RInterface<ROOT::Detail::RDF::RLoopManager>> make_snap"
        "(ROOT::RDF::RNode df, std::string treename, std::string fname, std::vector<std::string> columnnames, ROOT::RDF::RSnapshotOptions opts){"
        "return df.Snapshot"+template_expr+"(treename, fname, columnnames, opts);"
        "}");

    gInterpreter->Declare(declare_expr.c_str());
    std::cout << "time to declare " << std::time(0)-start << std::endl;

    start = std::time(0);
    auto make_snap = (SnapRet_t (*)(ROOT::RDF::RNode,std::string,std::string,std::vector<std::string>, ROOT::RDF::RSnapshotOptions)) gInterpreter->ProcessLine("make_snap");
    std::cout << "time to get function " << std::time(0)-start << std::endl;

    start = std::time(0);
    for (auto i = 0; i < 5; ++i){
        SnapRet_t res = make_snap(df, "t", "f" + std::to_string(i) + ".root", columnnames, opts);
        rets.emplace_back(res);
    }
    std::cout << "time to create snapshots " << std::time(0)-start << std::endl;

    return df.Count().GetValue();
}
1 Like