struct TData { std::string fUserId; std::string fCountry; std::string fZipCode; std::string fOrder; Long64_t fCount; double fPrice; // per year !! int fSYear; int fEYear; }; //______________________________________________________________________________________ auto MakeDataFrame( ROOT::RDF::RNode tdf_csv) { auto calcColumns = [](const std::string &x,int sYear,int eYear) { static TPRegexp rl("(\\d{1,2})-Jahreskarte (\\d{4})"); if (x.find("Jahreskarte") != std::string::npos) { const TObjArray *subStrL = rl.MatchS(x); if (subStrL->GetLast()+1 == 3) { const int length = ((TObjString *)subStrL->At(1))->GetString().Atoi(); sYear = ((TObjString *)subStrL->At(2))->GetString().Atoi(); eYear = sYear+length-1; } else { Error("MakeDataFrame","Invalid data entry: %s",x.c_str()); } delete subStrL; } else { std::cout << "line not assigned: " << x << std::endl; } }; // Insert a few columns auto tdf = tdf_csv.Define("fSYear", [](){return 0;}) .Define("fEYear", [](){return -1;}); // calcute the new columns tdf.Foreach(calcColumns,{"fTicket","fSYear","fEYear"}); Long64_t count_sum = 0; tdf.Foreach([&count_sum](Long64_t count){count_sum += count;},{"fCount"} ); std::cout << "sum counts in csv: " << count_sum << std::endl; return tdf; } //______________________________________________________________________________________ auto FilterDataFrame( ROOT::RDF::RNode tdf, const std::string &select) { auto userId_A = tdf.Take("fUserId"); auto country_A = tdf.Take("fCountry"); auto zipCode_A = tdf.Take("fZipCode"); auto order_A = tdf.Take("fOrder"); auto count_A = tdf.Take("fCount"); auto price_A = tdf.Take("fPrice"); auto syear_A = tdf.Take("fSYear"); auto eyear_A = tdf.Take("fEYear"); auto N = userId_A->size(); std::vector indexSort(N); TMath::Sort(Long64_t(N),userId_A->data(),indexSort.data(),kFALSE); // Consolidate the duplicate userId's ; user could have had several different orders // in one year // But only consolidate orders with same SYear and EYear std::vector data; ULong64_t iev = 0; while (iev < N) { TData d; d.fUserId = userId_A->at(indexSort[iev]); d.fCountry = country_A->at(indexSort[iev]); d.fZipCode = zipCode_A->at(indexSort[iev]); d.fOrder = order_A->at(indexSort[iev]); d.fCount = count_A->at(indexSort[iev]); d.fPrice = count_A->at(indexSort[iev])*price_A->at(indexSort[iev]); d.fSYear = syear_A->at(indexSort[iev]); d.fEYear = eyear_A->at(indexSort[iev]); ++iev; } ROOT::RDataFrame tdf_empty(data.size()); auto tdf_cons = tdf_empty; tdf_cons.DefineSlotEntry("data",[&data](unsigned int /*slot*/,ULong64_t entry) {return data[entry]; } ) .Define("fUserId", [](TData &d) {return d.fUserId; },{"data"}) .Define("fCountry",[](TData &d) {return d.fCountry;},{"data"}) .Define("fZipCode",[](TData &d) {return d.fZipCode;},{"data"}) .Define("fOrder", [](TData &d) {return d.fOrder; },{"data"}) .Define("fCount", [](TData &d) {return d.fCount; },{"data"}) .Define("fPrice", [](TData &d) {return d.fPrice; },{"data"}) .Define("fSYear", [](TData &d) {return d.fSYear; },{"data"}) .Define("fEYear", [](TData &d) {return d.fEYear; },{"data"}); std::cout << "select: " << select << std::endl; auto tdf_cut = tdf_cons.Filter(select); auto counts = tdf_cut.Count(); std::cout << "tdf_cut.Count(): " << *counts << std::endl; return tdf_cut; } //______________________________________________________________________________________ void mytest2() { std::string fname_csv = "ticket2.txt"; auto d_csv = ROOT::RDF::MakeCsvDataFrame(fname_csv); auto d = MakeDataFrame(d_csv); auto colNames = d.GetColumnNames(); for (auto &&name: colNames) {std::cout << name <