Hi,
I’m trying to find out whether a value of an event of one dataset for a column/branch is present in another dataset for the same column/branch.
Currently I have the following C++ declared in pyROOT (as an example):
void multiple_candidates(ROOT::RDataFrame ws_df, ROOT::RDataFrame rs_df){
ROOT::RDF::RNode ws_node = ws_df;
ROOT::RDF::RNode rs_node = rs_df;
ws_node = ws_node.Define("unique_event_number",[](const unsigned int& rn, const unsigned long long& en) -> unsigned long long {return 10e+9*rn+en;},{"runNumber","eventNumber"});
rs_node = rs_node.Define("unique_event_number",[](const unsigned int& rn, const unsigned long long& en) -> unsigned long long {return 10e+9*rn+en;},{"runNumber","eventNumber"});
std::vector<unsigned long long> uevs_ws;
std::vector<unsigned long long> uevs_rs;
ws_node.Foreach([&uevs_ws](const unsigned long long& uev){
uevs_ws.push_back(uev);
}, {"unique_event_number"});
std::cout << "Length of WS uev vector: " << uevs_ws.size() << std::endl;
rs_node.Foreach([&uevs_rs](const unsigned long long& uev){
uevs_rs.push_back(uev);
}, {"unique_event_number"});
std::cout << "Length of RS uev vector: " << uevs_rs.size() << std::endl;
//sort the uev vectors
std::sort(uevs_ws.begin(), uevs_ws.end());
std::sort(uevs_rs.begin(), uevs_rs.end());
//find unique event numbers that are in both the RS and WS.
std::vector<unsigned long long> common_uevs;
std::set_intersection(uevs_ws.begin(), uevs_ws.end(), uevs_rs.begin(), uevs_rs.end(), std::back_inserter(common_uevs));
//check if eventnumber is in common_uevs
//This is what I want to do. This doesn't work but perhaps it will with a lambda function???
ws_node = ws_node.Define("in_both","std::binary_search(unique_event_number,common_uevs) );
rs_node = rs_node.Define("in_both","std::binary_search(unique_event_number,common_uevs) );
}
It doesn’t seem to work using the method above but can it work with a lambda function? Is this the most efficient way to compare two dataframes?
_ROOT Version:v6.26.06