Hi,
I’m working in a python environment but I’m declaring some C++ functions with gInterpreter to create some new columns/branches in an RDataFrame. I do this with DisableImplicitMT()
.
This function is as follows and defined using ROOT.gInterpreter.Declare
:
std::tuple<ROOT::RDF::RNode,ROOT::RDF::RNode> ws_rs_contamination(ROOT::RDataFrame ws_df, ROOT::RDataFrame rs_df){
ROOT::RDF::RNode ws_node = ws_df;
ROOT::RDF::RNode rs_node = rs_df;
ws_node = ws_node.Define("unique_event_number",[](const unsigned int& rn, const unsigned long long& en) -> unsigned long long {return 10e+9*rn+en;},{"runNumber","eventNumber"});
rs_node = rs_node.Define("unique_event_number",[](const unsigned int& rn, const unsigned long long& en) -> unsigned long long {return 10e+9*rn+en;},{"runNumber","eventNumber"});
std::vector<unsigned long long> uevs_ws;
std::vector<unsigned long long> uevs_rs;
ws_node.Foreach([&uevs_ws](const unsigned long long& uev){
uevs_ws.push_back(uev);
}, {"unique_event_number"});
std::cout << "Length of WS uev vector: " << uevs_ws.size() << std::endl;
rs_node.Foreach([&uevs_rs](const unsigned long long& uev){
uevs_rs.push_back(uev);
}, {"unique_event_number"});
std::cout << "Length of RS uev vector: " << uevs_rs.size() << std::endl;
//sort the uev vectors
std::sort(uevs_ws.begin(), uevs_ws.end());
std::sort(uevs_rs.begin(), uevs_rs.end());
//find unique event numbers that are in both the RS and WS.
std::vector<unsigned long long> common_uevs;
std::set_intersection(uevs_ws.begin(), uevs_ws.end(), uevs_rs.begin(), uevs_rs.end(), std::back_inserter(common_uevs));
ws_node = ws_node.Define("in_both",[&common_uevs] (unsigned long long uev) {return (bool)std::binary_search(common_uevs.begin(),common_uevs.end(),uev);},{"unique_event_number"});
rs_node = rs_node.Define("in_both",[&common_uevs] (unsigned long long uev) {return (bool)std::binary_search(common_uevs.begin(),common_uevs.end(),uev);},{"unique_event_number"});
return {ws_node,rs_node};
When I return the dataframes using this function and try the following:
ws_test,rs_test = ROOT.ws_rs_contamination(ws_frame_no_clones,rs_frame_no_clones)
arr = ws_test.AsNumpy(["in_both"])
I get a segfault with the error pointing to this
69 result_ptrs = {}
70 for column in columns:
---> 71 column_type = df.GetColumnType(column)
72 result_ptrs[column] = df.Take[column_type](column)
Is this an issue with gInterpreter and python or am I doing something wrong in my code?
Thanks
Please read tips for efficient and successful posting and posting code
_ROOT Version: v6.22