Hi,
I am trying to write a tree with RDataFrame. The tree has some branches which will be filled with different values for different user-defined variables. This is the code I want to write:
/*
"""
"""
*/
#include <iostream>
#include "TFile.h"
#include "TH1.h"
#include "TH2.h"
#include "TString.h"
#include <string>
#include <sys/stat.h>
#include "TChain.h"
#include "TTree.h"
#include <ROOT/RDataFrame.hxx>
#include <algorithm>
#include <time.h>
#include <chrono> // for high_resolution_clock
using namespace std;
using namespace ROOT;
Float_t find_trigger_weight(int channel, float lep_pT, float lep_eta, TFile *weight_dic, float lep2_pT=0, int up_or_down=0){
//# find trigger weight
TString name = "";
if(channel == 1){
if(lep_pT < 65) name = "eff_low_ee";
else name = "eff_high_ee";
}
else if(channel == 0){
if(lep_pT < 52.5) name = "eff_low_mm";
else name = "eff_high_mm";
}
else if(channel == 2){
if(lep_pT > 65) name = "eff_high_em";
else if(lep2_pT > 52.5) name = "eff_high_me";
else name = "eff_low_em";
}
else if(channel == 3){
if(lep2_pT > 65) name = "eff_high_em";
else if(lep_pT > 52.5) name = "eff_high_me";
else name = "eff_low_me";
}
else{
std::cout << "Channel makes no sense." << channel << std::endl;
return -1;
}
//#find pt/eta bin
TH2D* histWeights = (TH2D*)weight_dic->Get(name);
Int_t binNumber = histWeights->FindBin(lep_eta, lep_pT);
Float_t trigWeight = histWeights->GetBinContent(binNumber);
Float_t trigWeightErr = histWeights->GetBinError(binNumber);
if(up_or_down == -1) return (trigWeight-trigWeightErr);
else if(up_or_down == 1) return (trigWeight+trigWeightErr);
else return trigWeight;
}
Float_t find_weight(float val_pt, float val_eta, TFile *weight_dic, bool do_err_up, bool do_err_down, TString era, int lepNumber){
/// find identification weight
TString lepNumberStr = Form("%d",lepNumber);
TH2D *h_Weights = (TH2D*)weight_dic->Get("h_Weight_"+era+"_"+lepNumberStr);
int binNumber = h_Weights->FindBin(val_eta, val_pt);
Float_t kWeight = h_Weights->GetBinContent(binNumber);
Float_t kWeightErr = h_Weights->GetBinError(binNumber);
if(do_err_up) return (kWeight + kWeightErr);
else if(do_err_down) return (kWeight - kWeightErr);
else return (kWeight);
}
float applyWeights(float pt0, float pt1, float eta0, float eta1, string sampleTag, string era, int channel, string needWeight, string outputFolder){
bool do_err_up = false;
bool do_err_down = false;
TString eraT = (TString)era;
TString sampleTagT = (TString)sampleTag;
TString outputDirT = (TString)outputFolder;
TString lep1FileName = outputDirT+"TxtFiles/kFactors_lep1_"+eraT+"_"+sampleTagT+".root";
TFile *lep1File = new TFile(lep1FileName);
TString lep0FileName = outputDirT+"TxtFiles/kFactors_lep0_"+eraT+"_"+sampleTagT+".root";
TFile *lep0File = new TFile(lep0FileName);
TString alphaFileName = outputDirT+"TxtFiles/alphaFactors_"+eraT+"_"+sampleTagT+".root";
TFile *alphaFile = new TFile(alphaFileName);
//### getting the weights
float ee_weight = .5;
float mm_weight = .5;
float ee_weight_up = .5;
float mm_weight_up = .5;
float ee_weight_down = .5;
float mm_weight_down = .5;
//### em channel
if(channel == 2){
ee_weight *= find_weight(pt1, fabs(eta1), lep1File, do_err_up, do_err_down, eraT, 1);
mm_weight /= find_weight(pt0, fabs(eta0), lep0File, do_err_up, do_err_down, eraT, 0);
ee_weight_up *= find_weight(pt1, fabs(eta1), lep1File, true, do_err_down, eraT, 1);
mm_weight_up /= find_weight(pt0, fabs(eta0), lep0File, true, do_err_down, eraT, 0);
ee_weight_down *= find_weight(pt1, fabs(eta1), lep1File, do_err_up, true, eraT, 1);
mm_weight_down /= find_weight(pt0, fabs(eta0), lep0File, do_err_up, true, eraT, 0);
}
//### me channel
if(channel == 3){
ee_weight *= find_weight(pt0, fabs(eta0), lep0File, do_err_up, do_err_down, eraT, 0);
mm_weight /= find_weight(pt1, fabs(eta1), lep1File, do_err_up, do_err_down, eraT, 1);
ee_weight_up *= find_weight(pt0, fabs(eta0), lep0File, true, do_err_down, eraT, 0);
mm_weight_up /= find_weight(pt1, fabs(eta1), lep1File, true, do_err_down, eraT, 1);
ee_weight_down *= find_weight(pt0, fabs(eta0), lep0File, do_err_up, true, eraT, 0);
mm_weight_down /= find_weight(pt1, fabs(eta1), lep1File, do_err_up, true, eraT, 1);
}
float fsKLep0 = find_weight(pt0, fabs(eta0), lep0File, do_err_up, do_err_down, eraT, 0);
float fsKLep1 = find_weight(pt1, fabs(eta1), lep1File, do_err_up, do_err_down, eraT, 1);
//### getting the trigger weight
float trigger_weight = 1.0;
float fsAlpha = 1.0;
float trigger_weight_up = 1.0;
float trigger_weight_down = 1.0;
if(find_trigger_weight(channel,pt0,fabs(eta0),alphaFile, pt1) == 0){
trigger_weight = 1;
fsAlpha = 1;
}
else{
trigger_weight = sqrt( find_trigger_weight(0,pt0,fabs(eta0),alphaFile) * find_trigger_weight(1,pt0,fabs(eta0),alphaFile) ) / find_trigger_weight(channel,pt0,fabs(eta0),alphaFile,pt1);
trigger_weight_up = sqrt( find_trigger_weight(0,pt0,fabs(eta0),alphaFile,0, 1) * find_trigger_weight(1,pt0,fabs(eta0),alphaFile,0, 1) ) / find_trigger_weight(channel,pt0,fabs(eta0),alphaFile,pt1,-1);
trigger_weight_down = sqrt( find_trigger_weight(0,pt0,fabs(eta0),alphaFile, 0, -1) * find_trigger_weight(1,pt0,fabs(eta0),alphaFile, 0, -1) ) / find_trigger_weight(channel,pt0,fabs(eta0),alphaFile,pt1,1);
fsAlpha = sqrt( find_trigger_weight(0,pt0,fabs(eta0),alphaFile) * find_trigger_weight(1,pt0,fabs(eta0),alphaFile) ) / find_trigger_weight(channel,pt0,fabs(eta0),alphaFile,pt1);
}
ee_weight *= trigger_weight;
mm_weight *= trigger_weight;
ee_weight_up *= trigger_weight_up;
mm_weight_up *= trigger_weight_up;
ee_weight_down *= trigger_weight_down;
mm_weight_down *= trigger_weight_down;
if (needWeight == "ee_weight") return ee_weight;
else if (needWeight == "mm_weight") return mm_weight;
else if (needWeight == "ee_weight_up") return ee_weight_up;
else if (needWeight == "mm_weight_up") return mm_weight_up;
else if (needWeight == "ee_weight_down") return ee_weight_down;
else if (needWeight == "mm_weight_down") return mm_weight_down;
else if (needWeight == "fsAlpha") return fsAlpha;
else if (needWeight == "fsKLep0") return fsKLep0;
else if (needWeight == "fsKLep1") return fsKLep1;
else{
std::cout << "Wrong weights wanted!!!" << std::endl;
return -999;
}
}
void makeMyFSNtuple(string sampleTag, string runTag, string eraS, string versionS){
//### boolean to know which process should be done
// Record start time
auto start = std::chrono::high_resolution_clock::now();
bool isData = false;
bool isMC = false;
TString mcWeight = "";
if (sampleTag == "Data"){
isData = true;
mcWeight = "1.0";
}
else if (sampleTag == "MC"){
isMC = true;
mcWeight = "(genWeight * eventWeight * leptonWeight * jvtWeight * bTagWeight * pileupWeight * FFWeight)"; //### get the MC weight for ttbar sample
}
else{
std::cout<< "Unknown option for Data/MC" << std::endl;
return;
}
std::cout << "Making FS ntuple for " << sampleTag << std::endl;
//### Get all relevant settings from settings.py ###
string eosDirS = "/eos/atlas/atlascerngroupdisk/phys-susy/2L2J-ANA-SUSY-2018-05/SusySkim2LJets/"+versionS+"/SUSY2/"; // eos directory address
string outputDirS = versionS+"/"; // the output directory
int status = mkdir(outputDirS.c_str(),0777);
int status2 = mkdir((outputDirS+"NTuples").c_str(),0777);
//### Prepare output files ###
string fileOut = outputDirS+"Ntuples/fsTree_"+versionS+"_"+sampleTag+"_"+eraS+".root";
string treeOut = "outputTree";
//string fileOut = string(fileOutS);
bool do_err_up = false;
bool do_err_down = false;
//### variable to store the total events in the input file and total events saved in the output
Long64_t totEvents = 0;
Long64_t totSaveEvents = 0;
//#### Start loop over input samples ###
Long64_t nEvents = 0;
Long64_t nSaveEvents = 0;
std::cout << "----------" << std::endl;
std::cout << "Processing " << sampleTag << std::endl;
float lumi = 1.0;
string treeInS = "";
string treeInFileS = "";
//########## change this for v1.2
if (sampleTag == "Data"){
std::cout << "Including Data" << std::endl;
treeInS = "data";
treeInFileS = eosDirS+"SUSY2_Data/"+eraS+"_merged_processed.root";
lumi = 1.0;
}
else{
std::cout << "Including ttbar" << std::endl;
treeInS = "ttbar_NoSys";
string eraName = "";
if(eraS == "MC16a"){
lumi = 36200.0;
eraName = "mc16a";
}
else if(eraS == "MC16cd"){
lumi = 43800.0;
eraName = "mc16cd";
}
treeInFileS = eosDirS+"SUSY2_Bkgs_"+eraName+"/ttbar_merged_processed.root";
}
ROOT::RDataFrame d(treeInS, treeInFileS);
auto count = d.Count();
//### filtering the tree to make a smaller tree
auto dSmall = d.Filter("trigMatch_electronPlusMuonTrig!=0");
auto dCut = dSmall.Filter("nLep_signal >= 2")
.Filter("channel>1");
//# Determine the number of events to loop over
unsigned long long rangeNumber = -1;
unsigned long long maxEvents = 5000;
if (runTag == "Test")
rangeNumber = std::min(maxEvents,*count);
else
rangeNumber = *count;
//# Start loop over all events
std::cout << "Looping over " << rangeNumber << " events" << std::endl;
//# Retrieve all kinematic parameters from the event
vector<string> colNames;
colNames.push_back("met");
colNames.push_back("metPhi");
colNames.push_back("ht");
colNames.push_back("mll");
colNames.push_back("dPhi1");
colNames.push_back("dPhi2");
colNames.push_back("zPt");
colNames.push_back("mt2w");
colNames.push_back("jetN");
colNames.push_back("bjetN");
colNames.push_back("lepN");
colNames.push_back("leadLepPt");
colNames.push_back("trailLepPt");
colNames.push_back("leadLepEta");
colNames.push_back("trailLepEta");
colNames.push_back("leadLepPhi");
colNames.push_back("trailLepPhi");
colNames.push_back("runNumber");
colNames.push_back("eventNumber");
colNames.push_back("eeTrig");
colNames.push_back("eeTrigMatch");
colNames.push_back("mmTrig");
colNames.push_back("mmTrigMatch");
colNames.push_back("emTrig");
colNames.push_back("emTrigMatch");
colNames.push_back("isOS");
if(!isData){
colNames.push_back("genWeight");
colNames.push_back("eventWeight");
colNames.push_back("leptonWeight");
colNames.push_back("jvtWeight");
colNames.push_back("bTagWeight");
colNames.push_back("pileupWeight");
colNames.push_back("FFWeight");
}
auto dOut = dCut.Range(rangeNumber);
auto dNew = dOut.Define("met", "met_Et")
.Define("metPhi", "met_Phi")
.Define("ht", "Ht30")
.Define("dPhi1", "DPhiJ1Met")
.Define("dPhi2", "DPhiJ2Met")
.Define("zPt", "Ptll")
.Define("mt2w", "mt2leplsp_0")
.Define("jetN", "nJet30")
.Define("bjetN", "nBJet30_MV2c10_FixedCutBEff_77")
.Define("lepN", "nLep_signal")
.Define("leadLepPt", "lepPt[0]")
.Define("trailLepPt", "lepPt[1]")
.Define("leadLepEta", "lepEta[0]")
.Define("trailLepEta", "lepEta[1]")
.Define("leadLepPhi", "lepPhi[0]")
.Define("trailLepPhi", "lepPhi[1]")
.Define("runNumber", "RunNumber")
.Define("eventNumber", "EventNumber")
.Define("eeTrig", "trigWeight_1L2LTrig")
.Define("eeTrigMatch", "trigMatch_1L2LTrig")
.Define("mmTrig", "trigWeight_diMuonTrig")
.Define("mmTrigMatch", "trigMatch_diMuonTrig")
.Define("emTrig", "trigWeight_electronPlusMuonTrig")
.Define("emTrigMatch", "trigMatch_diMuonTrig")
.Define("isOS", "int OS = -1; (lepCharge[0]!=lepCharge[1] ? OS=1:OS=0); return OS;")
.Define("fs_mc_weight","mcWeight*lumi")
.Define("fs_weight_ee","fs_mc_weight*applyWeights(lepPt[0], lepPt[1], lepEta[0], lepEta[1], sampleTag, era, channel, \"ee_weight\", outputDirS)")
.Define("fs_weight_mm","fs_mc_weight*applyWeights(lepPt[0], lepPt[1], lepEta[0], lepEta[1], sampleTag, era, channel, \"mm_weight\", outputDirS)")
.Define("fs_weight_ee_up","fs_mc_weight*applyWeights(lepPt[0], lepPt[1], lepEta[0], lepEta[1], sampleTag, era, channel, \"ee_weight_up\", outputDirS)")
.Define("fs_weight_mm_up","fs_mc_weight*applyWeights(lepPt[0], lepPt[1], lepEta[0], lepEta[1], sampleTag, era, channel, \"mm_weight_up\", outputDirS)")
.Define("fs_weight_ee_down","fs_mc_weight*applyWeights(lepPt[0], lepPt[1], lepEta[0], lepEta[1], sampleTag, era, channel, \"ee_weight_down\", outputDirS)")
.Define("fs_weight_mm_down","fs_mc_weight*applyWeights(lepPt[0], lepPt[1], lepEta[0], lepEta[1], sampleTag, era, channel, \"mm_weight_down\", outputDirS)")
.Define("fs_alpha","fs_mc_weight*applyWeights(lepPt[0], lepPt[1], lepEta[0], lepEta[1], sampleTag, era, channel, \"fsAlpha\", outputDirS)")
.Define("fs_k_lep0","fs_mc_weight*applyWeights(lepPt[0], lepPt[1], lepEta[0], lepEta[1], sampleTag, era, channel, \"fsKLep0\", outputDirS)")
.Define("fs_k_lep1","fs_mc_weight*applyWeights(lepPt[0], lepPt[1], lepEta[0], lepEta[1], sampleTag, era, channel, \"fsKLep1\", outputDirS)")
.Define("fsChannel","channel");
dNew.Snapshot(treeOut, fileOut, colNames);
// Record end time
auto finish = std::chrono::high_resolution_clock::now();
std::chrono::duration<double> elapsed = finish - start;
std::cout << "Elapsed time: " << elapsed.count() << " s" << std::endl;
}
Here I get the complaint that for â.Define(âfs_mc_weightâ,âmcWeight*lumiâ)â, mcWeight and lumi are undeclared. This is the error message:
input_line_119:1:47: error: use of undeclared identifier 'mcWeight'
namespace __tdf_28{ auto tdf_f = []() {return mcWeight
^
Error in <TRint::HandleTermInput()>: std::runtime_error caught: Cannot interpret the following expression:
mcWeight
Make sure it is valid C++.
I think I will get similar error for this line too:
Define("fs_weight_ee_up","fs_mc_weight*applyWeights(lepPt[0], lepPt[1], lepEta[0], lepEta[1], sampleTag, era, channel, \"ee_weight_up\", outputDirS)")
as sampleTag, era and outputDirS are user-defined.
May I know how I should use the user-defined variable in RDataFrame scenario?
Thank you,
Arka