Use of user-defined variables inside RDataFrame

arkasantra · November 14, 2018, 6:14pm

Hi,
I am trying to write a tree with RDataFrame. The tree has some branches which will be filled with different values for different user-defined variables. This is the code I want to write:


/*
"""

"""
*/
#include <iostream>
#include "TFile.h"
#include "TH1.h"
#include "TH2.h"
#include "TString.h"
#include <string>
#include <sys/stat.h>
#include "TChain.h"
#include "TTree.h"
#include <ROOT/RDataFrame.hxx>
#include <algorithm>
#include <time.h>
#include <chrono>  // for high_resolution_clock

using namespace std;
using namespace ROOT;

Float_t find_trigger_weight(int channel, float lep_pT, float lep_eta, TFile *weight_dic, float lep2_pT=0, int up_or_down=0){
    //# find trigger weight
    TString name  =  "";
    if(channel == 1){
        if(lep_pT < 65)         name = "eff_low_ee";
        else                    name = "eff_high_ee";
    }
    else if(channel == 0){
        if(lep_pT < 52.5)       name = "eff_low_mm";
        else                    name = "eff_high_mm";
    }
    else if(channel == 2){
        if(lep_pT > 65)         name = "eff_high_em";
        else if(lep2_pT > 52.5) name = "eff_high_me";
        else                    name = "eff_low_em";
    }
    else if(channel == 3){
        if(lep2_pT > 65)        name = "eff_high_em";
        else if(lep_pT > 52.5)  name = "eff_high_me";
        else                    name = "eff_low_me";
    }
    else{
        std::cout << "Channel makes no sense." << channel << std::endl;
        return -1;
    }
    //#find pt/eta bin
    TH2D* histWeights           = (TH2D*)weight_dic->Get(name);
    Int_t binNumber             = histWeights->FindBin(lep_eta, lep_pT);
    Float_t trigWeight          = histWeights->GetBinContent(binNumber);
    Float_t trigWeightErr       = histWeights->GetBinError(binNumber);

    if(up_or_down == -1)        return (trigWeight-trigWeightErr);
    else if(up_or_down ==  1)   return (trigWeight+trigWeightErr);
    else                        return trigWeight;
}

Float_t find_weight(float val_pt, float val_eta, TFile *weight_dic, bool do_err_up, bool do_err_down, TString era, int lepNumber){
    /// find identification weight
    TString lepNumberStr   = Form("%d",lepNumber);
    TH2D *h_Weights        = (TH2D*)weight_dic->Get("h_Weight_"+era+"_"+lepNumberStr);
    int binNumber          = h_Weights->FindBin(val_eta, val_pt);
    Float_t kWeight        = h_Weights->GetBinContent(binNumber);
    Float_t kWeightErr     = h_Weights->GetBinError(binNumber);
        
    if(do_err_up)          return (kWeight + kWeightErr);
    else if(do_err_down)   return (kWeight - kWeightErr);
    else                   return (kWeight);
}


float applyWeights(float pt0, float pt1, float eta0, float eta1, string sampleTag, string era, int channel, string needWeight, string outputFolder){
        
    bool do_err_up        = false;
    bool do_err_down      = false;
    TString eraT          = (TString)era;
    TString sampleTagT    = (TString)sampleTag;
    TString outputDirT    = (TString)outputFolder;
    
    TString lep1FileName  = outputDirT+"TxtFiles/kFactors_lep1_"+eraT+"_"+sampleTagT+".root";
    TFile *lep1File       = new TFile(lep1FileName);
    TString lep0FileName  = outputDirT+"TxtFiles/kFactors_lep0_"+eraT+"_"+sampleTagT+".root";
    TFile *lep0File       = new TFile(lep0FileName);
    TString alphaFileName = outputDirT+"TxtFiles/alphaFactors_"+eraT+"_"+sampleTagT+".root";
    TFile *alphaFile      = new TFile(alphaFileName);
    
    //### getting the weights
    float ee_weight       = .5;
    float mm_weight       = .5;
    float ee_weight_up    = .5;
    float mm_weight_up    = .5;
    float ee_weight_down  = .5;
    float mm_weight_down  = .5;
    
    //### em channel
    if(channel == 2){
        ee_weight      *= find_weight(pt1, fabs(eta1), lep1File, do_err_up, do_err_down, eraT, 1);
        mm_weight      /= find_weight(pt0, fabs(eta0), lep0File, do_err_up, do_err_down, eraT, 0);
        ee_weight_up   *= find_weight(pt1, fabs(eta1), lep1File, true, do_err_down, eraT, 1);
        mm_weight_up   /= find_weight(pt0, fabs(eta0), lep0File, true, do_err_down, eraT, 0);
        ee_weight_down *= find_weight(pt1, fabs(eta1), lep1File, do_err_up, true, eraT, 1);
        mm_weight_down /= find_weight(pt0, fabs(eta0), lep0File, do_err_up, true, eraT, 0);
    }

    //### me channel
    if(channel == 3){
        ee_weight      *= find_weight(pt0, fabs(eta0), lep0File, do_err_up, do_err_down, eraT, 0);
        mm_weight      /= find_weight(pt1, fabs(eta1), lep1File, do_err_up, do_err_down, eraT, 1);
        ee_weight_up   *= find_weight(pt0, fabs(eta0), lep0File, true, do_err_down, eraT, 0);
        mm_weight_up   /= find_weight(pt1, fabs(eta1), lep1File, true, do_err_down, eraT, 1);
        ee_weight_down *= find_weight(pt0, fabs(eta0), lep0File, do_err_up, true, eraT, 0);
        mm_weight_down /= find_weight(pt1, fabs(eta1), lep1File, do_err_up, true, eraT, 1);
    }
    
    float fsKLep0  =  find_weight(pt0, fabs(eta0), lep0File, do_err_up, do_err_down, eraT, 0);
    float fsKLep1  =  find_weight(pt1, fabs(eta1), lep1File, do_err_up, do_err_down, eraT, 1);
    
    //### getting the trigger weight
    float trigger_weight      = 1.0;
    float fsAlpha             = 1.0;
    float trigger_weight_up   = 1.0;
    float trigger_weight_down = 1.0;
    if(find_trigger_weight(channel,pt0,fabs(eta0),alphaFile, pt1) == 0){
        trigger_weight = 1;
        fsAlpha        = 1;
    }
    else{
        trigger_weight      = sqrt( find_trigger_weight(0,pt0,fabs(eta0),alphaFile) * find_trigger_weight(1,pt0,fabs(eta0),alphaFile) ) / find_trigger_weight(channel,pt0,fabs(eta0),alphaFile,pt1);
        trigger_weight_up   = sqrt( find_trigger_weight(0,pt0,fabs(eta0),alphaFile,0, 1) * find_trigger_weight(1,pt0,fabs(eta0),alphaFile,0, 1) ) / find_trigger_weight(channel,pt0,fabs(eta0),alphaFile,pt1,-1);
        trigger_weight_down = sqrt( find_trigger_weight(0,pt0,fabs(eta0),alphaFile, 0, -1) * find_trigger_weight(1,pt0,fabs(eta0),alphaFile, 0, -1) ) / find_trigger_weight(channel,pt0,fabs(eta0),alphaFile,pt1,1);
        fsAlpha            = sqrt( find_trigger_weight(0,pt0,fabs(eta0),alphaFile) * find_trigger_weight(1,pt0,fabs(eta0),alphaFile) ) / find_trigger_weight(channel,pt0,fabs(eta0),alphaFile,pt1);
    }

    ee_weight      *= trigger_weight;
    mm_weight      *= trigger_weight;
    ee_weight_up   *= trigger_weight_up;
    mm_weight_up   *= trigger_weight_up;
    ee_weight_down *= trigger_weight_down;
    mm_weight_down *= trigger_weight_down;
    
    if      (needWeight == "ee_weight")      return ee_weight;
    else if (needWeight == "mm_weight")      return mm_weight;
    else if (needWeight == "ee_weight_up")   return ee_weight_up;
    else if (needWeight == "mm_weight_up")   return mm_weight_up;
    else if (needWeight == "ee_weight_down") return ee_weight_down;
    else if (needWeight == "mm_weight_down") return mm_weight_down;
    else if (needWeight == "fsAlpha")        return fsAlpha;
    else if (needWeight == "fsKLep0")        return fsKLep0;
    else if (needWeight == "fsKLep1")        return fsKLep1;
    else{
        std::cout << "Wrong weights wanted!!!" << std::endl;
        return -999;
    }                                     
}


void makeMyFSNtuple(string sampleTag, string runTag, string eraS, string versionS){
    //### boolean to know which process should be done
    // Record start time
    auto start = std::chrono::high_resolution_clock::now();
    bool isData  =  false;
    bool isMC    =  false;

    TString mcWeight = "";
    if (sampleTag == "Data"){
        isData   = true;
        mcWeight = "1.0";
    }
    else if (sampleTag == "MC"){
        isMC     = true;
        mcWeight = "(genWeight * eventWeight * leptonWeight * jvtWeight * bTagWeight * pileupWeight * FFWeight)";   //### get the MC weight for ttbar sample
    }
    else{
        std::cout<< "Unknown option for Data/MC" << std::endl;
        return;
    }

    std::cout << "Making FS ntuple for " << sampleTag << std::endl;
    

    //### Get all relevant settings from settings.py ###
      
    string eosDirS    =  "/eos/atlas/atlascerngroupdisk/phys-susy/2L2J-ANA-SUSY-2018-05/SusySkim2LJets/"+versionS+"/SUSY2/"; // eos directory address
    string outputDirS =  versionS+"/"; // the output directory

    int status  = mkdir(outputDirS.c_str(),0777);
    int status2 = mkdir((outputDirS+"NTuples").c_str(),0777);
    
    //### Prepare output files ###
    string fileOut  =  outputDirS+"Ntuples/fsTree_"+versionS+"_"+sampleTag+"_"+eraS+".root";
    string treeOut  =  "outputTree";
    //string fileOut  = string(fileOutS);

    bool do_err_up      =  false;
    bool do_err_down    =  false;

    //### variable to store the total events in the input file and total events saved in the output
    Long64_t totEvents      =  0;
    Long64_t totSaveEvents  =  0;

    //#### Start loop over input samples ###
    Long64_t nEvents        =  0;
    Long64_t nSaveEvents    =  0;


    std::cout << "----------" << std::endl;
    std::cout << "Processing " << sampleTag << std::endl;

    float lumi         = 1.0;
    string treeInS     = "";
    string treeInFileS = "";
    
    //########## change this for v1.2
    if (sampleTag == "Data"){
        std::cout << "Including Data" << std::endl;
        treeInS       = "data";
        treeInFileS   = eosDirS+"SUSY2_Data/"+eraS+"_merged_processed.root";
        lumi          = 1.0;
    }
    else{
        std::cout << "Including ttbar" << std::endl;
        treeInS          = "ttbar_NoSys";
        string eraName   = "";
        if(eraS   == "MC16a"){
            lumi    = 36200.0;
            eraName = "mc16a";
        }
        else if(eraS == "MC16cd"){
            lumi    = 43800.0;
            eraName = "mc16cd";
        } 
        treeInFileS  = eosDirS+"SUSY2_Bkgs_"+eraName+"/ttbar_merged_processed.root";
    }
    
    ROOT::RDataFrame d(treeInS, treeInFileS);
    auto count = d.Count();
        

    //### filtering the tree to make a smaller tree
    auto dSmall = d.Filter("trigMatch_electronPlusMuonTrig!=0");
    auto dCut   = dSmall.Filter("nLep_signal >= 2")
                        .Filter("channel>1");
    


    //# Determine the number of events to loop over
    unsigned long long rangeNumber = -1;
    unsigned long long maxEvents   = 5000;
    if (runTag == "Test")
        rangeNumber = std::min(maxEvents,*count);
    else
        rangeNumber = *count;

    //# Start loop over all events
    std::cout << "Looping over " << rangeNumber << " events" << std::endl;

    //# Retrieve all kinematic parameters from the event
    vector<string> colNames;
    colNames.push_back("met");
    colNames.push_back("metPhi");
    colNames.push_back("ht");
    colNames.push_back("mll");
    colNames.push_back("dPhi1");
    colNames.push_back("dPhi2");
    colNames.push_back("zPt");
    colNames.push_back("mt2w");
    colNames.push_back("jetN");
    colNames.push_back("bjetN");
    colNames.push_back("lepN");
    colNames.push_back("leadLepPt");
    colNames.push_back("trailLepPt");
    colNames.push_back("leadLepEta");
    colNames.push_back("trailLepEta");
    colNames.push_back("leadLepPhi");
    colNames.push_back("trailLepPhi");
    colNames.push_back("runNumber");
    colNames.push_back("eventNumber");
    colNames.push_back("eeTrig");
    colNames.push_back("eeTrigMatch");
    colNames.push_back("mmTrig");
    colNames.push_back("mmTrigMatch");
    colNames.push_back("emTrig");
    colNames.push_back("emTrigMatch");
    colNames.push_back("isOS");
    
    if(!isData){
       colNames.push_back("genWeight");
       colNames.push_back("eventWeight");
       colNames.push_back("leptonWeight");
       colNames.push_back("jvtWeight");
       colNames.push_back("bTagWeight");
       colNames.push_back("pileupWeight");
       colNames.push_back("FFWeight");
    }
    
    auto dOut =  dCut.Range(rangeNumber);
    auto dNew =  dOut.Define("met", "met_Et")
                     .Define("metPhi", "met_Phi")
                     .Define("ht", "Ht30")
                     .Define("dPhi1", "DPhiJ1Met")
                     .Define("dPhi2", "DPhiJ2Met")
                     .Define("zPt", "Ptll")
                     .Define("mt2w", "mt2leplsp_0")
                     .Define("jetN", "nJet30")
                     .Define("bjetN", "nBJet30_MV2c10_FixedCutBEff_77")
                     .Define("lepN", "nLep_signal")
                     .Define("leadLepPt", "lepPt[0]")
                     .Define("trailLepPt", "lepPt[1]")
                     .Define("leadLepEta", "lepEta[0]")
                     .Define("trailLepEta", "lepEta[1]")
                     .Define("leadLepPhi", "lepPhi[0]")
                     .Define("trailLepPhi", "lepPhi[1]")
                     .Define("runNumber", "RunNumber")
                     .Define("eventNumber", "EventNumber")
                     .Define("eeTrig", "trigWeight_1L2LTrig")
                     .Define("eeTrigMatch", "trigMatch_1L2LTrig")
                     .Define("mmTrig", "trigWeight_diMuonTrig")
                     .Define("mmTrigMatch", "trigMatch_diMuonTrig")
                     .Define("emTrig", "trigWeight_electronPlusMuonTrig")
                     .Define("emTrigMatch", "trigMatch_diMuonTrig")
                     .Define("isOS", "int OS = -1; (lepCharge[0]!=lepCharge[1] ? OS=1:OS=0); return OS;")
                     .Define("fs_mc_weight","mcWeight*lumi")
                     .Define("fs_weight_ee","fs_mc_weight*applyWeights(lepPt[0], lepPt[1], lepEta[0], lepEta[1], sampleTag, era, channel, \"ee_weight\", outputDirS)")
                     .Define("fs_weight_mm","fs_mc_weight*applyWeights(lepPt[0], lepPt[1], lepEta[0], lepEta[1], sampleTag, era, channel, \"mm_weight\", outputDirS)")
                     .Define("fs_weight_ee_up","fs_mc_weight*applyWeights(lepPt[0], lepPt[1], lepEta[0], lepEta[1], sampleTag, era, channel, \"ee_weight_up\", outputDirS)")
                     .Define("fs_weight_mm_up","fs_mc_weight*applyWeights(lepPt[0], lepPt[1], lepEta[0], lepEta[1], sampleTag, era, channel, \"mm_weight_up\", outputDirS)")
                     .Define("fs_weight_ee_down","fs_mc_weight*applyWeights(lepPt[0], lepPt[1], lepEta[0], lepEta[1], sampleTag, era, channel, \"ee_weight_down\", outputDirS)")
                     .Define("fs_weight_mm_down","fs_mc_weight*applyWeights(lepPt[0], lepPt[1], lepEta[0], lepEta[1], sampleTag, era, channel, \"mm_weight_down\", outputDirS)")
                     .Define("fs_alpha","fs_mc_weight*applyWeights(lepPt[0], lepPt[1], lepEta[0], lepEta[1], sampleTag, era, channel, \"fsAlpha\", outputDirS)")
                     .Define("fs_k_lep0","fs_mc_weight*applyWeights(lepPt[0], lepPt[1], lepEta[0], lepEta[1], sampleTag, era, channel, \"fsKLep0\", outputDirS)")
                     .Define("fs_k_lep1","fs_mc_weight*applyWeights(lepPt[0], lepPt[1], lepEta[0], lepEta[1], sampleTag, era, channel, \"fsKLep1\", outputDirS)")
                     .Define("fsChannel","channel");
            
                     
    dNew.Snapshot(treeOut, fileOut, colNames);
    // Record end time
    auto finish = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double> elapsed = finish - start;
    std::cout << "Elapsed time: " << elapsed.count() << " s" << std::endl;
}

Here I get the complaint that for “.Define(“fs_mc_weight”,“mcWeight*lumi”)”, mcWeight and lumi are undeclared. This is the error message:

input_line_119:1:47: error: use of undeclared identifier 'mcWeight'
namespace __tdf_28{ auto tdf_f = []() {return mcWeight
                                              ^
Error in <TRint::HandleTermInput()>: std::runtime_error caught: Cannot interpret the following expression:
mcWeight

Make sure it is valid C++.

I think I will get similar error for this line too:

Define("fs_weight_ee_up","fs_mc_weight*applyWeights(lepPt[0], lepPt[1], lepEta[0], lepEta[1], sampleTag, era, channel, \"ee_weight_up\", outputDirS)")

as sampleTag, era and outputDirS are user-defined.
May I know how I should use the user-defined variable in RDataFrame scenario?
Thank you,
Arka

beojan · November 14, 2018, 7:25pm

Include * lumi in mcWeight and pass the variable (not wrapped in a string) to Define.

arkasantra · November 14, 2018, 8:07pm

Hi Beojan,
I tried your solution. This is now my modified script:


/*
"""
2018/11/14
Arka Santra

Script that makes the flavour symmetry ntuple for data or MC taking the alpha and k factors as input.
Before running this scripts the alpha and k factors should be produced using the scripts makeAlpha.py and makeK.py. 
Text files with alpha and k should be saved to the output directory specified in settings.py. The root files also contain the same weights, but in histogram format. 

To run the script:

Two arguments should be given to the script:
sampleType: 
Data, MC
outputTag: 
Optional tag for the output file.
If tag=="Test" a test ntuple is produced with 5000 events from each of the input files.
era = data15-16/data17 or MC16a/MC16cd
"""
*/
#include <iostream>
#include "TFile.h"
#include "TH1.h"
#include "TH2.h"
#include "TString.h"
#include <string>
#include <sys/stat.h>
#include "TChain.h"
#include "TTree.h"
#include <ROOT/RDataFrame.hxx>
#include <algorithm>
#include <time.h>
#include <chrono>  // for high_resolution_clock

using namespace std;
using namespace ROOT;

// getting the luminosity weight
double lumiWeight(string sampleTag, float luminosity, double genWeight, double eventWeight, double leptonWeight, double jvtWeight, double bTagWeight, double pileupWeight, double FFWeight){
    double weight = 1.0;
    if(sampleTag == "Data"){
        weight = 1.0;
    }
    else{
        weight = (genWeight * eventWeight * leptonWeight * jvtWeight * bTagWeight * pileupWeight * FFWeight)*luminosity;
    }
    return weight;
}

// getting trigger weights from the root files
Float_t find_trigger_weight(int channel, float lep_pT, float lep_eta, TFile *weight_dic, float lep2_pT=0, int up_or_down=0){
    //# find trigger weight
    // name of the weight histograms
    TString name  =  "";
    // ee channel
    if(channel == 1){
        if(lep_pT < 65)         name = "eff_low_ee";
        else                    name = "eff_high_ee";
    }
    // mm channel
    else if(channel == 0){
        if(lep_pT < 52.5)       name = "eff_low_mm";
        else                    name = "eff_high_mm";
    }
    // em channel
    else if(channel == 2){
        if(lep_pT > 65)         name = "eff_high_em";
        else if(lep2_pT > 52.5) name = "eff_high_me";
        else                    name = "eff_low_em";
    }
    // em channel
    else if(channel == 3){
        if(lep2_pT > 65)        name = "eff_high_em";
        else if(lep_pT > 52.5)  name = "eff_high_me";
        else                    name = "eff_low_me";
    }
    else{
        std::cout << "Channel makes no sense." << channel << std::endl;
        return -1;
    }
    //#find pt/eta bin
    TH2F* histWeights           = (TH2F*)weight_dic->Get(name);
    Int_t binNumber             = histWeights->FindBin(lep_eta, lep_pT);
    
    // find the weight and weight error for each eta and pt bin
    Float_t trigWeight          = histWeights->GetBinContent(binNumber);
    Float_t trigWeightErr       = histWeights->GetBinError(binNumber);

    if(up_or_down == -1)        return (trigWeight-trigWeightErr);
    else if(up_or_down ==  1)   return (trigWeight+trigWeightErr);
    else                        return trigWeight;
}


// getting the identification weights from the root files
Float_t find_weight(float val_pt, float val_eta, TFile *weight_dic, bool do_err_up, bool do_err_down, TString era, int lepNumber){
    // lepton number in TString format
    TString lepNumberStr   = Form("%d",lepNumber);
    TH2D *h_Weights        = (TH2D*)weight_dic->Get("h_Weight_"+era+"_"+lepNumberStr);
    //#find pt/eta bin
    int binNumber          = h_Weights->FindBin(val_eta, val_pt);
    
    // find the weight and weight error for each eta and pt bin
    Float_t kWeight        = h_Weights->GetBinContent(binNumber);
    Float_t kWeightErr     = h_Weights->GetBinError(binNumber);
        
    if(do_err_up)          return (kWeight + kWeightErr);
    else if(do_err_down)   return (kWeight - kWeightErr);
    else                   return (kWeight);
}

// apply the alpha and k correction factors. They will be stored to the branches
float applyWeights(float pt0, float pt1, float eta0, float eta1, string sampleTag, string era, int channel, string needWeight, string outputFolder){
        
    bool do_err_up        = false;
    bool do_err_down      = false;
    // convert the strings to TStrings in order to use in TFile
    TString eraT          = (TString)era;
    TString sampleTagT    = (TString)sampleTag;
    TString outputDirT    = (TString)outputFolder;
    
    // loading the root files containing the weights
    TString lep1FileName  = outputDirT+"TxtFiles/kFactors_lep1_"+eraT+"_"+sampleTagT+".root";
    TFile *lep1File       = new TFile(lep1FileName);
    TString lep0FileName  = outputDirT+"TxtFiles/kFactors_lep0_"+eraT+"_"+sampleTagT+".root";
    TFile *lep0File       = new TFile(lep0FileName);
    TString alphaFileName = outputDirT+"TxtFiles/alphaFactors_"+eraT+"_"+sampleTagT+".root";
    TFile *alphaFile      = new TFile(alphaFileName);
    
    //### getting the identification weights
    float ee_weight       = .5;
    float mm_weight       = .5;
    float ee_weight_up    = .5;
    float mm_weight_up    = .5;
    float ee_weight_down  = .5;
    float mm_weight_down  = .5;
    
    //### em channel
    if(channel == 2){
        ee_weight      *= find_weight(pt1, fabs(eta1), lep1File, do_err_up, do_err_down, eraT, 1);
        mm_weight      /= find_weight(pt0, fabs(eta0), lep0File, do_err_up, do_err_down, eraT, 0);
        ee_weight_up   *= find_weight(pt1, fabs(eta1), lep1File, true, do_err_down, eraT, 1);
        mm_weight_up   /= find_weight(pt0, fabs(eta0), lep0File, true, do_err_down, eraT, 0);
        ee_weight_down *= find_weight(pt1, fabs(eta1), lep1File, do_err_up, true, eraT, 1);
        mm_weight_down /= find_weight(pt0, fabs(eta0), lep0File, do_err_up, true, eraT, 0);
    }

    //### me channel
    if(channel == 3){
        ee_weight      *= find_weight(pt0, fabs(eta0), lep0File, do_err_up, do_err_down, eraT, 0);
        mm_weight      /= find_weight(pt1, fabs(eta1), lep1File, do_err_up, do_err_down, eraT, 1);
        ee_weight_up   *= find_weight(pt0, fabs(eta0), lep0File, true, do_err_down, eraT, 0);
        mm_weight_up   /= find_weight(pt1, fabs(eta1), lep1File, true, do_err_down, eraT, 1);
        ee_weight_down *= find_weight(pt0, fabs(eta0), lep0File, do_err_up, true, eraT, 0);
        mm_weight_down /= find_weight(pt1, fabs(eta1), lep1File, do_err_up, true, eraT, 1);
    }
    
    float fsKLep0  =  find_weight(pt0, fabs(eta0), lep0File, do_err_up, do_err_down, eraT, 0);
    float fsKLep1  =  find_weight(pt1, fabs(eta1), lep1File, do_err_up, do_err_down, eraT, 1);
    
    //### getting the trigger weight
    
    float trigger_weight      = 1.0;
    float fsAlpha             = 1.0;
    float trigger_weight_up   = 1.0;
    float trigger_weight_down = 1.0;
    
    if(find_trigger_weight(channel,pt0,fabs(eta0),alphaFile, pt1) == 0){
        trigger_weight = 1.0;
        fsAlpha        = 1.0;
    }
    else{
        trigger_weight      = sqrt( find_trigger_weight(0,pt0,fabs(eta0),alphaFile) * find_trigger_weight(1,pt0,fabs(eta0),alphaFile) ) / find_trigger_weight(channel,pt0,fabs(eta0),alphaFile,pt1);
        trigger_weight_up   = sqrt( find_trigger_weight(0,pt0,fabs(eta0),alphaFile, 0, 1) * find_trigger_weight(1,pt0,fabs(eta0),alphaFile, 0, 1) ) / find_trigger_weight(channel,pt0,fabs(eta0),alphaFile,pt1, -1);
        trigger_weight_down = sqrt( find_trigger_weight(0,pt0,fabs(eta0),alphaFile, 0, -1) * find_trigger_weight(1,pt0,fabs(eta0),alphaFile, 0, -1) ) / find_trigger_weight(channel,pt0,fabs(eta0),alphaFile,pt1,1);
        fsAlpha            = sqrt( find_trigger_weight(0,pt0,fabs(eta0),alphaFile) * find_trigger_weight(1,pt0,fabs(eta0),alphaFile) ) / find_trigger_weight(channel,pt0,fabs(eta0),alphaFile,pt1);
    }

    ee_weight      *= trigger_weight;
    mm_weight      *= trigger_weight;
    ee_weight_up   *= trigger_weight_up;
    mm_weight_up   *= trigger_weight_up;
    ee_weight_down *= trigger_weight_down;
    mm_weight_down *= trigger_weight_down;
    
    if      (needWeight == "ee_weight")      return ee_weight;
    else if (needWeight == "mm_weight")      return mm_weight;
    else if (needWeight == "ee_weight_up")   return ee_weight_up;
    else if (needWeight == "mm_weight_up")   return mm_weight_up;
    else if (needWeight == "ee_weight_down") return ee_weight_down;
    else if (needWeight == "mm_weight_down") return mm_weight_down;
    else if (needWeight == "fsAlpha")        return fsAlpha;
    else if (needWeight == "fsKLep0")        return fsKLep0;
    else if (needWeight == "fsKLep1")        return fsKLep1;
    else{
        std::cout << "Wrong weights wanted!!!" << std::endl;
        return -999;
    }                                     
}


void makeMyFSNtuple(string sampleTag, string runTag, string eraS, string versionS){
    //### boolean to know which process should be done
    // Record start time
    auto start = std::chrono::high_resolution_clock::now();
    bool isData  =  false;
    bool isMC    =  false;

    if (sampleTag == "Data"){
        isData   = true;
    }
    else if (sampleTag == "MC"){
        isMC     = true;
    }
    else{
        std::cout<< "Unknown option for Data/MC" << std::endl;
        return;
    }

    std::cout << "Making FS ntuple for " << sampleTag << std::endl;
    

    //### Get all relevant settings from settings.py ###
      
    string eosDirS    =  "/eos/atlas/atlascerngroupdisk/phys-susy/2L2J-ANA-SUSY-2018-05/SusySkim2LJets/"+versionS+"/SUSY2/"; // eos directory address
    string outputDirS =  versionS+"/"; // the output directory

    int status  = mkdir(outputDirS.c_str(),0777);
    int status2 = mkdir((outputDirS+"NTuples").c_str(),0777);
    
    //### Prepare output files ###
    string fileOut  =  outputDirS+"Ntuples/fsTree_"+versionS+"_"+sampleTag+"_"+eraS+".root";
    string treeOut  =  "outputTree";
    //string fileOut  = string(fileOutS);

    bool do_err_up      =  false;
    bool do_err_down    =  false;

    //### variable to store the total events in the input file and total events saved in the output
    Long64_t totEvents      =  0;
    Long64_t totSaveEvents  =  0;

    //#### Start loop over input samples ###
    Long64_t nEvents        =  0;
    Long64_t nSaveEvents    =  0;


    std::cout << "----------" << std::endl;
    std::cout << "Processing " << sampleTag << std::endl;

    float lumi         = 1.0;
    string treeInS     = "";
    string treeInFileS = "";
    
    //########## change this for v1.2
    if (sampleTag == "Data"){
        std::cout << "Including Data" << std::endl;
        treeInS       = "data";
        treeInFileS   = eosDirS+"SUSY2_Data/"+eraS+"_merged_processed.root";
        lumi          = 1.0;
    }
    else{
        std::cout << "Including ttbar" << std::endl;
        treeInS          = "ttbar_NoSys";
        string eraName   = "";
        if(eraS   == "MC16a"){
            lumi    = 36200.0;
            eraName = "mc16a";
        }
        else if(eraS == "MC16cd"){
            lumi    = 43800.0;
            eraName = "mc16cd";
        } 
        treeInFileS  = eosDirS+"SUSY2_Bkgs_"+eraName+"/ttbar_merged_processed.root";
    }
    
    ROOT::RDataFrame d(treeInS, treeInFileS);
        

    //### filtering the tree to make a smaller tree
    auto dSmall = d.Filter("trigMatch_electronPlusMuonTrig!=0");
    auto dCut   = dSmall.Filter("nLep_signal >= 2")
                        .Filter("channel>1"); // only use for em and me channels
    auto count = dCut.Count();
    
    //# Determine the number of events to loop over
    unsigned long long rangeNumber = -1;
    unsigned long long maxEvents   = 5000;
    if (runTag == "Test")
        rangeNumber = std::min(maxEvents,*count);
    else
        rangeNumber = *count;

    //# Start loop over all events
    std::cout << "Looping over " << rangeNumber << " events" << std::endl;

    //# Retrieve all kinematic parameters from the event
    vector<string> colNames;
    colNames.push_back("met");
    colNames.push_back("metPhi");
    colNames.push_back("ht");
    colNames.push_back("mll");
    colNames.push_back("dPhi1");
    colNames.push_back("dPhi2");
    colNames.push_back("zPt");
    colNames.push_back("mt2w");
    colNames.push_back("jetN");
    colNames.push_back("bjetN");
    colNames.push_back("lepN");
    colNames.push_back("leadLepPt");
    colNames.push_back("trailLepPt");
    colNames.push_back("leadLepEta");
    colNames.push_back("trailLepEta");
    colNames.push_back("leadLepPhi");
    colNames.push_back("trailLepPhi");
    colNames.push_back("runNumber");
    colNames.push_back("eventNumber");
    colNames.push_back("eeTrig");
    colNames.push_back("eeTrigMatch");
    colNames.push_back("mmTrig");
    colNames.push_back("mmTrigMatch");
    colNames.push_back("emTrig");
    colNames.push_back("emTrigMatch");
    colNames.push_back("isOS");
    
    if(!isData){
       colNames.push_back("genWeight");
       colNames.push_back("eventWeight");
       colNames.push_back("leptonWeight");
       colNames.push_back("jvtWeight");
       colNames.push_back("bTagWeight");
       colNames.push_back("pileupWeight");
       colNames.push_back("FFWeight");
    }
    
    auto dOut =  dCut.Range(rangeNumber);
    auto dNew =  dOut.Define("met", "met_Et")
                     .Define("metPhi", "met_Phi")
                     .Define("ht", "Ht30")
                     .Define("dPhi1", "DPhiJ1Met")
                     .Define("dPhi2", "DPhiJ2Met")
                     .Define("zPt", "Ptll")
                     .Define("mt2w", "mt2leplsp_0")
                     .Define("jetN", "nJet30")
                     .Define("bjetN", "nBJet30_MV2c10_FixedCutBEff_77")
                     .Define("lepN", "nLep_signal")
                     .Define("leadLepPt", "lepPt[0]")
                     .Define("trailLepPt", "lepPt[1]")
                     .Define("leadLepEta", "lepEta[0]")
                     .Define("trailLepEta", "lepEta[1]")
                     .Define("leadLepPhi", "lepPhi[0]")
                     .Define("trailLepPhi", "lepPhi[1]")
                     .Define("runNumber", "RunNumber")
                     .Define("eventNumber", "EventNumber")
                     .Define("eeTrig", "trigWeight_1L2LTrig")
                     .Define("eeTrigMatch", "trigMatch_1L2LTrig")
                     .Define("mmTrig", "trigWeight_diMuonTrig")
                     .Define("mmTrigMatch", "trigMatch_diMuonTrig")
                     .Define("emTrig", "trigWeight_electronPlusMuonTrig")
                     .Define("emTrigMatch", "trigMatch_diMuonTrig")
                     .Define("isOS", "int OS = -1; (lepCharge[0]!=lepCharge[1] ? OS=1:OS=0); return OS;")
                     .Define("sampleTag", "sampleTag")
                     .Define("luminosity", "lumi")
                     .Define("era", "eraS")
                     .Define("outputDirS", "outputDirS")
                     .Define("fs_mc_weight", "lumiWeight(sampleTag, luminosity, genWeight, eventWeight, leptonWeight, jvtWeight, bTagWeight, pileupWeight, FFWeight)")
                     .Define("fs_weight_ee","fs_mc_weight*applyWeights(lepPt[0], lepPt[1], lepEta[0], lepEta[1], sampleTag, era, channel, \"ee_weight\", outputDirS)")
                     .Define("fs_weight_mm","fs_mc_weight*applyWeights(lepPt[0], lepPt[1], lepEta[0], lepEta[1], sampleTag, era, channel, \"mm_weight\", outputDirS)")
                     .Define("fs_weight_ee_up","fs_mc_weight*applyWeights(lepPt[0], lepPt[1], lepEta[0], lepEta[1], sampleTag, era, channel, \"ee_weight_up\", outputDirS)")
                     .Define("fs_weight_mm_up","fs_mc_weight*applyWeights(lepPt[0], lepPt[1], lepEta[0], lepEta[1], sampleTag, era, channel, \"mm_weight_up\", outputDirS)")
                     .Define("fs_weight_ee_down","fs_mc_weight*applyWeights(lepPt[0], lepPt[1], lepEta[0], lepEta[1], sampleTag, era, channel, \"ee_weight_down\", outputDirS)")
                     .Define("fs_weight_mm_down","fs_mc_weight*applyWeights(lepPt[0], lepPt[1], lepEta[0], lepEta[1], sampleTag, era, channel, \"mm_weight_down\", outputDirS)")
                     .Define("fs_alpha","fs_mc_weight*applyWeights(lepPt[0], lepPt[1], lepEta[0], lepEta[1], sampleTag, era, channel, \"fsAlpha\", outputDirS)")
                     .Define("fs_k_lep0","fs_mc_weight*applyWeights(lepPt[0], lepPt[1], lepEta[0], lepEta[1], sampleTag, era, channel, \"fsKLep0\", outputDirS)")
                     .Define("fs_k_lep1","fs_mc_weight*applyWeights(lepPt[0], lepPt[1], lepEta[0], lepEta[1], sampleTag, era, channel, \"fsKLep1\", outputDirS)")
                     .Define("fsChannel","channel");
            
                     
    dNew.Snapshot(treeOut, fileOut, colNames);
    // Record end time
    auto finish = std::chrono::high_resolution_clock::now();
    std::chrono::duration<double> elapsed = finish - start;
    std::cout << "Elapsed time: " << elapsed.count() << " s" << std::endl;
}

But now I get this error:

input_line_119:1:47: error: use of undeclared identifier 'sampleTag'
namespace __tdf_28{ auto tdf_f = []() {return sampleTag
                                              ^
Error in <TRint::HandleTermInput()>: std::runtime_error caught: Cannot interpret the following expression:
sampleTag

Make sure it is valid C++.

It is complaining that “sampleTag” is not declared, even though this is a string I take as an input. May I know the solution to pass “sampleTag” to the RDataFrame object?

Thank you,
Arka

beojan · November 14, 2018, 8:25pm

The only names you can use in those strings are column names.

You may have to use lambdas with captures here.

eguiraud · November 14, 2018, 8:30pm

Hi Arka,
as @beojan says, you cannot use just any variable inside Define strings: you are restricted to column names.

The “undeclared identifier ‘sampleTag’” in this context means that RDataFrame doesn’t know about a thing called “sampleTag”.

You can either add a Define("sampleTag", [sampleTag] { return sampleTag; }) before the Define that needs to use it (simple but less performant) or as @beojan suggests you can use a lambda instead of a string to do your call to lumiWeight:

Define("fs_mc_weight", [sampleTag] (double luminosity, andallotherargs...) { return lumiWeight(sampleTag, luminosity, genWeight, eventWeight, leptonWeight, jvtWeight, bTagWeight, pileupWeight, FFWeight), {"luminosity", "andallothercolumns"})

(more verbose but more performant)

Hope this clarifies things a bit.
Cheers,
Enrico

arkasantra · November 14, 2018, 9:49pm

Hi Enrico,
Sorry, I am still unable to get things done. I used the simpler path of defining sampleTag into the column:

                     .Define("sampleTag", [sampleTag] { return sampleTag; })
                     .Define("luminosity",[lumi] { return lumi; })
                     .Define("era", [eraS] { return eraS; })
                     .Define("outputDirS", [outputDirS] { return outputDirS; })
                     .Define("fs_k_lep0","applyWeights(lepPt[0], lepPt[1], lepEta[0], lepEta[1], sampleTag, era, channel, \"fsKLep0\", outputDirS)");

Yet, I get this error:

input_line_119:1:47: error: use of undeclared identifier 'sampleTag'
namespace __tdf_28{ auto tdf_f = []() {return sampleTag
                                              ^
Error in <TRint::HandleTermInput()>: std::runtime_error caught: Cannot interpret the following expression:
sampleTag

Make sure it is valid C++.

What is going wrong here?

Thanks,
Arka

eguiraud · November 14, 2018, 10:49pm

Uhm, do you still have .Define("sampleTag", "sampleTag") somewhere, or another string that makes use of "sampleTag" before you Define it?

If not, can you post your current full script again?

Cheers,
Enrico

beojan · November 15, 2018, 5:42am

~~Well the snippet you show there has syntax errors because there are no brackets () in the lambdas.~~

Ignore that, turns out the parameter list is optional.

arkasantra · November 15, 2018, 10:16am

Hi Enrico,
Sorry, I used sampleTag before defining it: stupid mistake. Now the code is fine.
Thank you very much,
Arka

arkasantra · November 20, 2018, 5:55pm

Hi Enrico,
I was testing the lambda function method to get the “fs_mc_weight” in my RDataFrame column following your advice:

.Define("fs_mc_weight", [sampleTag] (float luminosity, double genWeight, double eventWeight, double leptonWeight, double jvtWeight, double bTagWeight, double pileupWeight, double FFWeight) { return lumiWeight(sampleTag, luminosity, genWeight, eventWeight, leptonWeight, jvtWeight, bTagWeight, pileupWeight, FFWeight),{"luminosity", "genWeight", "eventWeight",  "leptonWeight", "jvtWeight", "bTagWeight", "pileupWeight", "FFWeight"};})

But this is the compilation error I am getting:

In file included from input_line_12:9:
././makeMyFSNtuple.C:477:338: error: expected ';' after return statement
  ...{ return lumiWeight(sampleTag, luminosity, genWeight, eventWeight, leptonWeight, jvtWeight, bTagWeight, pileupWeight, FFWeight),{"luminosity"...
                                                                                                                                    ^
                                                                                                                                    ;
Error in <ACLiC>: Dictionary generation failed!

It seems there is a syntax error. What should be the right way of writing lambda function?

Thank you,
Arka

eguiraud · November 20, 2018, 7:57pm

Hi,
you are missing a ; at the end of the lambda (as per the error message) and also a } if I read correctly.

Cheers,
Enrico

system · December 4, 2018, 7:57pm

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.