Hi,
I wrote a TSelector and include a stripped-down version of it below (all analysis removed, but with enough left to reproduce my error). When I run the selector on a single thread, it works just fine. However, if I try to run it in PROOF-Lite, I get a segfault that I think is happening in the Process method at "fChain->GetTree()->GetEntry(entry)".
Here is testSelector.C
#define testSelector_cxx
#include "testSelector.h"
#include <TH2.h>
#include <TStyle.h>
#include <iostream>
// Begin hook of the selector. Intentionally empty in this stripped-down
// reproducer; the tree argument is ignored.
void testSelector::Begin(TTree * /*tree*/)
{
}
void testSelector::SlaveBegin(TTree * tree)
{
TString option = GetOption();
}
// Reads entry `entry` from the current tree into theS800 and prints the
// crdc1.x value for every 100th entry.
//
// entry: entry number passed on to TTree::GetEntry of fChain->GetTree().
// Returns kTRUE after the entry has been read; kFALSE (after requesting an
// abort) when no tree or no RBS800 object is attached.
Bool_t testSelector::Process(Long64_t entry)
{
   // Init() returns early without setting fChain when it is handed a null
   // tree, so do not dereference it blindly.
   TTree *current = fChain ? fChain->GetTree() : 0;
   if (!current) {
      Abort("testSelector::Process: no tree attached");
      return kFALSE;
   }

   current->GetEntry(entry);

   // The branch object is only usable if the "s800" branch address was set.
   if (!theS800) {
      Abort("testSelector::Process: no object attached to branch s800");
      return kFALSE;
   }

   const Double_t x = theS800->crdc1.x;
   if (entry % 100 == 0) {
      std::cout << "THIS IS X " << x << std::endl;
   }
   return kTRUE;
}
// SlaveTerminate hook of the selector. Intentionally empty in this
// reproducer: there is nothing to finalize.
void testSelector::SlaveTerminate()
{
}
// Terminate hook of the selector. Intentionally empty in this reproducer:
// no output is produced or post-processed.
void testSelector::Terminate()
{
}
And here is testSelector.h:
#ifndef testSelector_h
#define testSelector_h
#include <TROOT.h>
#include <TChain.h>
#include <TFile.h>
#include <TSelector.h>
#include <TObject.h>
#include <TNamed.h>
#include <TCutG.h>
#include <TCanvas.h>
// Header file for the classes stored in the TTree if any.
#include "./RBS800.h"
//#include "/scratch/blair/root/core/base/inc/TObject.h"
#include "./RBDetector.h"
//#include "/scratch/blair/root/core/base/inc/TNamed.h"
#include "./RBS800Crdc.h"
#include "./RBS800CrdcPads.h"
#include "./RBS800CrdcCalc.h"
#include "./RBS800IonChamber.h"
#include "./RBS800Scintillator.h"
#include "./RBS800Hodoscope.h"
#include "./RBS800FpTrack.h"
#include "./RBS800FpParticle.h"
#include "./RBS800Trigger.h"
#include "./RBS800TimeOfFlight.h"
#include <iostream>
using namespace std;
#include "./settings/S800Calibrations.C"
//This is where the loadS800RunSettings function lives
// Fixed size dimensions of array or collections stored in the TTree if any.
const Int_t kMaxfpTrack_fFpParticles = 1;
// Minimal TSelector used to reproduce a crash: it binds the "s800" branch of
// the analyzed tree to an RBS800 object (see Init) and reads it entry by entry
// (see Process).
class testSelector : public TSelector {
public :
   TTree *fChain=0; //!pointer to the analyzed TTree or TChain

   // Declaration of leaf types
   // Object the "s800" branch is read into; its address is registered in Init().
   RBS800 *theS800=0;
   // Branch pointer filled in by SetBranchAddress in Init().
   TBranch *b_theS800=0;

   testSelector(TTree * /*tree*/ =0) : fChain(0) { }
   virtual ~testSelector() { }
   virtual Int_t Version() const { return 2; }
   virtual void Begin(TTree *tree);
   virtual void SlaveBegin(TTree *tree);
   virtual void Init(TTree *tree);
   virtual Bool_t Notify();
   virtual Bool_t Process(Long64_t entry);
   // Forwards to GetEntry of the current tree; returns 0 when no tree is attached.
   // (Previously split by a backslash line-splice inside the identifier GetTree.)
   virtual Int_t GetEntry(Long64_t entry, Int_t getall = 0) { return fChain ? fChain->GetTree()->GetEntry(entry, getall) : 0; }
   virtual void SetOption(const char *option) { fOption = option; }
   virtual void SetObject(TObject *obj) { fObject = obj; }
   virtual void SetInputList(TList *input) { fInput = input; }
   virtual TList *GetOutputList() const { return fOutput; }
   virtual void SlaveTerminate();
   virtual void Terminate();

   // Not used by any of the methods shown in this reproducer.
   TString outputFile;
   TString runString;

   ClassDef(testSelector,0);
};
#endif
#ifdef testSelector_cxx
// Attaches the selector to `tree` and binds its "s800" branch to theS800.
//
// tree: tree or chain to analyze; when null, only the object pointer is reset
//       and fChain is left unchanged.
void testSelector::Init(TTree *tree)
{
   // Set object pointer. The previous version allocated a new RBS800 on every
   // call (even when `tree` was null) and never released it. Starting from a
   // null pointer follows the MakeSelector-generated pattern: per the
   // TTree::SetBranchAddress documentation the tree then allocates and owns
   // the object, so repeated Init() calls no longer leak.
   theS800 = 0;

   // Set branch addresses and branch pointers
   if (!tree) return;
   fChain = tree;
   fChain->SetMakeClass(0);

   // Negative status codes signal that the address could not be set
   // (e.g. missing branch or class mismatch); make that visible in the log.
   const Int_t status = fChain->SetBranchAddress("s800", &theS800, &b_theS800);
   if (status < 0) {
      Error("Init", "SetBranchAddress(\"s800\") failed with status %d", status);
   }
}
// Notify hook of the selector. Nothing needs to be refreshed in this
// reproducer, so it unconditionally returns kTRUE.
Bool_t testSelector::Notify()
{
return kTRUE;
}
#endif // #ifdef testSelector_cxx
where RBS800*.h are the relevant files defining the RBS800 class.
This TSelector works just fine when I run it on a single thread, in other words:
In the interpreter:
root [0] TFile f("name/of/file.root")
root [1] TreeName->Process("testSelector.C+")
I am trying to run this in PROOFLite in as simple a way as possible with this script. If I modify the TSelector and remove any mention of retrieving data from a file, this script works fine:
testProof.C
#include <sstream>
#include <vector>
#include <ctime>
#include <algorithm>
#endif
void testProof()
{
//Open ROOT files and assemble TChain
TChain* ch = new TChain("E09084Cal");
ch->Add("/mnt/analysis/e09084/rootfilesCal/run-0343Cal.root");
TProof * p = TProof::Open("","workers=1");
ch->SetProof(kTRUE);
ch->Process("testSelector.C+");
delete ch;
}
Yet when I try to run this with testSelector I get the following message:
[code] +++ Starting PROOF-Lite with 1 workers +++
Opening connections to workers: OK (1 workers)
Setting up worker servers: OK (1 workers)
PROOF set to parallel mode (1 worker)
15:00:22 32516 Wrk-0.0 | Info in TProofServLite::HandleCache: loading macro testSelector.C+ …
Info in TProofLite::SetQueryRunning: starting query: 1
Info in TProofQueryResult::SetRunning: nwrks: 1
Looking up for exact location of files: OK (1 files)
Looking up for exact location of files: OK (1 files)
Info in TPacketizerAdaptive::TPacketizerAdaptive: Setting max number of workers per node to 1
Validating files: OK (1 files)
Info in TPacketizerAdaptive::InitStats: fraction of remote files 1.000000
0.0: caught exception triggered by signal ‘1’ while processing dset:‘TDSet:E09084Cal’, file:’/mnt/analysis/e09084/rootfilesCal/run-0343Cal.root’ - check logs for possible stacktrace - last event: 0
Info in TProofLite::MarkBad:
+++ Message from master at walleye.nscl.msu.edu : marking walleye:-1 (0.0) as bad
+++ Reason: undefined message in TProof::CollectInputFrom(…)
+++ Message from master at walleye.nscl.msu.edu : marking walleye:-1 (0.0) as bad
+++ Reason: undefined message in TProof::CollectInputFrom(…)
+++ Most likely your code crashed
+++ Please check the session logs for error messages either using
+++ the ‘Show logs’ button or executing
+++
+++ root [] TProof::Mgr(“walleye.nscl.msu.edu”)->GetSessionLogs()->Display("*")
entries: 1000 (1000)
Error in TPacketizerAdaptive::SplitPerHost: Error removing a missing file
Info in TPacketizerAdaptive::InitStats: fraction of remote files 1.000000
Lite-0: all output objects have been merged
[/code]
Here are the relevant logs:
[code]// --------- Start of element log -----------------
// Ordinal: 0 (role: master)
// Path: /user/manfredi/.proof/projects-e09084-ribbit/session-walleye-1436560628-41484/session-walleye-1436560628-41484.log
// # of retrieved lines: 1
// ------------------------------------------------
// --------- End of element log -------------------
Retrieving logs: 1 ok, 0 not ok (100 % processed)
// --------- Start of element log -----------------
// Ordinal: 0.0 (role: worker)
// Path: /user/manfredi/.proof/projects-e09084-ribbit/session-walleye-1436560628-41484/worker-0.0-walleye-1436560628-41496.log
// # of retrieved lines: 80
// ------------------------------------------------
16:37:08 41496 Wrk-0.0 | Info in TProofServLite::Setup: fWorkDir: /user/manfredi/.proof
16:37:08 41496 Wrk-0.0 | Info in TProofServLite::HandleCache: loading macro testSelector.C+ …
16:37:09 41496 Wrk-0.0 | Info in TProofServLite::HandleProcess: selector obj for ‘testSelector’ found
16:37:09 41496 Wrk-0.0 | Info in TProofServLite::HandleProcess: calling fPlayer->Process() with selector object: testSelector
16:37:09 41496 Wrk-0.0 | Info in TProofPlayerSlave::AssertSelector: Processing via TSelector object
16:37:09 41496 Wrk-0.0 | Info in TEventIter::TEventIter: fPackets list ‘ProcessedPackets_0.0’ created
16:37:09 41496 Wrk-0.0 | Info in TProofPlayerSlave::Process: save partial results? 0 per-packet? 0
16:37:09 41496 Wrk-0.0 | Info in TEventIterTree::GetTrees: the tree cache is in learning phase
Array size is 32
Run 343 Settings
Run settings loaded for run 343
Now I have Run 343
16:37:09 41496 Wrk-0.0 | *** Break ***: segmentation violation
There was a crash.
This is the entire stack trace of all threads:
#0 0x00007f16a325d06e in __libc_waitpid (pid=, stat_loc=0x7ffda69632ac, options=) at …/sysdeps/unix/sysv/linux/waitpid.c:32
#1 0x00007f16a31f1989 in do_system (line=) at …/sysdeps/posix/system.c:149
#2 0x00007f16a4f8c4ec in TUnixSystem::StackTrace() () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libCore.so.5
#3 0x00007f16a4f8eb53 in TUnixSystem::DispatchSignals(ESignals) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libCore.so.5
#4
#5 0x00007f16a4ebde95 in ROOT::delete_TClonesArray(void*) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libCore.so.5
#6 0x00007f16a4f58d40 in TClass::Destructor(void*, bool) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libCore.so.5
#7 0x00007f16a1c732f5 in TBufferFile::ReadFastArray(void**, TClass const*, int, bool, TMemberStreamer*, TClass const*) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libRIO.so
#8 0x00007f16a1c3f6df in int TStreamerInfo::ReadBuffer<char**>(TBuffer&, char** const&, int, int, int, int) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libRIO.so
#9 0x00007f16a1b466fb in TStreamerInfoActions::GenericReadAction(TBuffer&, void*, TStreamerInfoActions::TConfiguration const*) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libRIO.so
#10 0x00007f16a1c71665 in TBufferFile::ApplySequence(TStreamerInfoActions::TActionSequence const&, void*) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libRIO.so
#11 0x00007f16a0962333 in TBranchElement::ReadLeavesMember(TBuffer&) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libTree.so
#12 0x00007f16a0917437 in TBranch::GetEntry(long long, int) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libTree.so
#13 0x00007f16a09699c5 in TBranchElement::GetEntry(long long, int) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libTree.so
#14 0x00007f16a0969980 in TBranchElement::GetEntry(long long, int) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libTree.so
#15 0x00007f16a0969980 in TBranchElement::GetEntry(long long, int) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libTree.so
#16 0x00007f16a09236f3 in TTree::GetEntry(long long, int) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libTree.so
#17 0x00007f16a01b3dba in testSelector::Process(long long) () from /user/manfredi/.proof/projects-e09084-ribbit/session-walleye-1436560628-41484/worker-0.0/./testSelector_C.so
#18 0x00007f169a266179 in TProofPlayer::Process(TDSet*, char const*, char const*, long long, long long) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libProofPlayer.so
#19 0x00007f16a0533318 in TProofServ::HandleProcess(TMessage*, TString*) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libProof.so
#20 0x00007f16a052c32e in TProofServ::HandleSocketInput(TMessage*, bool) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libProof.so
#21 0x00007f16a051f337 in TProofServ::HandleSocketInput() () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libProof.so
#22 0x00007f16a053f9e1 in TProofServLiteInputHandler::Notify() () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libProof.so
#23 0x00007f16a4f8dcbd in TUnixSystem::CheckDescriptors() () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libCore.so.5
#24 0x00007f16a4f8f288 in TUnixSystem::DispatchOneEvent(bool) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libCore.so.5
#25 0x00007f16a4fe3c76 in TSystem::InnerLoop() () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libCore.so.5
#26 0x00007f16a4fe5724 in TSystem::Run() () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libCore.so.5
#27 0x00007f16a4ff3d0f in TApplication::Run(bool) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libCore.so.5
#28 0x0000000000401ce1 in main ()
The lines below might hint at the cause of the crash.
If they do not help you then please submit a bug report at
http://root.cern.ch/bugs. Please post the ENTIRE stack trace
from above as an attachment in addition to anything else
that might help us fixing this issue.
#5 0x00007f16a4ebde95 in ROOT::delete_TClonesArray(void*) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libCore.so.5
#6 0x00007f16a4f58d40 in TClass::Destructor(void*, bool) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libCore.so.5
#7 0x00007f16a1c732f5 in TBufferFile::ReadFastArray(void**, TClass const*, int, bool, TMemberStreamer*, TClass const*) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libRIO.so
#8 0x00007f16a1c3f6df in int TStreamerInfo::ReadBuffer<char**>(TBuffer&, char** const&, int, int, int, int) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libRIO.so
#9 0x00007f16a1b466fb in TStreamerInfoActions::GenericReadAction(TBuffer&, void*, TStreamerInfoActions::TConfiguration const*) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libRIO.so
#10 0x00007f16a1c71665 in TBufferFile::ApplySequence(TStreamerInfoActions::TActionSequence const&, void*) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libRIO.so
#11 0x00007f16a0962333 in TBranchElement::ReadLeavesMember(TBuffer&) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libTree.so
#12 0x00007f16a0917437 in TBranch::GetEntry(long long, int) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libTree.so
#13 0x00007f16a09699c5 in TBranchElement::GetEntry(long long, int) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libTree.so
#14 0x00007f16a0969980 in TBranchElement::GetEntry(long long, int) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libTree.so
#15 0x00007f16a0969980 in TBranchElement::GetEntry(long long, int) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libTree.so
#16 0x00007f16a09236f3 in TTree::GetEntry(long long, int) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libTree.so
#17 0x00007f16a01b3dba in testSelector::Process(long long) () from /user/manfredi/.proof/projects-e09084-ribbit/session-walleye-1436560628-41484/worker-0.0/./testSelector_C.so
#18 0x00007f169a266179 in TProofPlayer::Process(TDSet*, char const*, char const*, long long, long long) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libProofPlayer.so
#19 0x00007f16a0533318 in TProofServ::HandleProcess(TMessage*, TString*) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libProof.so
#20 0x00007f16a052c32e in TProofServ::HandleSocketInput(TMessage*, bool) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libProof.so
#21 0x00007f16a051f337 in TProofServ::HandleSocketInput() () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libProof.so
#22 0x00007f16a053f9e1 in TProofServLiteInputHandler::Notify() () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libProof.so
#23 0x00007f16a4f8dcbd in TUnixSystem::CheckDescriptors() () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libCore.so.5
#24 0x00007f16a4f8f288 in TUnixSystem::DispatchOneEvent(bool) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libCore.so.5
#25 0x00007f16a4fe3c76 in TSystem::InnerLoop() () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libCore.so.5
#26 0x00007f16a4fe5724 in TSystem::Run() () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libCore.so.5
#27 0x00007f16a4ff3d0f in TApplication::Run(bool) () from /mnt/misc/sw/x86_64/Debian/7/root/gnu/5.34.09/lib/libCore.so.5
#28 0x0000000000401ce1 in main ()
16:37:10 41496 Wrk-0.0 | Error in TProofServLite::HandleException: caugth exception triggered by signal ‘1’ while processing dset:‘TDSet:E09084Cal’, file:’/mnt/analysis/e09084/rootfilesCal/run-0343Cal.root’ - check logs for possible stacktrace - last event: 0
// --------- End of element log -------------------[/code]
I’d appreciate any help or insight you can provide. I would say the problem is with the RBS800 class, but that is contradicted by the fact that testSelector works on a single thread. Perhaps I am not loading a library where and when I need to, but I have tried doing that explicitly as well. These errors persist with different ROOT files and with different numbers of workers.
Thanks,
Juan