//Run as root -l convert_ANSI_N42_42.cpp+(\"Annex B.n42\") -b -q #include "TString.h" #include "TObjString.h" #include "TXMLEngine.h" #include "TFile.h" #include "TVectorD.h" #include "TObjArray.h" #include "TH1I.h" #include #include #include #include using std::cout; using std::endl; using std::to_string; using std::stol; /** * \brief Prints info of a given node and their children (recursively), and creates TDirectory in a previously opened ROOT file * \param xml the xml engine * \param node the pointer to the parent node * \param level the recursivity level (deph), for indentation * \note The function display node will create a TDirectory (subfolder) for each node, respecting hierarchy. The TDirectory name will be the node name, or nodeName_id if an id is found as attribute of the node. Attributes (key-value) are stored as a TNamed object within the node (TDirectory). Node contents are stored as a TObjString. The Spectrum/ChannelData content is stored also as a TH1 spectrum. The LiveTimeDuration is also stored as TTimestamp https://de.wikipedia.org/wiki/ISO_8601 */ void DisplayNode(TXMLEngine* xml, XMLNodePointer_t node, Int_t level); /** * \brief Converts ISO 8601 times to seconds * \param input the input time string in ISO format * \param re the matching pattern (format) * \return time duration in seconds (double) * \see http://stackoverflow.com/questions/23886140/parse-iso-8601-durations * \see https://de.wikipedia.org/wiki/ISO_8601 */ double match_duration(const std::string& input, const std::regex& re); /** * \brief Converts a ChannelData content node (string containing chain of uints) to a TH1 histogram * \param name the spectrum id (unique key) * \param content the string containing the uint chain * \return pointer to TH1*, that is then owned by the caller (has to delete it). NULL if error is encountered. */ TH1* ChannelDataToSpectrum(const TString name, const TString content); /** * \brief This function converts an ANSI N42.42 xml file to a ROOT file storing energy spectra and metadata information * \param filename the filename of the file to open (full path) * \return true if successful conversion, false otherwise * \note Based on https://root.cern.ch/doc/master/xmlreadfile_8C.html and https://root.cern.ch/root/html/tutorials/io/dirs.C.html * \see https://www.nist.gov/programs-projects/ansiieee-n4242-standard */ bool convert_ANSI_N42_42(const char* filename) { // Test input filename conventions if(!TString(filename).EndsWith(".n42")) { cout << "Error in convert_ANSI_N42_42:" << __LINE__ << ": file has incorrect extension (!='.n42'): " << filename << endl; return false; } // First create engine TXMLEngine* xml = new TXMLEngine; // Now try to parse xml file // Only file with restricted xml syntax are supported XMLDocPointer_t xmldoc = xml->ParseFile(filename); if (xmldoc==0) { delete xml; cout << "Error in convert_ANSI_N42_42:" << __LINE__ << ": could not open file " << filename << endl; return false; } // create a new ROOT file TFile* out = new TFile(TString(filename).ReplaceAll(".n42",".root"),"recreate"); // take access to main node XMLNodePointer_t mainnode = xml->DocGetRootElement(xmldoc); // display recursively all nodes and subnodes DisplayNode(xml, mainnode, 1); // Release memory before exit xml->FreeDoc(xmldoc); delete xml; out->Close(); return true; } void DisplayNode(TXMLEngine* xml, XMLNodePointer_t node, Int_t level) { // this function display all accessible information about xml node and its children printf("%*c node: %s\n",level,' ', xml->GetNodeName(node)); // create subdirectory TString dirname; XMLAttrPointer_t attrid = xml->GetFirstAttr(node); if(attrid && TString(xml->GetAttrName(attrid))=="id") dirname = (TString)xml->GetNodeName(node) + "_" + xml->GetAttrValue(attrid); else dirname = xml->GetNodeName(node); if(gDirectory->GetKey(dirname))//exists already, append number { int i=1; while(gDirectory->GetKey(dirname+"_"+to_string(i))) { i++; } gDirectory->mkdir(dirname+"_"+to_string(i)); gDirectory->cd(dirname+"_"+to_string(i)); } else { gDirectory->mkdir(dirname); gDirectory->cd(dirname); } // display namespace XMLNsPointer_t ns = xml->GetNS(node); if (ns!=0) { printf("%*c namespace: %s refer: %s\n",level+2,' ', xml->GetNSName(ns), xml->GetNSReference(ns)); } // display attributes XMLAttrPointer_t attr = xml->GetFirstAttr(node); while (attr!=0) { printf("%*c attr: %s value: %s\n",level+2,' ', xml->GetAttrName(attr), xml->GetAttrValue(attr)); TNamed n(xml->GetAttrName(attr),xml->GetAttrValue(attr));//https://root.cern.ch/phpBB3/viewtopic.php?t=3162 n.Write(); attr = xml->GetNextAttr(attr); } // display content (if exists) const char* content = xml->GetNodeContent(node); if (content!=0) { printf("%*c cont: %s\n",level+2,' ', content); TObjString s(content); s.Write(); if(TString(xml->GetNodeName(node))=="LiveTimeDuration"||TString(xml->GetNodeName(node))=="RealTimeDuration") { const string input = content; std::regex rshort("^((?!T).)*$"); double duration = std::numeric_limits::quiet_NaN(); if (std::regex_match(input, rshort)) // no T (Time) exist { std::regex r("P([[:d:]]+Y)?([[:d:]]+M)?([[:d:]]+D)?"); duration = match_duration(input, r); } else { std::regex r("P([[:d:]]+Y)?([[:d:]]+M)?([[:d:]]+D)?T([[:d:]]+H)?([[:d:]]+M)?([[:d:]]+S|[[:d:]]+\\.[[:d:]]+S)?"); duration = match_duration(input, r); } //https://root.cern.ch/phpBB3/viewtopic.php?t=11341 TVectorD v(1); v[0]=duration; v.Write(xml->GetNodeName(node)); } else if(TString(xml->GetNodeName(node))=="ChannelData") { TString histName = "ChannelData"; XMLNodePointer_t parnode = xml->GetParent(node); if(parnode) { XMLAttrPointer_t attridp = xml->GetFirstAttr(parnode); if(attridp && TString(xml->GetAttrName(attridp))=="id") histName = xml->GetAttrValue(attridp); } TH1* h = ChannelDataToSpectrum(histName,content); if(h) h->Write(); delete h; } } // display all child nodes XMLNodePointer_t child = xml->GetChild(node); while (child!=0) { DisplayNode(xml, child, level+2); child = xml->GetNext(child); } //Go back to parent directory gDirectory->cd(".."); } double match_duration(const std::string& input, const std::regex& re) { std::smatch match; std::regex_search(input, match, re); if (match.empty()) { std::cout << "Pattern do NOT match" << std::endl; return std::numeric_limits::quiet_NaN(); } std::vector vec = {0,0,0,0,0,0}; // years, months, days, hours, minutes, seconds for (size_t i = 1; i < match.size(); ++i) { if (match[i].matched) { std::string str = match[i]; str.pop_back(); // remove last character. vec[i-1] = std::stod(str); } } double duration = 31556926 * vec[0] + // years 2629743.83 * vec[1] + // months 86400 * vec[2] + // days 3600 * vec[3] + // hours 60 * vec[4] + // minutes 1 * vec[5]; // seconds if (duration == 0) { std::cout << "Not valid input" << std::endl; return duration; } std::cout << "duration: " << duration << " [sec.]" << std::endl; return duration; } TH1* ChannelDataToSpectrum(const TString name, const TString content) { if(content=="") { cout << "Error in ChannelDataToSpectrum:" << __LINE__ << ": input content is void" << endl; return NULL; } //https://root.cern.ch/phpBB3/viewtopic.php?t=1378 char delimiters[] = " \t\r\n";//\r for windows line endings Int_t Index = 0; TObjArray* Strings = content.Tokenize(delimiters); if(!Strings) { cout << "Error in ChannelDataToSpectrum:" << __LINE__ << ": Strings is NULL, tokenize failed with this delimiter." << endl; return NULL; } const UInt_t N = Strings->GetEntriesFast(); if(N==0) { cout << "Error in ChannelDataToSpectrum:" << __LINE__ << ": no entries found in histogram, or tokenize failed with this delimiter." << endl; delete Strings; return NULL; } printf("%i:(%i tokens) =%s\n",Index, Strings->GetEntriesFast(), content.Data()); TH1* h = new TH1I(name, name, Strings->GetEntriesFast(), 0, Strings->GetEntries()); if(Strings->GetEntriesFast()) { TIter iString(Strings); TObjString* os=0; Int_t j=0; while ((os=(TObjString*)iString())) { //~ cout << j << "\t" << os << "\t" << os->GetString() << "\t" << ((os->GetString()=="")?"1":"z") << endl; h->SetBinContent(j+1,stol(os->GetString().Data())); j++; } } delete Strings; return h; }