Hi,
I’m trying to read a text file into an RDataFrame, and need to specify that all columns should be of type Double_t. Here is a toy example to illustrate:
{
// Reproducer: build four RDataFrames from the same CSV file, differing only in
// the colTypes argument passed to ROOT::RDF::FromCSV, and display each one.

// Write the toy input: four rows, two comma-separated columns, no header line.
// Only the first row ("5,3") consists entirely of integer-looking values.
std::ofstream outfile;
outfile.open("./toydata.csv");
outfile << "5,3\n7,8.1\n4.1,4.67\n-1.3,5.01";
outfile.close();
// df1: no colTypes given, so the column types are left to FromCSV.
// The second argument (false) is presumably readHeaders, matching the
// header-less file above -- TODO confirm against the FromCSV documentation.
auto df1 = ROOT::RDF::FromCSV("./toydata.csv", false);
// df2: request type code 'D' for Col0 only; ',' and -1 are presumably the
// delimiter and linesChunkSize arguments -- TODO confirm.
auto df2 = ROOT::RDF::FromCSV(
"./toydata.csv", false, ',', -1, {{"Col0", 'D'}});
// df3: request type code 'D' for Col1 only.
auto df3 = ROOT::RDF::FromCSV(
"./toydata.csv", false, ',', -1, {{"Col1", 'D'}});
// df4: request type code 'D' for both columns.
auto df4 = ROOT::RDF::FromCSV(
"./toydata.csv", false, ',', -1, {{"Col0", 'D'},{"Col1", 'D'}});
// Print a label naming the colTypes used, then the contents of each data frame.
// NOTE(review): df1 is displayed with whole-number values; presumably the types
// are inferred from the integer-only first row -- verify.
std::cout << "colTypes = {}" << std::endl;
df1.Display()->Print();
std::cout << "colTypes = {{\"Col0\", \'D\'}}" << std::endl;
df2.Display()->Print();
std::cout << "colTypes = {{\"Col1\", \'D\'}}" << std::endl;
df3.Display()->Print();
std::cout << "colTypes = {{\"Col0\", \'D\'},{\"Col1\", \'D\'}}" << std::endl;
df4.Display()->Print();
}
This prints:
colTypes = {}
+-----+------+------+
| Row | Col0 | Col1 |
+-----+------+------+
| 0 | 5 | 3 |
+-----+------+------+
| 1 | 7 | 8 |
+-----+------+------+
| 2 | 4 | 4 |
+-----+------+------+
| 3 | -1 | 5 |
+-----+------+------+
colTypes = {{"Col0", 'D'}}
+-----+-----------+------+
| Row | Col0 | Col1 |
+-----+-----------+------+
| 0 | 0.0000000 | 0 |
+-----+-----------+------+
| 1 | 0.0000000 | 0 |
+-----+-----------+------+
| 2 | 0.0000000 | 0 |
+-----+-----------+------+
| 3 | 0.0000000 | 0 |
+-----+-----------+------+
colTypes = {{"Col1", 'D'}}
+-----+------+-----------+
| Row | Col0 | Col1 |
+-----+------+-----------+
| 0 | 5 | 0.0000000 |
+-----+------+-----------+
| 1 | 7 | 0.0000000 |
+-----+------+-----------+
| 2 | 4 | 0.0000000 |
+-----+------+-----------+
| 3 | -1 | 0.0000000 |
+-----+------+-----------+
colTypes = {{"Col0", 'D'},{"Col1", 'D'}}
+-----+-----------+-----------+
| Row | Col0 | Col1 |
+-----+-----------+-----------+
| 0 | 0.0000000 | 0.0000000 |
+-----+-----------+-----------+
| 1 | 0.0000000 | 0.0000000 |
+-----+-----------+-----------+
| 2 | 0.0000000 | 0.0000000 |
+-----+-----------+-----------+
| 3 | 0.0000000 | 0.0000000 |
+-----+-----------+-----------+
What is happening here? And what is the correct way to specify the column types?
ROOT Version: 6.28/00 from conda-forge
Platform: macosx 10.15.7
Compiler: Not Provided