I have a load of .txt
files which contain 256x256 matrices of ascii numbers. They’re all positive integers, and the largest number is 1022.
I’ve written a python script which converts the files into a TTree
with one branch which represents the 256x256 matrices and one branch which represents the order of the files.
The thing I don’t understand is that although the value for each pixel in the matrix is well below the threshold for a 16 bit unsigned integer, if I save the data with /s
, when I go to read it it doesn’t resemble the input data at all, presumably having saturate the memory allocated for it. But it makes a difference of about a factor of 4 in data size if I save the files as /D
so if it’s possible I’d like to save the data in the more compact format, and anyway I’d like to know what’s going wrong.
Here’s the code:
"""
Takes .txt files containing 256x256 matrices of ascii numbers, and converts them to .root files.
Each .root file contains the value of each position in each 256x256 matrix and its corresponding frame number.
.root files are saved in the corresponding directory with the original data.
"""
# pyRoot imports
import ROOT
from ROOT import TFile, TNtuple, gROOT, TROOT, TTree, AddressOf, addressof
import os, glob, numpy as np
# Path to directories to convert
file_path= "/my/file/path"
run_names = glob.glob(os.path.join(file_path, 'str1*'))
run_names+=(glob.glob(os.path.join(file_path, 'str2*')))
# Class containing functions which convert .txt to .root
class ntuple_maker:
def __init__(self, run_path):
# Find .txt data to be converted
self.run_name = os.path.split(run_path)[1]
self.dir_name = os.path.join(run_path, f"{self.run_name}_UsedFiles/")
self.list_of_files = sorted(glob.glob(os.path.join(self.dir_name, '*')))
# Define and open output .root file
self.output_file_path = os.path.join(run_path, f'{self.run_name}.root')
print('\t',self.output_file_path)
if os.path.exists(run_path):
self.outfile = ROOT.TFile(self.output_file_path, 'RECREATE', 'ROOT file with an NTuple')
# Define variables to store pixel values
self.pixel_values = np.zeros((256, 256), dtype=float)
self.frame_number = np.zeros(1, dtype=int)
# Create a TTree
self.tree = ROOT.TTree("pixel_tree", "Pixel Tree")
# Create branches in the TTree (D = 64 bit floating point, s = 16 bit unsigned integer)
# If I change /D to /s I don't get out what I put in
self.tree.Branch("pixel_value", self.pixel_values, f"pixel[256][256]/D")
self.tree.Branch("frame_number", self.frame_number, "frame_number/D")
def read_frames(self):
print(f"\t\tNumber of files in {self.run_name}: {len(self.list_of_files)}")
# time_idx stands for the time of the file, as represented by the order of the files
# ascii_file is the .txt file itself
for time_idx, ascii_file in enumerate(self.list_of_files):
if time_idx%1000==0:
print(f"\t\t\tProcessing file: {ascii_file}")
if not os.path.exists(ascii_file):
print(f"!!Warning: File {ascii_file} does not exist. Skipping...")
continue
# Check if the file is empty
if os.path.getsize(ascii_file) == 0:
print(f"!!Warning: File {ascii_file} is empty. Skipping...")
continue
# Set TTree variables correctly
self.pixel_values[:] = np.loadtxt(ascii_file)
self.frame_number[0] = time_idx
# To save space, variables are saved as 16 bit unsigned integers.
# Check that neither time_idx or any pixel_value exceeds this
if np.amax(self.pixel_values) > 65535:
print("!* Warning: pixel value too high, change pixel[256][256]/s to pixel[256][256]/i or higher *!")
if self.frame_number > 65535:
print("!* Warning: frame number too high, change frame_number/s to frame_number/i or higher *!")
# Fill TTrree
self.tree.Fill()
def close_file(self):
# Write and close .root output file
try:
self.outfile.Write()
self.outfile.Close()
print('\t\t\t\twritten',self.output_file_path)
except:
print("!!Warning: Couldn't write output file")
for run in run_names:
ntuples = ntuple_maker(run)
ntuples.read_frames()
ntuples.close_file()