Saving TTree with /s overcompresses data

I have a load of .txt files which contain 256x256 matrices of ascii numbers. They’re all positive integers, and the largest number is 1022.

I’ve written a python script which converts the files into a TTree with one branch which represents the 256x256 matrices and one branch which represents the order of the files.

The thing I don’t understand is that although the value for each pixel in the matrix is well below the threshold for a 16 bit unsigned integer, if I save the data with /s, when I go to read it it doesn’t resemble the input data at all, presumably having saturate the memory allocated for it. But it makes a difference of about a factor of 4 in data size if I save the files as /D so if it’s possible I’d like to save the data in the more compact format, and anyway I’d like to know what’s going wrong.

Here’s the code:

"""
Takes .txt files containing 256x256 matrices of ascii numbers, and converts them to .root files.
Each .root file contains the value of each position in each 256x256 matrix and its corresponding frame number.
.root files are saved in the corresponding directory with the original data.
"""

# pyRoot imports
import ROOT
from ROOT import TFile, TNtuple, gROOT, TROOT, TTree, AddressOf, addressof

import os, glob, numpy as np

# Path to directories to convert
file_path= "/my/file/path"
run_names = glob.glob(os.path.join(file_path, 'str1*'))
run_names+=(glob.glob(os.path.join(file_path, 'str2*')))

# Class containing functions which convert .txt to .root
class ntuple_maker:

    def __init__(self, run_path):
        # Find .txt data to be converted
        self.run_name = os.path.split(run_path)[1]
        self.dir_name = os.path.join(run_path, f"{self.run_name}_UsedFiles/")
        self.list_of_files = sorted(glob.glob(os.path.join(self.dir_name, '*')))

        # Define and open output .root file
        self.output_file_path = os.path.join(run_path, f'{self.run_name}.root')
        print('\t',self.output_file_path)
        if os.path.exists(run_path):
            self.outfile = ROOT.TFile(self.output_file_path, 'RECREATE', 'ROOT file with an NTuple')

        # Define variables to store pixel values
        self.pixel_values = np.zeros((256, 256), dtype=float)
        self.frame_number = np.zeros(1, dtype=int)

        # Create a TTree
        self.tree = ROOT.TTree("pixel_tree", "Pixel Tree")

        # Create branches in the TTree (D = 64 bit floating point, s = 16 bit unsigned integer)
        # If I change /D to /s I don't get out what I put in
        self.tree.Branch("pixel_value", self.pixel_values, f"pixel[256][256]/D")
        self.tree.Branch("frame_number", self.frame_number, "frame_number/D")

    def read_frames(self):
        print(f"\t\tNumber of files in {self.run_name}: {len(self.list_of_files)}")

        # time_idx stands for the time of the file, as represented by the order of the files
        # ascii_file is the .txt file itself
        for time_idx, ascii_file in enumerate(self.list_of_files):
            if time_idx%1000==0:
                print(f"\t\t\tProcessing file: {ascii_file}")
            if not os.path.exists(ascii_file):
                print(f"!!Warning: File {ascii_file} does not exist. Skipping...")
                continue

            # Check if the file is empty
            if os.path.getsize(ascii_file) == 0:
                print(f"!!Warning: File {ascii_file} is empty. Skipping...")
                continue
            
            # Set TTree variables correctly
            self.pixel_values[:] = np.loadtxt(ascii_file)
            self.frame_number[0] = time_idx

            # To save space, variables are saved as 16 bit unsigned integers.
            # Check that neither time_idx or any pixel_value exceeds this
            if np.amax(self.pixel_values) > 65535:
                print("!* Warning: pixel value too high, change pixel[256][256]/s to pixel[256][256]/i or higher *!")
            
            if self.frame_number > 65535:
                print("!* Warning: frame number too high, change frame_number/s to frame_number/i or higher *!")

            # Fill TTrree
            self.tree.Fill()
            
    def close_file(self):
        # Write and close .root output file
        try:
            self.outfile.Write()
            self.outfile.Close()
            print('\t\t\t\twritten',self.output_file_path)
        except:
            print("!!Warning: Couldn't write output file")

for run in run_names:
    ntuples = ntuple_maker(run)
    ntuples.read_frames()
    ntuples.close_file()

Hi, perhaps @mdessole can help by chiming in.

Cheers,
D

Hi @bethlong06,
thanks for reaching out!
Can you please detail the assumptions of your code (e.g. what the txt files’ names are, where these files should be stores, what is the folder structure that your code expects, etc)? It would be great if you could as well attach a sample txt file that causes failure.

Cheers,
Monica

The file structure I think should be clear from the example I posted, here’s a test file which reproduces the problem. I’m also attaching the code I use to read the root file

import ROOT
from ROOT import TFile, TNtuple, gROOT, TROOT, TTree, AddressOf, addressof, TH2D

import os, glob, numpy as np

file_path= "test/test.root"
scan_file = ROOT.TFile(file_path,"read")

tree = scan_file.Get("pixel_tree")

integrated_pixels = np.zeros((256,256),dtype=np.float64)

for frame in tree:
    pixel_values_array = np.array(frame.pixel_value)
    
    # Reshape the 1D array to 2D
    pixel_values_2d = pixel_values_array.reshape((256, 256))
    
    # Add to integrated_pixels
    integrated_pixels += pixel_values_2d

# Close the ROOT file
scan_file.Close()

print(integrated_pixels)

# Create a TH2D histogram
hist = ROOT.TH2D("integrated_hist", "Integrated Pixels", 256, 0, 256, 256, 0, 256)

# Fill the histogram with the integrated pixel values
for ii in range(256):
    for jj in range(256):
        hist.SetBinContent(ii + 1, jj + 1, integrated_pixels[ii, jj])

canvas = ROOT.TCanvas("c0", "c0", 900, 700)
hist.Draw("colz")
#canvas.SetLogz()

ROOT.gPad.Update()

test_file.txt (128.7 KB)

Hi @bethlong06, I asked for the folder structure that you code expects, namely “./str1/str1_UsedFiles/”. It is important that you provide all the necessary information to run your code, in order to make us able to reproduce your situation in the least possible time.

After some attempts with your code, I found out that it actually works well provided that your numpy array has the correct data type when you save with /s. Namely, you should define self.pixel_values = np.zeros((256, 256), dtype=np.uint16) instead of self.pixel_values = np.zeros((256, 256), dtype=float).

Thank you, this solved the problem.

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.