Error when reading .CSV file

I am trying to generate plots by reading a .CSV file, but I keep getting "ValueError: could not convert string to float: '23.3542'".

import pandas as pd
import numpy as np
import os
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import sys
# Locate the ELOG export relative to the directory the script is started from.
working_directory = os.getcwd()
print(working_directory)

# os.path.join inserts the right separator for the platform instead of
# relying on a hard-coded '/' glued onto the working directory.
path = os.path.join(working_directory, 'Downloads', 'export.csv')
rows_to_skip = [1, 55, 78]  # only if there is incomplete entry
# csv file downloaded from ELOG using 'Find'; only the listed column
# positions are loaded into the frame used by every function below.
df = pd.read_csv(path, skiprows=rows_to_skip, usecols=(4, 5, 6, 7, 8, 9, 10, 18))

'''
Plot the histograms
'''

'''Histogram #1: plot histogram of voltages when log(gain)=4, interpolated from gain curve'''
#data are all strings, need to convert column "Gain Offset" to a float array without uncertainty values
def offset_cleanup():
    """Return the 'Gain Offset' column as a float array without uncertainties.

    Entries that were read from the CSV as strings are cut at the first '+'
    (where the uncertainty part starts) before being converted.  A string
    that contains no '+' is converted as a whole instead of raising.
    Non-string entries (NaN for missing cells) are copied unchanged.

    Returns:
        numpy.ndarray: one float per row of ``df``.

    Raises:
        ValueError: if the text kept from an entry is not a valid number.
    """
    offset = df['Gain Offset']
    offsetPar = np.zeros(len(offset))  # pre-sized output, filled row by row
    for i, entry in enumerate(offset):
        if isinstance(entry, str):  # numbers are stored as strings, NaN's are float
            # partition() keeps everything before the first '+' and, unlike
            # index(), does not raise when there is no uncertainty part.
            offsetPar[i] = entry.partition('+')[0]
        else:
            offsetPar[i] = entry
    return offsetPar
    #now offsetPar list contains no uncertainty value

def slope_cleanup():
    """Return the 'Gain Slope' column as a float array without uncertainties.

    String entries are cut at the first '+' (where the uncertainty part
    starts) before conversion.  Entries without a '+' are used as they are.
    The presence of '+' is checked directly rather than guessed from the
    length of the string, so long values without an uncertainty and short
    values with one are both handled.  Non-string entries are copied
    unchanged.

    Returns:
        numpy.ndarray: one float per row of ``df``.

    Raises:
        ValueError: if the text kept from an entry is not a valid number.
    """
    slope = df['Gain Slope']
    slopePar = np.zeros(len(slope))  # pre-sized output, filled row by row
    for i, entry in enumerate(slope):
        if isinstance(entry, str):
            # some entries don't have "+"; partition() then returns the whole
            # string, so the value is used directly.
            slopePar[i] = entry.partition('+')[0]
        else:
            slopePar[i] = entry
    return slopePar

    #now slopePar list contains no  uncertainty value

def curvature_cleanup():
    """Return the 'Gain Saturation' column as a float array.

    A string entry is truncated so that it ends three characters after its
    first 'e'; whatever follows (the uncertainty part) is dropped.
    Non-string entries are copied unchanged.
    """
    saturation = df['Gain Saturation']
    n_rows = len(saturation)
    cleaned = np.zeros(n_rows)
    for row in range(n_rows):
        entry = saturation[row]
        if not isinstance(entry, str):
            cleaned[row] = entry
            continue
        # Keep the mantissa, the 'e' and the three characters after it.
        cut = entry.index('e') + 4
        cleaned[row] = entry[:cut]
    return cleaned
    #now curvaturePar list contains no uncertainty value

#to find voltage when log(gain)=4
def get_voltages(gain):   #function output depends on gain value
    """Interpolate, for every row, the voltage at which log(gain) equals ``gain``.

    For each row the quadratic ``a*x**2 + b*x + (c - gain) = 0`` is solved
    with ``numpy.roots``, where ``a``, ``b`` and ``c`` are the cleaned
    'Gain Saturation', 'Gain Slope' and 'Gain Offset' values.  The second
    root returned by ``numpy.roots`` is kept.

    Args:
        gain: target value of log(gain).

    Returns:
        list: one root per row of ``df``.
    """
    # Clean each column once; the results do not depend on the row, so
    # recomputing them inside the loop only repeated identical work.
    curvature = curvature_cleanup()
    slope = slope_cleanup()
    offset = offset_cleanup()

    voltList = []
    for a, b, c0 in zip(curvature, slope, offset):
        c = c0 - gain  # gain is log(gain)
        roots = np.roots([a, b, c])
        voltList.append(roots[1])
    return voltList  #a list of interpolated voltage values for all crystals is created

#plot the voltage value, at log(gain=4), as a histogram.
#Crystals with similar voltage values will be grouped together in experiment later on.
def make_volt_hist():
    """Plot, save and show the histogram of voltages interpolated at log(gain)=4."""
    bar_style = dict(stacked=True, facecolor='blue', ec='black')
    bin_edges = range(620, 950, 10)
    tick_positions = range(620, 950, 50)    #Range can be changed

    voltages = get_voltages(4)
    plt.hist(voltages, bins=bin_edges, **bar_style)
    plt.title("Voltages (Log(G)=4)")
    plt.xlabel('Voltage')
    plt.xticks(tick_positions)
    plt.ylabel('Counts')
    plt.savefig('PATH')
    plt.show()



'''Histogram #2:137Cs Position 3 Peak Resolution'''
def res_cleanup():
    """Return the '137Cs Position 3 Peak Resolution' column as a float array.

    String entries are cut at the first '+' (where the uncertainty part
    starts) before conversion; a string without '+' is converted as a whole.
    Non-string entries are copied unchanged.

    Returns:
        numpy.ndarray: one float per row of ``df``.

    Raises:
        ValueError: if the text kept from an entry is not a valid number.
    """
    res = df['137Cs Position 3 Peak Resolution']
    resPar = np.zeros(len(res))
    for i, entry in enumerate(res):     #getting rid of uncertainty values
        if isinstance(entry, str):
            # partition() does not raise when the entry has no '+'.
            resPar[i] = entry.partition('+')[0]
        else:
            resPar[i] = entry
    # The np.float alias was removed from NumPy; the builtin float is the
    # same dtype and keeps returning a fresh float array.
    resList = resPar.astype(float)
    return resList

#this function makes plot#2, a histogram of energy resolution
def make_res_hist():
    """Plot, save and show the histogram of 137Cs position-3 peak resolutions."""
    bar_style = dict(stacked=True, facecolor='blue', ec='black')
    resolutions = res_cleanup()

    plt.hist(resolutions, bins=range(20, 46, 2), **bar_style)
    plt.title("137Cs Energy Resolution")
    plt.xlabel('Calibrated Energy (keV)')
    plt.ylabel('Counts')
    plt.xticks(range(20, 46, 2))
    plt.savefig('PATH')
    plt.show()



'''Histogram #3:137Cs Total Energy Variation'''
def var_cleanup():
    """Return the '137Cs Total Energy Variation' column as a float array.

    A string entry that ends in 'V' is cut two characters before its first
    'V', which drops the trailing unit text; any other string is converted
    as it is.  Non-string entries (e.g. NaN for an empty cell) are copied
    unchanged instead of being sliced, which would raise a TypeError.

    Returns:
        numpy.ndarray: one float per row of ``df``.

    Raises:
        ValueError: if the text kept from an entry is not a valid number.
    """
    var = df['137Cs Total Energy Variation']
    varPar = np.zeros(len(var))
    for i, entry in enumerate(var):   #getting rid of the trailing unit
        if isinstance(entry, str) and entry.endswith('V'):
            count = entry.index('V')
            varPar[i] = entry[:count - 2]
        else:
            varPar[i] = entry
    # The np.float alias was removed from NumPy; the builtin float is the
    # same dtype and keeps returning a fresh float array.
    varList = varPar.astype(float)
    return varList

def make_var_hist():
    """Plot, save and show the histogram of 137Cs total energy variations."""
    bar_style = dict(stacked=True, facecolor='blue', ec='black')
    variations = var_cleanup()

    plt.hist(variations, bins=range(0, 56, 2), **bar_style)
    plt.title("137Cs Peak Energy Variation with Source Position")
    plt.xlabel('Calibrated Energy (keV)')
    plt.ylabel('Counts')
    plt.xticks(range(0, 56, 5))
    plt.yticks(np.arange(0, 25, step=5))
    plt.savefig('PATH')
    plt.show()





'''
make 2D plots
'''

'''2D plot#1: Resolution vs Variation '''
def res_var_2d():
    """Plot peak resolution against total energy variation as a 2D histogram.

    The figure is written to 'PATH' and then displayed.
    """
    resList = res_cleanup()
    varList = var_cleanup()
    plt.hist2d(resList, varList, bins=(11, 12), range=[[20, 42], [0, 60]], cmap='binary')
    plt.colorbar(label='number of xtals', cmap='binary')
    plt.xlabel("137Cs Position 3 Peak Resolution(keV)")
    plt.ylabel("137Cs Total Energy Variation(keV)")
    plt.xticks(range(20, 44, 2))
    plt.yticks(range(0, 65, 5))
    plt.title("Energy Variation vs Peak Resolution")
    # Save before showing: once a blocking show() returns the figure has
    # been closed, so a later savefig would write an empty image.
    plt.savefig('PATH', dpi=100)
    plt.show()

'''2D plot #2: Resolution vs voltage '''
def res_volt_2d():
    """Plot peak resolution against the log(gain)=4 voltages as a 2D histogram.

    The figure is written to 'PATH' and then displayed.
    """
    resList = res_cleanup()
    plt.hist2d(resList, get_voltages(4), bins=(11, 10), range=[[20, 42], [660, 960]], cmap='binary')
    plt.colorbar(label='number of xtals', cmap='binary')
    plt.xlabel("137Cs Position 3 Peak Resolution(keV)")
    plt.ylabel("Voltages (Log(G)=4)")
    plt.xticks(range(20, 44, 2))
    plt.yticks(range(660, 990, 30))
    plt.title("Gain vs Peak Resolution")
    # Save before showing: once a blocking show() returns the figure has
    # been closed, so a later savefig would write an empty image.
    plt.savefig('PATH', dpi=100)
    plt.show()


'''2D plot #3: variation vs voltage '''
def var_volt_2d():
    """Plot total energy variation against the log(gain)=4 voltages as a 2D histogram.

    The figure is written to 'PATH' and then displayed.
    """
    varList = var_cleanup()
    plt.hist2d(varList, get_voltages(4), bins=(12, 10), range=[[0, 60], [660, 960]], cmap='binary')
    plt.colorbar(label='number of xtals', cmap='binary')
    plt.xlabel("137Cs Total Energy Variation(keV)")
    plt.ylabel("Voltages (Log(G)=4)")
    plt.xticks(range(0, 65, 5))
    plt.yticks(range(660, 990, 30))
    plt.title("Gain vs Peak Variation")
    # Save before showing: once a blocking show() returns the figure has
    # been closed, so a later savefig would write an empty image.
    plt.savefig('PATH', dpi=100)
    plt.show()



'''
Identify the xtals with high resolution
'''
def poor_res_list(res_value):
    """Return the 'Crystal SN' entries of all rows whose cleaned resolution is at least ``res_value``."""
    resolutions = res_cleanup()
    return [
        df['Crystal SN'][row]
        for row, resolution in enumerate(resolutions)
        if resolution >= res_value
    ]




# Draw the three 1D histograms first, then the three 2D histograms,
# in the same order as they are defined above.
for draw_plot in (
    make_res_hist,
    make_var_hist,
    make_volt_hist,
    res_var_2d,
    res_volt_2d,
    var_volt_2d,
):
    draw_plot()

# List the serial numbers of crystals whose resolution is 30 or more.
print('Crystals with poor resolution are', poor_res_list(30))

Hi @philomathU09; perhaps you could also attach the file export.csv so that we can reproduce the error.

Cheers,
J.

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.