ValueError: `logits` and `labels` must have the same shape, received ((None, 1) vs (None, 2))

Hi,
I’m training two networks with PyKeras and I obtain an error — ValueError: `logits` and `labels` must have the same shape, received ((None, 1) vs (None, 2)) — when I define my model as follows:

# Fully-connected binary classifier: 6 input variables, ReLU hidden layers
# with dropout and max-norm weight constraints, single sigmoid output.
model = Sequential()

model.add(Dense(70, kernel_initializer='glorot_normal', activation='relu', input_dim=6))
model.add(Dropout(0.2))
model.add(Dense(50, kernel_initializer='glorot_normal', activation='relu', kernel_constraint=max_norm(1.)))
model.add(Dropout(0.2))
model.add(Dense(30, kernel_initializer='glorot_normal', activation='relu', kernel_constraint=max_norm(1.)))
model.add(Dropout(0.2))
model.add(Dense(30, kernel_initializer='glorot_normal', activation='relu', kernel_constraint=max_norm(1.)))
model.add(Dropout(0.1))
model.add(Dense(20, kernel_initializer='glorot_normal', activation='relu', kernel_constraint=max_norm(1.)))
model.add(Dropout(0.1))
model.add(Dense(10, kernel_initializer='glorot_normal', activation='relu', kernel_constraint=max_norm(1.)))

# NOTE: this 1-node sigmoid head is what triggers the reported error under
# PyKeras, which feeds two-column (one-hot) labels even for binary problems.
model.add(Dense(1, kernel_initializer='glorot_normal', activation='sigmoid'))

and I use a DataLoader to load a signal tree and a background tree. This is the code:

import time

from os import environ
environ['KERAS_BACKEND'] = 'theano'

# Set architecture of system (AVX instruction set is not supported on SWAN)
environ['THEANO_FLAGS'] = 'gcc.cxxflags=-march=corei7'
 
from ROOT import TMVA, TFile, TTree, TCut
from subprocess import call
from os.path import isfile



import tensorflow.keras
from keras.utils import np_utils
import tensorflow.keras.callbacks as cb
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation
from tensorflow.keras import regularizers
from tensorflow.keras import backend as K
from tensorflow.keras import optimizers
from tensorflow.keras import callbacks
from tensorflow.keras.constraints import max_norm


 

# Selects which background sample to train against:
# True -> W-pair jets, False -> top-pair jets.
isW=False


# Setup TMVA
TMVA.Tools.Instance()
# Must be called before booking any Python-based method (e.g. PyKeras).
TMVA.PyMethodBase.PyInitialize()
# Output ROOT file that will hold the trained weights and evaluation results.
if isW: output = TFile.Open('TMVA_WpairJets.root', 'RECREATE')
else: output = TFile.Open('TMVA_ToppairJets.root', 'RECREATE')
factory = TMVA.Factory('TMVAClassification', output,
                       '!V:!Silent:Color:DrawProgressBar:Transformations=I;D;P;G,D:AnalysisType=Classification')
 
# Load data
#if not isfile('tmva_class_example.root'):
#    call(['curl', '-L', '-O', 'http://root.cern.ch/files/tmva_class_example.root'])
 
# Signal and background TTrees live in the same input file.
data = TFile.Open('SgnBkgTrees/SgnBckTree_UNICANttCatGenInfo-v1_FullSelStep_FNano2017-v7_ttCat_TTToSemiLeptonic_TuneCP5_13TeV-powheg-pythia8.root')
signal = data.Get('SgnPairTree')
if isW: background = data.Get('BckWPairTree')
else: background = data.Get('BckTopPairTree')

# Use half of each tree for training; TMVA keeps the rest for testing.
n_sgn_train = int(signal.GetEntries()/2)
n_bck_train = int(background.GetEntries()/2)

 
dataloader = TMVA.DataLoader('dataset')
# Register every branch of the signal tree as an input variable
# (assumes the background tree has the same branches — TODO confirm).
for branch in signal.GetListOfBranches():
    dataloader.AddVariable(branch.GetName())
 
# Both trees enter with unit global event weight.
dataloader.AddSignalTree(signal, 1.0)
dataloader.AddBackgroundTree(background, 1.0)
dataloader.PrepareTrainingAndTestTree(TCut(''),
                                      'nTrain_Signal='+str(n_sgn_train)+':nTrain_Background='+str(n_bck_train)+':SplitMode=Random:!V')#NormMode=NumEvents:!V')
 
# Generate model

# Define model
#
# PyKeras internally one-hot-encodes the class labels, so for binary
# classification the last layer must have 2 output nodes (and n nodes for an
# n-class problem).  A single sigmoid output makes the loss see
# logits of shape (None, 1) against labels of shape (None, 2), producing:
#   ValueError: `logits` and `labels` must have the same shape
model = Sequential()

# Input layer: 6 input variables (one per branch added to the DataLoader).
model.add(Dense(70, kernel_initializer='glorot_normal', activation='relu', input_dim=6))
model.add(Dropout(0.2))
# Hidden ReLU layers; max-norm weight constraints and dropout limit overfitting.
model.add(Dense(50, kernel_initializer='glorot_normal', activation='relu', kernel_constraint=max_norm(1.)))
model.add(Dropout(0.2))
model.add(Dense(30, kernel_initializer='glorot_normal', activation='relu', kernel_constraint=max_norm(1.)))
model.add(Dropout(0.2))
model.add(Dense(30, kernel_initializer='glorot_normal', activation='relu', kernel_constraint=max_norm(1.)))
model.add(Dropout(0.1))
model.add(Dense(20, kernel_initializer='glorot_normal', activation='relu', kernel_constraint=max_norm(1.)))
model.add(Dropout(0.1))
model.add(Dense(10, kernel_initializer='glorot_normal', activation='relu', kernel_constraint=max_norm(1.)))

# Output layer: 2 nodes + softmax (signal vs background), as PyKeras requires.
# Mathematically equivalent to a single sigmoid node for binary problems.
model.add(Dense(2, kernel_initializer='glorot_normal', activation='softmax'))


# Set loss and optimizer
# categorical_crossentropy matches the 2-node softmax / one-hot labels;
# RMSprop with a small per-step learning-rate decay.
model.compile(loss='categorical_crossentropy', optimizer=optimizers.RMSprop(0.001, decay=0.001/60), metrics=['accuracy'])


print('Model compiled')


print('Training model...')

# Training hyper-parameters passed to the PyKeras method options below.
n_epochs = 50
n_batch = 128

# NOTE(review): start_time is recorded but never used afterwards — either
# print an elapsed time after EvaluateAllMethods() or drop it.
start_time = time.time()

 
# Store model to file
# PyKeras loads the architecture from this HDF5 file; the actual training is
# performed later by factory.TrainAllMethods(), not here.
if isW: model.save('model_W.h5')
else: model.save('model_Top.h5')
model.summary()
 
# Book methods
#factory.BookMethod(dataloader, TMVA.Types.kFisher, 'Fisher',
#                   '!H:!V:Fisher:VarTransform=D,G')
# VarTransform=D,G decorrelates and gaussianises the inputs before training.
if isW: factory.BookMethod(dataloader, TMVA.Types.kPyKeras, 'PyKeras', 'H:!V:VarTransform=D,G:FilenameModel=model_W.h5:NumEpochs='+str(n_epochs)+':BatchSize='+str(n_batch)+':verbose=2')
else: factory.BookMethod(dataloader, TMVA.Types.kPyKeras, 'PyKeras', 'H:!V:VarTransform=D,G:FilenameModel=model_Top.h5:NumEpochs='+str(n_epochs)+':BatchSize='+str(n_batch)+':verbose=2')

# Run training, test and evaluation
factory.TrainAllMethods()
factory.TestAllMethods()
factory.EvaluateAllMethods()

But if I train the model this way instead (loading the variables in a different form):

# Plain Keras training, bypassing TMVA: 25% of the training set is held out
# for validation and early stopping monitors the validation loss.
# (x_train / y_train are prepared elsewhere — presumably y_train here is a
# single-column 0/1 array, which is why the 1-node sigmoid head works.)
training = model.fit(x_train, y_train, epochs=n_epochs,validation_split=0.25, batch_size=n_batch,
                             callbacks = [callbacks.EarlyStopping(monitor='val_loss', patience=100, verbose=1)],
                             verbose=2, shuffle= True)

it works with the same model structure. If I use a layer with 2 nodes at the end of the model, the first way works too, but I think that for a binary neural network the last layer should have only one node, returning values from 0 to 1.

Does anybody know why that occurs?
Are the two methods equivalent if I use validation_split=0.5 in the second one?

Thanks.

Hi @conrado99 ,

we need @moneta here, let’s ping him.

Hi,

Internally, PyKeras requires the last layer to have two outputs for binary classification, and n outputs in the case of n-class classification. It is true that for binary classification you could use only one, but using two is in the end equivalent — it just costs slightly more computation.

This has nothing to do with validation_split, which splits the data into training and validation sets.

Lorenzo