Hi,
Up until some time ago, I used a script similar to the one below to train a number of Keras models in order to tune their hyperparameters:
def getKerasModel(inputDim, modelName, nLayers = 3, layerSize = 200, dropValue = 0.2, optLabel = 'adam'):
    """Build, compile and save a fully-connected binary classifier.

    Parameters:
        inputDim  -- number of input features (size of the first Dense layer input)
        modelName -- path the compiled model is saved to (HDF5 via model.save)
        nLayers   -- total number of hidden layers (the first one is added before the loop)
        layerSize -- neurons per hidden layer
        dropValue -- dropout fraction after each hidden layer; 0 disables Dropout layers
        optLabel  -- 'adam' (default) or 'sgd'; any other value falls back to Adam

    Returns the compiled model (it was previously discarded by a bare
    `return`; returning it is backward compatible since callers that
    ignore the return value are unaffected).
    """
    model = Sequential()
    # First hidden layer carries the input dimension.
    model.add(Dense(layerSize, activation='relu', kernel_initializer='normal', input_dim=inputDim))
    if dropValue != 0:
        model.add(Dropout(dropValue))
    # Remaining nLayers - 1 hidden layers.
    for _ in range(1, nLayers):
        model.add(Dense(layerSize, activation='relu', kernel_initializer='normal'))
        if dropValue != 0:
            model.add(Dropout(dropValue))
    # Two-class softmax output (paired with categorical_crossentropy below).
    model.add(Dense(2, activation='softmax'))
    opt = Adam(lr=0.001)
    if optLabel == 'sgd':
        opt = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
    model.compile(loss='categorical_crossentropy', optimizer=opt, metrics=['accuracy'])
    model.save(modelName)
    model.summary()
    return model
...
...
...
# Hyperparameter grid: one model + one PyKeras booking per combination.
ntu_nLayers = (2, 3, 4)
ntu_layerSize = (100, 150, 200)
ntu_dropValue = (0.0, 0.1, 0.2, 0.3)
for nLayers in ntu_nLayers:
    for layerSize in ntu_layerSize:
        for dropValue in ntu_dropValue:
            # BUG FIX: int(dropValue * 10) truncates toward zero, and
            # 0.3 * 10 == 2.9999999999999996 in IEEE floats, so both 0.2 and
            # 0.3 produced suffix '_2', silently overwriting the 0.2 model
            # file and method name. Round before truncating.
            suffix = '_' + str(nLayers) + '_' + str(layerSize) + '_' + str(int(round(dropValue * 10)))
            modelName = 'modelFullScan' + suffix + '.h5'
            getKerasModel(nVars, modelName, nLayers, layerSize, dropValue)
            dnnOptions = '!H:!V:FilenameModel=' + modelName + ':NumEpochs=15:TriesEarlyStopping=5:BatchSize=1024:ValidationSize=30%'
            dnnName = 'DNNMuonIDFullScan' + suffix
            factory.BookMethod(dataloader, TMVA.Types.kPyKeras, dnnName, dnnOptions + preprocessingOptions)
            print(modelName)
            print(dnnOptions + preprocessingOptions)
# Train, test and evaluate every booked method in one pass.
factory.TrainAllMethods()
factory.TestAllMethods()
factory.EvaluateAllMethods()
This suddenly stopped working and now crashes.
Let’s downscale a bit.
This works:
# Minimal working case: a single small network booked once into the factory.
model1 = Sequential()
model1.add(Dense(100, input_dim=nVars, kernel_initializer='normal', activation='relu'))
model1.add(Dense(100, kernel_initializer='normal', activation='relu'))
model1.add(Dense(2, activation='softmax'))
optimizer1 = Adam(lr=0.001)
model1.compile(optimizer=optimizer1, loss='categorical_crossentropy', metrics=['accuracy'])
model1.save('a1.h5')
model1.summary()
# Booking options: the saved HDF5 file is handed to TMVA's PyKeras method.
dnnOptions = '!H:!V:FilenameModel=a1.h5:NumEpochs=15:TriesEarlyStopping=5:BatchSize=1024:ValidationSize=30%'
factory.BookMethod(dataloader, TMVA.Types.kPyKeras, 'DNNMuonIDFullScanA1', dnnOptions + preprocessingOptions)
# Run training, test and evaluation for the single booked method.
factory.TrainAllMethods()
factory.TestAllMethods()
factory.EvaluateAllMethods()
but this DOES NOT
# Failing reproducer: identical to the working snippet except that a SECOND
# PyKeras method is booked before training. The crash occurs at the start of
# training of the FIRST method (FailedPreconditionError: uninitialized
# variable 'dense_2_1/kernel' — note the '_1' suffix, i.e. a duplicate layer
# created when the second model is loaded into the same TF graph/session).
model1 = Sequential()
model1.add(Dense(100, activation='relu', kernel_initializer='normal', input_dim=nVars))
model1.add(Dense(100, activation='relu', kernel_initializer='normal'))
model1.add(Dense(2, activation='softmax'))
model1.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.001), metrics=['accuracy'])
model1.save('a1.h5')
model1.summary()
dnnOptions = '!H:!V:FilenameModel=a1.h5:NumEpochs=15:TriesEarlyStopping=5:BatchSize=1024:ValidationSize=30%'
factory.BookMethod(dataloader, TMVA.Types.kPyKeras, 'DNNMuonIDFullScanA1', dnnOptions + preprocessingOptions)
print(dnnOptions)
# Second, independent network booked into the same factory — this booking is
# what triggers the failure (even booking the SAME model twice reproduces it).
model2 = Sequential()
model2.add(Dense(150, activation='relu', kernel_initializer='normal', input_dim=nVars))
model2.add(Dense(150, activation='relu', kernel_initializer='normal'))
model2.add(Dense(2, activation='softmax'))
model2.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.001), metrics=['accuracy'])
model2.save('a2.h5')
model2.summary()
dnnOptions = '!H:!V:FilenameModel=a2.h5:NumEpochs=15:TriesEarlyStopping=5:BatchSize=1024:ValidationSize=30%'
factory.BookMethod(dataloader, TMVA.Types.kPyKeras, 'DNNMuonIDFullScanA2', dnnOptions + preprocessingOptions)
# Run training, test and evaluation
factory.TrainAllMethods()
factory.TestAllMethods()
factory.EvaluateAllMethods()
It crashes with the following error (which I am not able to interpret) as soon as the training begins
Epoch 1/15
<WARNING> : Failed to run python code: history = model.fit(trainX, trainY, sample_weight=trainWeights, batch_size=batchSize, epochs=numEpochs, verbose=verbose, validation_data=(valX, valY, valWeights), callbacks=callbacks)
<WARNING> : Python error message:
Traceback (most recent call last):
File "<string>", line 1, in <module>
File "/usr/local/lib/python2.7/dist-packages/keras/models.py", line 963, in fit
validation_steps=validation_steps)
File "/usr/local/lib/python2.7/dist-packages/keras/engine/training.py", line 1712, in fit
validation_steps=validation_steps)
File "/usr/local/lib/python2.7/dist-packages/keras/engine/training.py", line 1235, in _fit_loop
outs = f(ins_batch)
File "/usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py", line 2475, in __call__
**self.session_kwargs)
File "/home/alberto/.local/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 929, in run
run_metadata_ptr)
File "/home/alberto/.local/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1152, in _run
feed_dict_tensor, options, run_metadata)
File "/home/alberto/.local/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1328, in _do_run
run_metadata)
File "/home/alberto/.local/lib/python2.7/site-packages/tensorflow/python/client/session.py", line 1348, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.FailedPreconditionError: Attempting to use uninitialized value dense_2_1/kernel
[[node dense_2_1/kernel/read (defined at /usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py:392) ]]
Caused by op u'dense_2_1/kernel/read', defined at:
File "trainingMuonIDscan.py", line 198, in <module>
factory.BookMethod(dataloader, TMVA.Types.kPyKeras, 'DNNMuonIDFullScanA1', dnnOptions + preprocessingOptions)
File "<string>", line 1, in <module>
File "/usr/local/lib/python2.7/dist-packages/keras/models.py", line 243, in load_model
model = model_from_config(model_config, custom_objects=custom_objects)
File "/usr/local/lib/python2.7/dist-packages/keras/models.py", line 317, in model_from_config
return layer_module.deserialize(config, custom_objects=custom_objects)
File "/usr/local/lib/python2.7/dist-packages/keras/layers/__init__.py", line 55, in deserialize
printable_module_name='layer')
File "/usr/local/lib/python2.7/dist-packages/keras/utils/generic_utils.py", line 144, in deserialize_keras_object
list(custom_objects.items())))
File "/usr/local/lib/python2.7/dist-packages/keras/models.py", line 1350, in from_config
model.add(layer)
File "/usr/local/lib/python2.7/dist-packages/keras/models.py", line 492, in add
output_tensor = layer(self.outputs[0])
File "/usr/local/lib/python2.7/dist-packages/keras/engine/topology.py", line 590, in __call__
self.build(input_shapes[0])
File "/usr/local/lib/python2.7/dist-packages/keras/layers/core.py", line 842, in build
constraint=self.kernel_constraint)
File "/usr/local/lib/python2.7/dist-packages/keras/legacy/interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "/usr/local/lib/python2.7/dist-packages/keras/engine/topology.py", line 414, in add_weight
constraint=constraint)
File "/usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py", line 392, in variable
v = tf.Variable(value, dtype=tf.as_dtype(dtype), name=name)
File "/home/alberto/.local/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 213, in __call__
return cls._variable_v1_call(*args, **kwargs)
File "/home/alberto/.local/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 176, in _variable_v1_call
aggregation=aggregation)
File "/home/alberto/.local/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 155, in <lambda>
previous_getter = lambda **kwargs: default_variable_creator(None, **kwargs)
File "/home/alberto/.local/lib/python2.7/site-packages/tensorflow/python/ops/variable_scope.py", line 2495, in default_variable_creator
expected_shape=expected_shape, import_scope=import_scope)
File "/home/alberto/.local/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 217, in __call__
return super(VariableMetaclass, cls).__call__(*args, **kwargs)
File "/home/alberto/.local/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 1395, in __init__
constraint=constraint)
File "/home/alberto/.local/lib/python2.7/site-packages/tensorflow/python/ops/variables.py", line 1557, in _init_from_args
self._snapshot = array_ops.identity(self._variable, name="read")
File "/home/alberto/.local/lib/python2.7/site-packages/tensorflow/python/util/dispatch.py", line 180, in wrapper
return target(*args, **kwargs)
File "/home/alberto/.local/lib/python2.7/site-packages/tensorflow/python/ops/array_ops.py", line 81, in identity
ret = gen_array_ops.identity(input, name=name)
File "/home/alberto/.local/lib/python2.7/site-packages/tensorflow/python/ops/gen_array_ops.py", line 3890, in identity
"Identity", input=input, name=name)
File "/home/alberto/.local/lib/python2.7/site-packages/tensorflow/python/framework/op_def_library.py", line 788, in _apply_op_helper
op_def=op_def)
File "/home/alberto/.local/lib/python2.7/site-packages/tensorflow/python/util/deprecation.py", line 507, in new_func
return func(*args, **kwargs)
File "/home/alberto/.local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 3300, in create_op
op_def=op_def)
File "/home/alberto/.local/lib/python2.7/site-packages/tensorflow/python/framework/ops.py", line 1801, in __init__
self._traceback = tf_stack.extract_stack()
FailedPreconditionError (see above for traceback): Attempting to use uninitialized value dense_2_1/kernel
[[node dense_2_1/kernel/read (defined at /usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py:392) ]]
<FATAL> : Failed to train model
***> abort program execution
Traceback (most recent call last):
File "trainingMuonIDscan.py", line 214, in <module>
factory.TrainAllMethods()
Exception: void TMVA::Factory::TrainAllMethods() =>
FATAL error (C++ exception of type runtime_error)
Curiously:
- it crashes at the training of the first model (not the second)
- it does not crash if I define the model but do not book it into the factory
- it crashes even if I book the first model twice
In other words, as soon as I book a second PyKeras model, the training fails. I am 100% sure this did not happen in the past (I am simply rehashing old studies).
What am I missing?
Thanks for any help,
Alberto
ROOT: 6.17/01
Keras: 2.1.4