I have the following loop to train some models on a time series.
import os

import numpy
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.constraints import MaxNorm

my_seed = 7
time_frames = [4, 5]
layers = [3, 4, 5]

for w in time_frames:
    # ----- basic data formatting (elided), always gives the same output -----
    # x1, x2, y1, y2, X_val/Y_val, X_test/Y_test, my_kernel_initializer,
    # my_batch_size, my_epch and trained_models_list come from this elided code.
    x1 = numpy.concatenate((x1, x2), axis=0)
    y1 = numpy.concatenate((y1, y2), axis=0)
    for l in layers:
        if (w, l) in trained_models_list:
            print("Model " + str(w) + "." + str(l) + " already trained!")
        else:
            print("Training -------> window:", w, "layers:", l)
            # ----- resetting everything? -----
            tf.compat.v1.reset_default_graph()
            tf.keras.backend.clear_session()
            tf.config.experimental.enable_op_determinism()

            # ----- reset the seeds -----
            os.environ['PYTHONHASHSEED'] = str(my_seed)
            numpy.random.seed(my_seed)
            tf.random.set_seed(my_seed)
            tf.experimental.numpy.random.seed(my_seed)
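            # Note (my own aside, not in the original loop): if I read the docs
            # correctly, keras.utils.set_random_seed(my_seed) seeds Python's
            # `random` module, NumPy and TensorFlow in one call and could stand
            # in for the three seed calls above (it does not set PYTHONHASHSEED).
            # keras.utils.set_random_seed(my_seed)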
            model = keras.Sequential()
            model.add(keras.layers.GRU(units=int(x1.shape[2] * w * (l / (l + 1))), activation='tanh', input_shape=(x1.shape[1], x1.shape[2]), return_sequences=True, kernel_initializer=my_kernel_initializer(seed=my_seed), kernel_constraint=MaxNorm(3)))

            added_dropout = False
            for i in reversed(range(2, l)):
                model.add(keras.layers.GRU(units=int(x1.shape[2] * w * (i / l)), activation='tanh', return_sequences=True, kernel_initializer=my_kernel_initializer(seed=my_seed), kernel_constraint=MaxNorm(3)))
                if added_dropout:
                    model.add(keras.layers.Dropout(0.1))
                    added_dropout = False

            model.add(keras.layers.GRU(units=int(x1.shape[2] * w * (1 / l)), activation='tanh', return_sequences=False, kernel_initializer=my_kernel_initializer(seed=my_seed)))
            model.add(keras.layers.Dense(units=1, activation='linear', kernel_initializer=my_kernel_initializer(seed=my_seed)))
            optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
            model.compile(loss=["mae"], metrics=[tf.keras.metrics.RootMeanSquaredError()], optimizer=optimizer)
            model.summary()
            train_dataset = tf.data.Dataset.from_tensor_slices((x1, y1)).batch(my_batch_size)
            val_dataset = tf.data.Dataset.from_tensor_slices((X_val, Y_val)).batch(my_batch_size)
            test_dataset = tf.data.Dataset.from_tensor_slices((X_test, Y_test)).batch(my_batch_size)
            history = model.fit(
                train_dataset,
                epochs=my_epch,
                validation_data=val_dataset,
                verbose=2,
            )

            path = "xxxx"
            model.save(path)
            print("SAVED!\n")
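To narrow down where the divergence starts, this is the kind of probe I am adding right after the model is built (the hashlib import and the weights_checksum helper are mine, just for illustration). If all the resets above were complete, the checksum printed for a given (w, l) should be identical no matter which models were trained earlier in the same session:

import hashlib

def weights_checksum(model):
    # Flatten every weight tensor into one array and hash it, so two runs
    # can be compared by eye with a single short string.
    flat = numpy.concatenate([wt.flatten() for wt in model.get_weights()])
    return hashlib.md5(flat.tobytes()).hexdigest()

# inside the loop, right after the model is built and before model.fit(...):
print("initial weights for", (w, l), ":", weights_checksum(model))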
Training the models in the order (window, layers): (4,3) -> (4,4) -> (4,5) gives different results than training them in the order (4,5) -> (4,4) -> (4,3), but training in the same order always reproduces the same results. Could it be that the GRU layers share some contiguous region of memory, since training across different sessions also gives different results? The data is formatted based on the window size, and the issue arises within the same window size, e.g. 4.
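For what it's worth, I also plan to print the first random draws right after the seed-reset block in each iteration (standard NumPy/TensorFlow calls; the placement is my guess at the right spot). If the re-seeding really takes effect, these should print the same values in every iteration regardless of the training order:

# right after the "reset the seeds" block inside the loop:
print("first numpy draw:", numpy.random.rand(3))
print("first tf draw:   ", tf.random.uniform([3]).numpy())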
If you could please help me out I'd be very thankful!