# ml/as1_Maggioni_Claudio/src/build_models.py

""" Define, train and save the linear model and the non-linear model """

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from tensorflow.keras import Input, Model
from tensorflow.keras.models import Sequential, save_model
from tensorflow.keras.layers import Dense
from tensorflow.keras import losses
from sklearn.metrics import mean_squared_error
from utils import save_sklearn_model, save_keras_model
import os
import random
import numpy as np
import tensorflow as tf
from keras import backend as K

#
# FIX THE RANDOM GENERATOR SEEDS
#

# Every library involved keeps its own random generator, so reproducible runs
# require seeding each one of them with the same fixed value
SEED_VALUE = 0

# NOTE(review): PYTHONHASHSEED is read by the interpreter at start-up, so
# setting it here is presumably only effective for child processes -- confirm
# that this is enough for the intended reproducibility
os.environ['PYTHONHASHSEED'] = str(SEED_VALUE)

# Seed, in this order, the Python, NumPy and TensorFlow generators
for seed_generator in (random.seed, np.random.seed, tf.random.set_seed):
    seed_generator(SEED_VALUE)

# Ask TensorFlow for a single thread both within and across operations, then
# register the resulting session as the one used by the Keras backend
session_conf = tf.compat.v1.ConfigProto(
    intra_op_parallelism_threads=1,
    inter_op_parallelism_threads=1,
)
sess = tf.compat.v1.Session(
    graph=tf.compat.v1.get_default_graph(),
    config=session_conf,
)
tf.compat.v1.keras.backend.set_session(sess)

# Load data: "x" is used below as the raw input features, "y" as the
# regression targets. The archive is opened as a context manager so that its
# underlying file handle is released as soon as both arrays have been read
# into memory (np.load on an .npz file otherwise keeps the file open).
with np.load("../data/data.npz") as data:
    xs = data["x"]
    y = data["y"]

# Number of data points, i.e. the size of the first axis of the feature array
points = np.shape(xs)[0]

#
# LINEAR MODEL
#

print("# Linear regression:")

# The design matrix built below already carries an explicit column of ones
# for theta_0, hence the intercept built into Sci-kit learn is switched off
lr = LinearRegression(fit_intercept=False)

# Design matrix, one column per parameter:
#   1 (theta_0), x_1, x_2, x_1 * x_2 (theta_3), sin(x_1) (theta_4)
x_1 = xs[:, 0]
x_2 = xs[:, 1]
X = np.column_stack((np.ones(points), xs, x_1 * x_2, np.sin(x_1)))

# Shuffle the samples and hold part of them out as the test set.
# NOTE: despite its name, TRAIN_SET_RATIO is handed to `test_size`, i.e. it is
# the share of the data reserved for testing; the remainder is used to train
TRAIN_SET_RATIO = 0.1
X_t, X_test, y_t, y_test = train_test_split(
    X, y, test_size=TRAIN_SET_RATIO)

# Estimate the parameters on the training portion only
reg = lr.fit(X_t, y_t)

# Report the fitted function, one coefficient per design matrix column
print("f(x) = %g + %g * x_1 + %g * x_2 + %g * x_1 * x_2 + %g * sin(x_1)" %
      tuple(reg.coef_))

# Hand the fitted model to the project helper, targeting a .pickle file
save_sklearn_model(reg, "../deliverable/linear_regression.pickle")

#
# NON-LINEAR MODEL
#

print("\n# Feed-forward NN:")

# Divide previously found training set (X_t, y_t) in another training and a
# validation set.  This division is used for the FFNN training and architecture
# design/tailoring
VALIDATION_SET_RATIO = 0.1
X_train, X_val, y_train, y_val = \
        train_test_split(X_t, y_t, test_size=VALIDATION_SET_RATIO)

# Dump the held-out test set to 'test.npz' (np.savez appends the extension).
# Keyword arguments of np.savez name the arrays to store, so the former
# `allow_pickle=True` keyword could end up saved as a spurious extra array
# called "allow_pickle" (on NumPy versions without a dedicated option of that
# name); both arrays are plain numeric data and do not need it anyway.
np.savez('test', x=X_test, y=y_test)

# Keep only the two raw input columns: the constant, product and sine columns
# were added for the linear model only
raw_train = X_train[:, 1:3]
raw_val = X_val[:, 1:3]

# Per-feature mean and standard deviation, computed on the training set only
mean = raw_train.mean(axis=0)
std = raw_train.std(axis=0)

# Standardize the training data with the statistics above
X_train = (raw_train - mean) / std

# The validation data goes through the very same transformation. All further
# inputs to the NN must be normalized with these `mean` and `std` values.
# Normalization is necessary to increase the speed of the learning process
X_val = (raw_val - mean) / std

# Network architecture: two hidden layers (22 tanh units, then 15 sigmoid
# units) followed by a single linear output unit, trained with Adam on MSE
network = Sequential([
    Dense(22, activation='tanh'),
    Dense(15, activation='sigmoid'),
    Dense(1, activation='linear'),
])
network.compile(optimizer='adam', loss='mse')

# Upper bound on the number of epochs, plus an early stopping rule with a
# patience of 120 epochs.
# NOTE(review): the rule monitors 'loss' (the training loss), not 'val_loss';
# confirm that stopping on the training loss is what is intended
EPOCHS = 5000
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=120)

# Train the network; the validation set is passed along so that its loss is
# evaluated during the learning process
network.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=EPOCHS,
    callbacks=[callback],
    verbose=1,
)

# Save the fitted model and the normalization parameters as well: any input
# later fed to the network has to be normalized with these same values
network.save("../deliverable/nonlinear_model")
save_sklearn_model({"mean": mean, "std": std},
                   "../deliverable/nonlinear_model_normalizers.pickle")

# Print the final validation set MSE, which was used to tailor the NN
# architecture after several manual trials. The arguments are passed by name
# to follow the (y_true, y_pred) signature of mean_squared_error; they were
# previously given in the opposite order
msq = mean_squared_error(y_true=y_val, y_pred=network.predict(X_val))
print("Final validation MSE for FFNN: %g" % msq)

# vim: set ts=4 sw=4 et tw=79: