hw1: almost done, recheck for safety

2021-05-07 12:32:53 +02:00 · 2021-05-07 12:32:53 +02:00 · 07714779b6
parent dd9ad6d776
commit 07714779b6
11 changed files with 250 additions and 80 deletions
--- a/assignment_1/deliverable/linear_regression.pickle
+++ b/assignment_1/deliverable/linear_regression.pickle
--- a/assignment_1/deliverable/nonlinear_model/saved_model.pb
+++ b/assignment_1/deliverable/nonlinear_model/saved_model.pb
--- a/assignment_1/deliverable/nonlinear_model/variables/variables.data-00000-of-00001
+++ b/assignment_1/deliverable/nonlinear_model/variables/variables.data-00000-of-00001
--- a/assignment_1/deliverable/nonlinear_model/variables/variables.index
+++ b/assignment_1/deliverable/nonlinear_model/variables/variables.index
--- a/assignment_1/deliverable/nonlinear_model_normalizers.pickle
+++ b/assignment_1/deliverable/nonlinear_model_normalizers.pickle
--- a/assignment_1/deliverable/run_model.py
+++ b/assignment_1/deliverable/run_model.py
@ -61,51 +61,39 @@ if __name__ == '__main__':
    # EDITABLE SECTION OF THE SCRIPT: if you need to edit the script, do it here
    ############################################################################
    ms = {"baseline", "linear", "nonlinear"}
    if len(sys.argv) != 2:
        print("%s {model_name}" % sys.argv[0])
        sys.exit(1)
    if sys.argv[1] not in ms:
        print("model name must be one of " + str(ms))
        sys.exit(2)
    y_pred = None
    if sys.argv[1] == "baseline":
        # Load the trained model
        baseline_model_path = './baseline_model.pickle'
        baseline_model = load_model(baseline_model_path)
-        # Predict on the given samples
+    X = np.zeros([x.shape[0], 5])
-        y_pred = baseline_model.predict(x)
+    X[:, 0] = 1
-    else:
+    X[:, 1:3] = x
-        X = np.zeros([x.shape[0], 5])
+    X[:, 3] = x[:, 0] * x[:, 1]
-        X[:, 0] = 1
+    X[:, 4] = np.sin(x[:, 0])
        X[:, 1:3] = x
        X[:, 3] = x[:, 0] * x[:, 1]
        X[:, 4] = np.sin(x[:, 0])
-        if sys.argv[1] == "linear":
+    # Load the linear model
-            # Load the linear model
+    linear_model_path = './linear_regression.pickle'
-            linear_model_path = './linear_regression.pickle'
+    linear_model = load_model(linear_model_path)
            linear_model = load_model(linear_model_path)
-            # Predict on the given samples
+    # Predict on the given samples
-            y_pred = linear_model.predict(X)
+    y_pred = linear_model.predict(X)
-        else:
+    # Load saved Keras model
-            # Load saved Keras model
+    nonlinear_model = models.load_model('./nonlinear_model')
-            nonlinear_model = models.load_model('./nonlinear_model')
+    # Load saved normalizing factors for Xs
-            # Load saved normalizing factors for Xs
+    normalizers = load_model('./nonlinear_model_normalizers.pickle')
            normalizers = load_model('./nonlinear_model_normalizers.pickle')
            # Normalize Xs with normalizing factors
            X = X[:, 1:3]
            X -= normalizers["mean"]
            X /= normalizers["std"]
-            # Run model on normalized data
+    print("Linear model:")
-            y_pred = nonlinear_model.predict(X)
+    mse = evaluate_predictions(y_pred, y)
-            y_pred = y_pred[:, 0]
+    print('MSE: {}'.format(mse))
    # Normalize Xs with normalizing factors
    X = X[:, 1:3]
    X -= normalizers["mean"]
    X /= normalizers["std"]
    # Run model on normalized data
    y_pred = nonlinear_model.predict(X)
    y_pred = y_pred[:, 0]
    print("Non-linear model:")
    ############################################################################
    # STOP EDITABLE SECTION: do not modify anything below this point.
--- a/assignment_1/report_Maggioni_Claudio.md
+++ b/assignment_1/report_Maggioni_Claudio.md
@ -62,6 +62,21 @@ of your choice. Limit the assignment to 2500 words (formulas, tables,
 figures, etc., do not count as words) and do not include any code in the
 report.
 ## Premise on data splitting
 In order to perform a correct statistical analysis, I have splitted the given
 datapoints in a training set, a validation set and a test set. The test set has
 200 points (10% of the given datapoints), the validation set has 180 data points
 (10% of the remaining 90% of the given datapoint) while the training set has
 1620 datapoints (all the remaining datapoints).
 For the linear model, I will perform the linear regression using both the
 training and the validation datapoints (since there is no need to choose
 hyperparameters since the given family of models requires none). For the
 nonlinear model, which is a feedforward NN in my case, I used the training set
 to fit the model and I have used the validation set in order to choose the
 architecture of the neural network.
 ## Task 1
 Use the family of models
@ -71,11 +86,16 @@ $f(\mathbf{x}, \boldsymbol{\theta}) = \theta_0 + \theta_1 \cdot x_1 +
 to fit the data. Write in the report the formula of the model
 substituting parameters $\theta_0, \ldots, \theta_4$ with the estimates
 you've found:
-$$f(\mathbf{x}, \boldsymbol{\theta}) = \_ + \_ \cdot x_1 + \_
+
-\cdot x_2 + \_ \cdot x_1 \cdot x_2 + \_ \cdot \sin(x_1)$$
+$$f(\mathbf{x}, \boldsymbol{\theta}) = 3.13696 -0.468921 \cdot x_1
 -0.766447 \cdot x_2 + 0.0390513 \cdot x_1 x_2 -0.918346 \cdot \sin(x_1)$$
 Evaluate the test performance of your model using the mean squared error
 as performance measure.
 On the 200-datapoint test set described before, the MSE of this linear model is
 $1.22265$. This result can be reproduced by running `src/statistical_tests.py`.
 ## Task 2
 Consider any family of non-linear models of your choice to address the
@ -84,6 +104,41 @@ using the mean squared error as performance measure. Compare your model
 with the linear regression of Task 1. Which one is **statistically**
 better?
 The model I've chosen is a feed-forward neural network with 2 hidden layers:
 - one 22-neuron dense layer with hyperbolic tangent activation function;
 - one 15-neuron dense layer with sigmoidal activation function;
 Finally, the output neuron has a linear activation function. As described
 before, the validation set was used to manually tailor the NNs architecture.
 The fitted model has these final performance parameters:
 - Final MSE on validation set $\approx 0.0143344$
 - Final MSE on the test set $\approx 0.0107676$
 I conclude the NN non-linear model is better than the linear
 regression model because of the lower test set MSE. Since the test set is 200
 elements big, we can safely assume that the loss function applied on the test
 set is a resonable approximation of the structural risk for both models.
 In order to motivate this intuitive argument, I will perform a student's T-test
 to show that the non-linear model is indeed statistically better with 95%
 confidence. The test was implemented in `src/statistical_tests.py` and it has
 been performed by spitting the test set, assuming both resulting sets have an
 expectation of the noise equal to 0, and by computing mean, variance and
 T-score.
 The results of the test are the following:
 - T-test result: 5.37324
 - Linear model noise variance: 6.69426
 - Feedforward NN noise variance: $0.348799 \cdot 10^{-3}$
 The test is clearly outside of the 95% confidence interval of $[-1.96,1.96]$ and
 thus we reject the null hypothesis. Since the variance is lower for the NN we
 conclude that this model is indeed the better one statistically speaking.
 ## Task 3 (Bonus)
 In the [**Github repository of the
@ -95,6 +150,33 @@ performance of your model is better (i.e., the MSE is lower) than ours.
 Of course, you also have to tell us why you think that your model is
 better.
 In order to understand if my model is better than the baseline model, I
 performed another Student's T-test re-using the subset of the test set I've used
 for my linear model. The results of the new test are the following:
 - T-test result: 1.1777 (for a $p$ value of 0.2417396)
 - Baseline model noise variance: $0.366316 \cdot 10^{-3}$
 - Feedforward NN noise variance: $0.348799 \cdot 10^{-3}$
 This test would accept my model as the better one (as the variance is lower)
 with a 75% confidence interval, which is far from ideal but still can be
 significant and could be a good chance at having a better model on the hidden
 test set.
 To further analyze the performance of my NN w.r.t. the baseline model, I have
 computed the MSE over my entire 200-datapoints test set for both models. Here
 are the results:
 - MSE on entire test set for baseline model: 0.0151954
 - MSE on entire test set for feedforward NN model: 0.0107676
 My model shows lower MSE for this particular test set, but even if this test
 wasn't used to train both sets of course the performance may change for a
 different test set like the hidden one. The T-test performed before suggests
 that my model might have a decent chance at performing better than the baseline
 model, but I cannot say that for sure or with a reasonable (like 90% or 95%)
 confidence.
 # Questions
 ## Q1. Training versus Validation
@ -238,7 +320,7 @@ Comment and compare how the (a.) training error, (b.) test error and
    problem was proved impossible to solve by a perceptron model by Minsky and
    Papert, then that would be quite a motivation in front of my boss.
-    On a morev general (and more serious) note, the perceptron model would be
+    On a more general (and more serious) note, the perceptron model would be
    unable to solve the problem in the picture since a perceptron can solve only
    linearly-separable classification problems, and even through a simple
    graphical argument we would be unable to find a line able able to separate
@ -248,11 +330,22 @@ Comment and compare how the (a.) training error, (b.) test error and
 2.  **Would you expect to have better luck with a neural network with
    activation function $h(x) = - x \cdot e^{-2}$ for the hidden units?**
-    The activation function is still linear and data is not linearly separable
+    First of all we notice that that activation function is still linear, even
    if a $-e^{-2}$ scaling factor is applied to the output. Since the problem is
    not linearly separable, we would still have bad luck since the new family of
    models would still be inadequate for solving the problem.
 3.  **What are the main differences and similarities between the
    perceptron and the logistic regression neuron?**
-
+    The perceptron neuron and the logistic regression neuron both are dense
-
+    neurons that recieve as parameters all values from the previous layer as
-
+    single scalars. Both require a number of parameters equal to the number of
    inputs, which are used as scaling factors for the inputs recieved. The main
    difference between the neurons is the activation function: the perceptron
    uses an heavyside activation function, while the logistic regression neuron
    uses a sigmoidal function, providing a probabliistic interpretation of the
    output w.r.t. the classification problem (i.e. by instead of providing just a
    "in X class" or "not in X class" indication, the logistic regression
    neuron can output in the continuous $[0,1]$ range a probabliity of said
    element being in the class).
--- a/assignment_1/report_Maggioni_Claudio.pdf
+++ b/assignment_1/report_Maggioni_Claudio.pdf
--- a/assignment_1/src/build_models.py
+++ b/assignment_1/src/build_models.py
@ -1,22 +1,42 @@
 import numpy as np
 from sklearn.linear_model import LinearRegression
 from sklearn.model_selection import train_test_split
 from tensorflow.keras import Input, Model
 from tensorflow.keras.models import Sequential, save_model
 from tensorflow.keras.layers import Dense
 from tensorflow.keras import losses
 from tensorflow import keras
 import tensorflow as tf
 from sklearn.metrics import mean_squared_error
 from utils import save_sklearn_model, save_keras_model
 import os
 import random
 import numpy as np
 import tensorflow as tf
 from keras import backend as K
 #
 # FIX THE RANDOM GENERATOR SEEDS
 #
 seed_value = 0
 # 1. Set `PYTHONHASHSEED` environment variable at a fixed value
 os.environ['PYTHONHASHSEED'] = str(seed_value)
 # 2. Set `python` built-in pseudo-random generator at a fixed value
 random.seed(seed_value)
 # 3. Set `numpy` pseudo-random generator at a fixed value
 np.random.seed(seed_value)
 # 4. Set the `tensorflow` pseudo-random generator at a fixed value
 tf.random.set_seed(seed_value)
 # 5. Configure a new global `tensorflow` session
 session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1,
        inter_op_parallelism_threads=1)
 sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(),
        config=session_conf)
 tf.compat.v1.keras.backend.set_session(sess)
 d = os.path.dirname(__file__)
-data = np.load(d + "/../data/data.npz")
+data = np.load("../data/data.npz")
 xs = data["x"] # 2000x2
 y = data["y"] # 2000x1
-points = 2000
+points = np.shape(xs)[0]
 # We manually include in the feature vectors a '1' column corresponding to theta_0,
 # so disable
@ -30,13 +50,13 @@ X[:, 3] = xs[:, 0] * xs[:, 1]
 X[:, 4] = np.sin(xs[:, 0])
 # Shuffle our data for division in training, and test set
 np.random.seed(0) # seed the generation for reproducibility purposes
 train_ratio = 0.1
 validation_ratio = 0.1
 X_t, X_test, y_t, y_test = train_test_split(X, y, test_size=train_ratio)
 X_train, X_val, y_train, y_val = train_test_split(X_t, y_t, test_size=validation_ratio)
 np.savez('test', x=X_test, y=y_test, allow_pickle=True)
 # Fit with train data
 reg = lr.fit(X_t, y_t)
@ -46,14 +66,9 @@ print("# Linear regression:")
 # Print the resulting parameters
 print("f(x) = %g + %g * x_1 + %g * x_2 + %g * x_1 * x_2 + %g * sin(x_1)" % tuple(reg.coef_))
-save_sklearn_model(reg, d + "/../deliverable/linear_regression.pickle")
+save_sklearn_model(reg, "../deliverable/linear_regression.pickle")
 # Test using MSQ on test set
 score = reg.score(X_test, y_test)
 print("MSQ error on test set is: %g" % (score))
 ### Non-linear regression:
 # Non-linear regression:
 print("\n# Feed-forward NN:")
 A = X_val
@ -61,9 +76,6 @@ A = X_val
 X_train = X_train[:, 1:3]
 X_val = X_val[:, 1:3]
 # X_train = X_train[:, 1:3]
 # X_val = X_val[:, 1:3]
 mean = np.mean(X_train, axis=0)
 std = np.std(X_train, axis=0)
@ -74,26 +86,20 @@ X_val -= mean
 X_val /= std
 network = Sequential()
-network.add(Dense(20, activation='tanh'))
+network.add(Dense(22, activation='tanh'))
-network.add(Dense(10, activation='relu'))
+network.add(Dense(15, activation='sigmoid'))
 network.add(Dense(7, activation='sigmoid'))
 network.add(Dense(5, activation='relu'))
 network.add(Dense(1, activation='linear'))
-network.compile(optimizer='rmsprop', loss='mse', metrics=['mse'])
+network.compile(optimizer='adam', loss='mse')
-epochs = 1000
+epochs = 5000
-callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=40)
+callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=120)
-network.fit(X_train, y_train, epochs=epochs, verbose=1, batch_size=15,
+network.fit(X_train, y_train, epochs=epochs, verbose=1,
-        validation_data=(X_val, y_val), callbacks=[callback])
+            validation_data=(X_val, y_val), callbacks=[callback])
-network.save(d + "/../deliverable/nonlinear_model")
+network.save("../deliverable/nonlinear_model")
-save_sklearn_model({"mean": mean, "std": std}, d + "/../deliverable/nonlinear_model_normalizers.pickle")
+save_sklearn_model({"mean": mean, "std": std}, "../deliverable/nonlinear_model_normalizers.pickle")
 # Print the final validation set MSE, which was used to tailor the NN architecture after
 # several manual trials
 msq = mean_squared_error(network.predict(X_val), y_val)
 print(msq)
 X_test = X_test[:, 1:3]
 X_test -= mean
 X_test /= std
 msq = mean_squared_error(network.predict(X_test), y_test)
 #print(msq)
--- a/assignment_1/src/statistical_test.py
+++ b/assignment_1/src/statistical_test.py
@ -0,0 +1,83 @@
 """ Statistical Student's T-test between the linear and non-linear model """
 import joblib
 import numpy as np
 from keras import models
 from sklearn.model_selection import train_test_split
 # Load test dataset (which was saved when building models)
 test = np.load('test.npz')
 X_test = test["x"]
 y_test = test["y"]
 # Load both models and the normalizers for the non-linear model)
 lr = joblib.load("../deliverable/linear_regression.pickle")
 bm = joblib.load('../deliverable/baseline_model.pickle')
 nonlinear_model = models.load_model('../deliverable/nonlinear_model')
 normalizers = joblib.load('../deliverable/nonlinear_model_normalizers.pickle')
 # Split (without shuffling, the test set has already been shuffled) the test set
 # in two evenly sized test datasets for student's T-test comparison
 X_a, X_b, Y_a, Y_b = train_test_split(X_test, y_test, test_size=0.5,
                                      shuffle=False)
 # Make sure both sets have the same size
 assert X_a.shape == X_b.shape
 # Compute the noise values for the linear model
 e_a = np.square(Y_a - lr.predict(X_a))
 # Drop the additional parameters for the linear model and normalize Xs with
 # normalizing factors
 X_b = X_b[:, 1:3]
 X_b -= normalizers["mean"]
 X_b /= normalizers["std"]
 # Compute the noise values for the feedforward NN
 e_b = np.square(Y_b - nonlinear_model.predict(X_b).flatten())
 # # of data points in both test sets
 L = X_a.shape[0]
 # Compute mean and variance on the respective noise datasets for both models
 e_mean_a = np.mean(e_a)
 var_e_a = np.sum(np.square(e_a - e_mean_a)) / (L - 1)
 e_mean_b = np.mean(e_b)
 var_e_b = np.sum(np.square(e_b - e_mean_b)) / (L - 1)
 # Compute Student's T-test
 T = (e_mean_a - e_mean_b) / np.sqrt(var_e_a / L + var_e_b / L)
 # Print results
 print("# T-test between linear and FFNN")
 print("T-test result: %g" % T)
 print("Linear model variance: %g" % var_e_a)
 print("Feedforward NN variance: %g" % var_e_b)
 # Now perform another test using the (a) test set for the baseline model
 X_a = X_a[:, 1:3]
 e_a = np.square(Y_a - bm.predict(X_a))
 e_mean_a = np.mean(e_a)
 var_e_a = np.sum(np.square(e_a - e_mean_a)) / (L - 1)
 T = (e_mean_a - e_mean_b) / np.sqrt(var_e_a / L + var_e_b / L)
 print("# T-test between baseline and FFNN")
 print("T-test result: %g" % T)
 print("Baseline model variance: %g" % var_e_a)
 print("Feedforward NN variance: %g" % var_e_b)
 # Compute MSE on entire test set for all models
 print("MSE on entire test set for linear regression model: %g" %
        (np.mean(np.square(y_test - lr.predict(X_test)))))
 X_test = X_test[:, 1:3]
 print("MSE on entire test set for baseline model: %g" %
        (np.mean(np.square(y_test - bm.predict(X_test)))))
 # Normalize Xs first for running my FFNN model
 X_test -= normalizers["mean"]
 X_test /= normalizers["std"]
 print("MSE on entire test set for feedforward NN model: %g" %
        (np.mean(np.square(y_test - nonlinear_model.predict(X_test).flatten()))))
 # vim: set ts=4 sw=4 et tw=80:
--- a/assignment_1/src/test.npz
+++ b/assignment_1/src/test.npz