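"""Fit a linear regression on hand-built features and a feed-forward network.

Reads ../data/data.npz relative to this script and stores the fitted models
under ../deliverable.
"""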
import numpy as np
|
|
from sklearn.linear_model import LinearRegression
|
|
from sklearn.model_selection import train_test_split
|
|
from tensorflow.keras import Input, Model
|
|
from tensorflow.keras.models import Sequential, save_model
|
|
from tensorflow.keras.layers import Dense
|
|
from tensorflow.keras import losses
|
|
from tensorflow import keras
|
|
import tensorflow as tf
|
|
from sklearn.metrics import mean_squared_error
|
|
from utils import save_sklearn_model, save_keras_model
|
|
import os
|
|
|
|
# Directory containing this script; all data and deliverable paths are built
# from it. The path is made absolute first: if __file__ carries no directory
# component, dirname() returns "" and d + "/../..." would point at the
# filesystem root instead of the project.
d = os.path.dirname(os.path.abspath(__file__))

# np.load on an .npz archive returns a lazily-read NpzFile that holds the file
# open; the context manager closes it once both arrays have been read out.
with np.load(os.path.join(d, "..", "data", "data.npz")) as data:
    xs = data["x"]  # 2000x2
    y = data["y"]  # 2000x1

# Number of samples, taken from the loaded data rather than hard-coded.
points = xs.shape[0]
|
|
|
|
# The design matrix below already carries a constant '1' column for theta_0,
# so the regressor's own intercept term is switched off.
lr = LinearRegression(fit_intercept=False)

# Design matrix, one row per sample:
#   column 0    -> 1                (theta_0)
#   columns 1-2 -> x_1, x_2         (theta_1, theta_2)
#   column 3    -> x_1 * x_2        (theta_3)
#   column 4    -> sin(x_1)         (theta_4)
# variable name explained here: https://vimeo.com/380021022
x_1 = xs[:, 0]
x_2 = xs[:, 1]

X = np.ones((points, 5))  # column 0 keeps its initial value of 1
X[:, 1:3] = xs
X[:, 3] = x_1 * x_2
X[:, 4] = np.sin(x_1)
|
|
|
|
# Shuffle the data and divide it into training, validation and test sets.
# No random_state is passed to train_test_split, so it draws from NumPy's
# global random state; seeding that here keeps the split reproducible.
np.random.seed(0)

# Fraction of all samples held out as the test set (this value is what is
# passed as test_size below, hence the name).
test_ratio = 0.1
# Fraction of the remaining (non-test) samples used for validation.
validation_ratio = 0.1

# X_t / y_t: every sample that is not in the test set (training + validation).
X_t, X_test, y_t, y_test = train_test_split(X, y, test_size=test_ratio)
X_train, X_val, y_train, y_val = train_test_split(X_t, y_t, test_size=validation_ratio)
|
|
|
|
# Fit on X_t / y_t, i.e. on every sample outside the test set (the training
# and validation portions together).
reg = lr.fit(X_t, y_t)

print("# Linear regression:")

# Print the resulting parameters.
# coef_ is 1-D for a 1-D target but has shape (1, n_features) when the target
# is a column vector; flattening guarantees the format string receives the
# five scalar coefficients in either case.
print("f(x) = %g + %g * x_1 + %g * x_2 + %g * x_1 * x_2 + %g * sin(x_1)" % tuple(np.ravel(reg.coef_)))

save_sklearn_model(reg, d + "/../deliverable/linear_regression.pickle")

# Mean squared error on the test set. reg.score() is not used here because it
# returns the coefficient of determination (R^2), not an error value.
score = mean_squared_error(y_test, reg.predict(X_test))
print("MSQ error on test set is: %g" % (score))
|
|
|
|
### Non-linear regression:

print("\n# Feed-forward NN:")

# The network is fed only the two raw inputs (columns 1 and 2 of the design
# matrix); the constant column and the hand-built features are dropped.
X_train = X_train[:, 1:3]
X_val = X_val[:, 1:3]

# Normalisation statistics are computed on the training split only and then
# applied unchanged to the validation split.
mean = np.mean(X_train, axis=0)
std = np.std(X_train, axis=0)

# Standardise out of place: the slices above are views, so in-place -= and /=
# would also overwrite the corresponding columns of the parent arrays.
X_train = (X_train - mean) / std
X_val = (X_val - mean) / std
|
|
|
|
# Fully connected regression network: seven ReLU hidden layers of shrinking
# width followed by a single linear output unit.
hidden_units = (30, 20, 20, 10, 5, 3, 2)
network = Sequential(
    [Dense(units, activation='relu') for units in hidden_units]
    + [Dense(1, activation='linear')]
)
network.compile(optimizer='rmsprop', loss='mse', metrics=['mse'])

# The epoch count is only an upper bound; training is expected to end through
# early stopping, which watches the training loss ('loss') with a patience of
# 40 epochs.
epochs = 100000
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=40)
network.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=epochs,
    batch_size=15,
    verbose=1,
    callbacks=[callback],
)
|
|
|
|
# Persist the trained network together with the normalisation statistics;
# the same mean/std must be applied to any input before calling the model.
# NOTE(review): the save path has no file extension - confirm the installed
# Keras version accepts that.
network.save(d + "/../deliverable/nonlinear_model")
save_sklearn_model({"mean": mean, "std": std}, d + "/../deliverable/nonlinear_model_normalizers.pickle")

# Mean squared error on the validation set (arguments in the documented
# (y_true, y_pred) order).
msq = mean_squared_error(y_val, network.predict(X_val))
print(msq)

# Mean squared error on the test set. The test inputs get the same column
# selection and training-set normalisation as the other splits; this is done
# out of place because the column slice is a view of the original test array.
X_test = (X_test[:, 1:3] - mean) / std
msq = mean_squared_error(y_test, network.predict(X_test))
print(msq)
|