Solve IML task 1a
This commit is contained in:
95
IML Projects/Task 1a/data/template_solution.py
Normal file
95
IML Projects/Task 1a/data/template_solution.py
Normal file
@@ -0,0 +1,95 @@
|
||||
# This serves as a template which will guide you through the implementation of this task. It is advised
|
||||
# to first read the whole template and get a sense of the overall structure of the code before trying to fill in any of the TODO gaps.
|
||||
# First, we import necessary libraries:
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from sklearn.model_selection import KFold
|
||||
|
||||
# Add any additional imports here (however, the task is solvable without using
|
||||
# any additional imports)
|
||||
# import ...
|
||||
|
||||
def fit(X, y, lam):
|
||||
"""
|
||||
This function receives training data points, then fits the ridge regression on this data
|
||||
with regularization hyperparameter lambda. The weights w of the fitted ridge regression
|
||||
are returned.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X: matrix of floats, dim = (135,13), inputs with 13 features
|
||||
y: array of floats, dim = (135,), input labels
|
||||
lam: float. lambda parameter, used in regularization term
|
||||
|
||||
Returns
|
||||
----------
|
||||
w: array of floats: dim = (13,), optimal parameters of ridge regression
|
||||
"""
|
||||
weights = np.zeros((13,))
|
||||
# TODO: Enter your code here
|
||||
assert weights.shape == (13,)
|
||||
return weights
|
||||
|
||||
|
||||
def calculate_RMSE(w, X, y):
|
||||
"""This function takes test data points (X and y), and computes the empirical RMSE of
|
||||
predicting y from X using a linear model with weights w.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
w: array of floats: dim = (13,), optimal parameters of ridge regression
|
||||
X: matrix of floats, dim = (15,13), inputs with 13 features
|
||||
y: array of floats, dim = (15,), input labels
|
||||
|
||||
Returns
|
||||
----------
|
||||
rmse: float: dim = 1, RMSE value
|
||||
"""
|
||||
rmse = 0
|
||||
# TODO: Enter your code here
|
||||
assert np.isscalar(rmse)
|
||||
return rmse
|
||||
|
||||
|
||||
def average_LR_RMSE(X, y, lambdas, n_folds):
|
||||
"""
|
||||
Main cross-validation loop, implementing 10-fold CV. In every iteration (for every train-test split), the RMSE for every lambda is calculated,
|
||||
and then averaged over iterations.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
X: matrix of floats, dim = (150, 13), inputs with 13 features
|
||||
y: array of floats, dim = (150, ), input labels
|
||||
lambdas: list of floats, len = 5, values of lambda for which ridge regression is fitted and RMSE estimated
|
||||
n_folds: int, number of folds (pieces in which we split the dataset), parameter K in KFold CV
|
||||
|
||||
Returns
|
||||
----------
|
||||
avg_RMSE: array of floats: dim = (5,), average RMSE value for every lambda
|
||||
"""
|
||||
RMSE_mat = np.zeros((n_folds, len(lambdas)))
|
||||
|
||||
# TODO: Enter your code here. Hint: Use functions 'fit' and 'calculate_RMSE' with training and test data
|
||||
# and fill all entries in the matrix 'RMSE_mat'
|
||||
|
||||
avg_RMSE = np.mean(RMSE_mat, axis=0)
|
||||
assert avg_RMSE.shape == (5,)
|
||||
return avg_RMSE
|
||||
|
||||
|
||||
# Main function. You don't have to change this
|
||||
if __name__ == "__main__":
|
||||
# Data loading
|
||||
data = pd.read_csv("train.csv")
|
||||
y = data["y"].to_numpy()
|
||||
data = data.drop(columns="y")
|
||||
# print a few data samples
|
||||
print(data.head())
|
||||
|
||||
X = data.to_numpy()
|
||||
# The function calculating the average RMSE
|
||||
lambdas = [0.1, 1, 10, 100, 200]
|
||||
n_folds = 10
|
||||
avg_RMSE = average_LR_RMSE(X, y, lambdas, n_folds)
|
||||
# Save results in the required format
|
||||
np.savetxt("./results.csv", avg_RMSE, fmt="%.12f")
|
||||
Reference in New Issue
Block a user