From fea7c768b20a7cbe002846774f9945acbcb59ad7 Mon Sep 17 00:00:00 2001
From: wikbom
Date: Wed, 10 May 2023 13:10:44 +0200
Subject: [PATCH] lab5 nn learning and testing scripts

learning.py trains a small feed-forward classifier on train.csv, writes the
held-out split to testing_data.csv and the weights to model.pt.
eval.py reloads model.pt, reports accuracy on testing_data.csv and writes the
class probabilities to prediction.tsv.
---
Review notes (not part of the commit message):
- Model.forward returns raw logits. nn.CrossEntropyLoss applies log-softmax
  itself, so training no longer normalises twice; eval.py applies softmax
  over dim=1 explicitly to keep writing probabilities.
- The 'Sex' column is encoded on the DataFrame itself instead of through an
  in-place replace on a selection of it.
- dropna is restricted to the columns that are actually used.
- eval.py runs the forward pass under model.eval() / torch.no_grad().
- Hunk sizes and the diffstat reflect the revised files; the blob ids on the
  "index" lines were not recomputed.

 eval.py     | 49 +++++++++++++++++++++++++++++
 learning.py | 90 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 139 insertions(+)
 create mode 100755 eval.py
 create mode 100755 learning.py

diff --git a/eval.py b/eval.py
new file mode 100755
index 0000000..def2dae
--- /dev/null
+++ b/eval.py
@@ -0,0 +1,49 @@
+#! /usr/bin/python3
+"""Score the saved classifier on testing_data.csv and dump class probabilities."""
+import numpy as np
+import torch
+from torch import nn
+import pandas as pd
+
+from sklearn.metrics import accuracy_score
+from sklearn.preprocessing import LabelEncoder
+import torch.nn.functional as F
+
+
+class Model(nn.Module):
+    """Fully connected 50-20-2 network; must match the Model in learning.py."""
+
+    def __init__(self, input_dim):
+        super(Model, self).__init__()
+        self.layer1 = nn.Linear(input_dim, 50)
+        self.layer2 = nn.Linear(50, 20)
+        self.layer3 = nn.Linear(20, 2)
+
+    def forward(self, x):
+        """Return the raw two-class logits for a batch of feature rows."""
+        x = F.relu(self.layer1(x))
+        x = F.relu(self.layer2(x))
+        return self.layer3(x)
+
+
+test_df = pd.read_csv('testing_data.csv')
+
+X = test_df[['Pclass', 'Sex', 'Age', 'SibSp', 'Fare']]
+Y = test_df[['Survived']]
+Y = np.ravel(Y)
+encoder = LabelEncoder()
+encoder.fit(Y)
+Y = encoder.transform(Y)
+
+model = Model(X.shape[1])
+model.load_state_dict(torch.load('model.pt'))
+model.eval()
+
+x_test = torch.tensor(X.values, dtype=torch.float32)
+# Inference only: skip autograd bookkeeping.
+with torch.no_grad():
+    # Softmax over the class axis turns logits into per-class probabilities.
+    pred = F.softmax(model(x_test), dim=1).numpy()
+print("The accuracy is", accuracy_score(Y, np.argmax(pred, axis=1)))
+
+np.savetxt('prediction.tsv', pred, delimiter='\t')
diff --git a/learning.py b/learning.py
new file mode 100755
index 0000000..e0c2250
--- /dev/null
+++ b/learning.py
@@ -0,0 +1,90 @@
+#!/usr/bin/python3
+"""Train the survival classifier on train.csv and save its weights to model.pt."""
+import numpy as np
+import torch
+from torch import nn
+import pandas as pd
+import subprocess
+
+from sklearn.model_selection import train_test_split
+import torch.nn.functional as F
+from sklearn.preprocessing import LabelEncoder
+
+
+class Model(nn.Module):
+    """Fully connected 50-20-2 network; must match the Model in eval.py."""
+
+    def __init__(self, input_dim):
+        super(Model, self).__init__()
+        self.layer1 = nn.Linear(input_dim, 50)
+        self.layer2 = nn.Linear(50, 20)
+        self.layer3 = nn.Linear(20, 2)
+
+    def forward(self, x):
+        """Return raw logits; nn.CrossEntropyLoss applies log-softmax itself."""
+        x = F.relu(self.layer1(x))
+        x = F.relu(self.layer2(x))
+        return self.layer3(x)
+
+
+def print_(loss):
+    """Print the loss value of the current epoch."""
+    print("The loss calculated: ", loss)
+
+
+if __name__ == "__main__":
+    # only reasonable numerical data
+    feature_columns = ['Pclass', 'Sex', 'Age', 'SibSp', 'Fare']
+    target_column = 'Survived'
+
+    df = pd.read_csv("train.csv")
+    # Drop a row only when a column the model actually uses is missing.
+    df = df.dropna(subset=feature_columns + [target_column])
+
+    columns_to_normalize = ['Age', 'Fare']  # min-max NORMALIZATION
+    for colname in columns_to_normalize:
+        col_min = df[colname].min()
+        col_max = df[colname].max()
+        df[colname] = (df[colname] - col_min) / (col_max - col_min)
+
+    # Encode the categorical column on df itself: an in-place replace on a
+    # selection of X may act on a temporary copy and leave the strings behind.
+    # astype fails loudly if a value other than female/male was left unmapped.
+    df['Sex'] = df['Sex'].map({'female': 0, 'male': 1}).astype('int64')
+
+    X = df[feature_columns]
+    Y = df[[target_column]]
+
+    # split the data into train and test sets
+    X_train, X_test, Y_train, Y_test = train_test_split(
+        X, Y, random_state=45, test_size=0.2, shuffle=True)
+
+    # Persist the held-out split so eval.py scores exactly these rows.
+    testing_data = pd.concat([X_test, Y_test], axis=1)
+    testing_data.to_csv('testing_data.csv', sep=',')
+
+    Y_train = np.ravel(Y_train)
+    encoder = LabelEncoder()
+    encoder.fit(Y_train)
+    Y_train = encoder.transform(Y_train)
+
+    Xt = torch.tensor(X_train.values, dtype=torch.float32)
+    Yt = torch.tensor(Y_train, dtype=torch.long)
+
+    model = Model(Xt.shape[1])
+    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
+    loss_fn = nn.CrossEntropyLoss()
+    epochs = 1000
+
+    # TRAINING LOOP: each step uses the whole training set as one batch
+    for epoch in range(1, epochs + 1):
+        print("Epoch #", epoch)
+        y_pred = model(Xt)
+        loss = loss_fn(y_pred, Yt)
+        print_(loss.item())
+
+        optimizer.zero_grad()
+        loss.backward()
+        optimizer.step()
+
+    torch.save(model.state_dict(), 'model.pt')