lab5 nn learning and testing scripts

This commit is contained in:
wikbom 2023-05-10 13:10:44 +02:00
parent c3729f90ae
commit fea7c768b2
2 changed files with 113 additions and 0 deletions

43
eval.py Executable file
View File

@ -0,0 +1,43 @@
#! /usr/bin/python3
import numpy as np
import torch
from torch import nn
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
import torch.nn.functional as F
class Model(nn.Module):
    """Small fully connected classifier: input_dim -> 50 -> 20 -> 2.

    The two hidden layers use ReLU activations and the output layer is
    passed through a softmax, so ``forward`` returns class probabilities.

    Args:
        input_dim: Number of input features per sample.
    """

    def __init__(self, input_dim: int) -> None:
        super().__init__()
        self.layer1 = nn.Linear(input_dim, 50)
        self.layer2 = nn.Linear(50, 20)
        self.layer3 = nn.Linear(20, 2)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the two class probabilities for each sample in ``x``.

        Args:
            x: Tensor whose last dimension has ``input_dim`` features.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 2 that sums to 1.
        """
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        # An explicit dim is required: the implicit form is deprecated, emits
        # a warning, and picks dim 0 for 3-D input. The class scores always
        # live on the last axis, so normalise over -1.
        x = F.softmax(self.layer3(x), dim=-1)
        return x
# Evaluation script: score the saved model on the held-out split and dump
# the predicted class probabilities.

# Read the test split and separate the feature columns from the label column.
test_df = pd.read_csv('testing_data.csv')
X = test_df[['Pclass', 'Sex', 'Age', 'SibSp', 'Fare']]
Y = np.ravel(test_df[['Survived']])

# Map the labels onto consecutive integer ids so they are directly comparable
# with the argmax indices of the network output.
encoder = LabelEncoder()
Y = encoder.fit_transform(Y)

# Rebuild the network with the right input width and restore its weights.
model = Model(X.shape[1])
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source (consider weights_only=True where the installed PyTorch
# supports it).
model.load_state_dict(torch.load('model.pt'))
# Inference only: switch to eval mode and skip autograd bookkeeping instead of
# building a graph and detaching from it afterwards.
model.eval()

x_test = torch.tensor(X.values, dtype=torch.float32)
with torch.no_grad():
    pred = model(x_test)
pred = pred.numpy()

print("The accuracy is", accuracy_score(Y, np.argmax(pred, axis=1)))
# One row per sample, one tab-separated column per class probability.
np.savetxt('prediction.tsv', pred, delimiter='\t')

70
learning.py Executable file
View File

@ -0,0 +1,70 @@
#!/usr/bin/python3
import numpy as np
import torch
from torch import nn
import pandas as pd
import subprocess
from sklearn.model_selection import train_test_split
import torch.nn.functional as F
from sklearn.preprocessing import LabelEncoder
class Model(nn.Module):
    """Small fully connected classifier: input_dim -> 50 -> 20 -> 2.

    The two hidden layers use ReLU activations and the output layer is
    passed through a softmax, so ``forward`` returns class probabilities.

    Args:
        input_dim: Number of input features per sample.
    """

    def __init__(self, input_dim: int) -> None:
        super().__init__()
        self.layer1 = nn.Linear(input_dim, 50)
        self.layer2 = nn.Linear(50, 20)
        self.layer3 = nn.Linear(20, 2)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the two class probabilities for each sample in ``x``.

        Args:
            x: Tensor whose last dimension has ``input_dim`` features.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 2 that sums to 1.
        """
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        # An explicit dim is required: the implicit form is deprecated, emits
        # a warning, and picks dim 0 for 3-D input. The class scores always
        # live on the last axis, so normalise over -1.
        x = F.softmax(self.layer3(x), dim=-1)
        return x
def print_(loss) -> None:
    """Write ``loss`` to standard output, preceded by a fixed label."""
    label = "The loss calculated: "
    print(label, loss)
if __name__ == "__main__":
    # Training script: prepare the data, write the held-out split to
    # 'testing_data.csv', train the network and save its weights to 'model.pt'.
    feature_columns = ['Pclass', 'Sex', 'Age', 'SibSp', 'Fare']
    target_column = 'Survived'

    df = pd.read_csv("train.csv")
    # Drop rows with missing values only in the columns that are actually
    # used, so gaps in unrelated columns do not throw away usable samples.
    df = df.dropna(subset=feature_columns + [target_column])

    # Min-max normalisation of the continuous columns to the [0, 1] range.
    columns_to_normalize = ['Age', 'Fare']
    for colname in columns_to_normalize:
        col_min = df[colname].min()
        col_max = df[colname].max()
        value_range = col_max - col_min
        if value_range == 0:
            # A constant column carries no information; avoid dividing by zero.
            df[colname] = 0.0
        else:
            df[colname] = (df[colname] - col_min) / value_range

    # Work on an explicit copy so the encoding below modifies X itself.
    X = df[feature_columns].copy()
    Y = df[[target_column]]

    # Categorical 'Sex' transformed to integers (female -> 0, male -> 1).
    # The result is assigned back: an in-place replace on a .loc selection
    # acts on a temporary object and can leave X unchanged.
    X['Sex'] = X['Sex'].map({'female': 0, 'male': 1})
    if X['Sex'].isna().any():
        raise ValueError("Unexpected value in 'Sex' column; expected 'female' or 'male'")

    # Split the data into train and test sets.
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, random_state=45, test_size=0.2, shuffle=True
    )

    # Persist the held-out split (features plus label) for later evaluation.
    testing_data = pd.concat([X_test, Y_test], axis=1)
    testing_data.to_csv('testing_data.csv', sep=',')

    # Encode the training labels as consecutive integer class ids.
    encoder = LabelEncoder()
    Y_train = encoder.fit_transform(np.ravel(Y_train))

    Xt = torch.tensor(X_train.values, dtype=torch.float32)
    Yt = torch.tensor(Y_train, dtype=torch.long)

    model = Model(Xt.shape[1])
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    # Model.forward already returns softmax probabilities, while
    # nn.CrossEntropyLoss expects raw logits and would apply softmax a second
    # time. NLLLoss on the log of the probabilities is the same cross-entropy
    # without the double normalisation.
    loss_fn = nn.NLLLoss()
    epochs = 1000

    # TRAINING LOOP (full-batch gradient descent)
    for epoch in range(1, epochs + 1):
        print("Epoch #", epoch)
        y_pred = model(Xt)
        # Clamp before the log so a probability that underflows to 0 cannot
        # produce -inf and poison the gradients.
        loss = loss_fn(torch.log(y_pred.clamp_min(1e-12)), Yt)
        print_(loss.item())
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    torch.save(model.state_dict(), 'model.pt')