# ium_444501/biblioteki_ml.py
#
# Repository web-view residue, kept as a comment so the file parses:
# 103 lines, 2.8 KiB, Python; timestamp 2022-05-01 14:59:44 +02:00.
import sys
# (repository web-view timestamp: 2022-04-24 02:37:51 +02:00)
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.preprocessing import LabelEncoder
import pandas as pd
# (repository web-view timestamp: 2022-05-01 14:59:44 +02:00)
# Command-line parameters: the first argument is the number of training epochs.
try:
    epochs = int(sys.argv[1])
except (IndexError, ValueError):
    # IndexError: no argument was given; ValueError: it is not an integer.
    # Other exceptions (e.g. KeyboardInterrupt) are deliberately not caught.
    print('No epoch number passed. Defaulting to 100')
    epochs = 100
# (repository web-view timestamp: 2022-04-24 02:37:51 +02:00)
# Model
class Model(nn.Module):
    """Feed-forward network with two ReLU hidden layers and a linear output.

    Args:
        input_features: size of each input vector.
        hidden_layer1: width of the first hidden layer.
        hidden_layer2: width of the second hidden layer.
        output_features: size of the output vector.
    """

    def __init__(
        self,
        input_features: int = 2,
        hidden_layer1: int = 60,
        hidden_layer2: int = 90,
        output_features: int = 3,
    ) -> None:
        super().__init__()
        self.fc1 = nn.Linear(input_features, hidden_layer1)
        self.fc2 = nn.Linear(hidden_layer1, hidden_layer2)
        self.out = nn.Linear(hidden_layer2, output_features)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the output of the final linear layer for ``x``.

        No activation is applied after the last layer, so the result is the
        raw linear output.
        """
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.out(x)
        return x
# Data loading
# Only three columns are used. `.copy()` makes each subset an independent
# frame, so the column assignments below do not trigger pandas'
# SettingWithCopyWarning.
train_set = pd.read_csv('d_train.csv', encoding='latin-1')
train_set = train_set[['Rating', 'Branch', 'Reviewer_Location']].copy()
test_set = pd.read_csv('d_test.csv', encoding='latin-1')
test_set = test_set[['Rating', 'Branch', 'Reviewer_Location']].copy()

# Map the 'Reviewer_Location' column to integer codes. The encoder is fitted
# on both splits so that every location in either split can be transformed.
le = LabelEncoder()
le.fit(pd.concat([train_set['Reviewer_Location'], test_set['Reviewer_Location']]))
train_set['Reviewer_Location'] = le.transform(train_set['Reviewer_Location'])
test_set['Reviewer_Location'] = le.transform(test_set['Reviewer_Location'])

# Map the 'Branch' column a different way: with an explicit, fixed mapping.
# An unknown branch name raises KeyError.
mappings = {
    'Disneyland_California': 0,
    'Disneyland_Paris': 1,
    'Disneyland_HongKong': 2,
}
train_set['Branch'] = train_set['Branch'].apply(lambda x: mappings[x])
test_set['Branch'] = test_set['Branch'].apply(lambda x: mappings[x])

# Convert the data to tensors: float32 features, int64 class indices.
X_train = torch.tensor(
    train_set[['Rating', 'Reviewer_Location']].to_numpy(), dtype=torch.float32
)
X_test = torch.tensor(
    test_set[['Rating', 'Reviewer_Location']].to_numpy(), dtype=torch.float32
)
y_train = torch.tensor(train_set['Branch'].to_numpy(), dtype=torch.long)
y_test = torch.tensor(test_set['Branch'].to_numpy(), dtype=torch.long)
# Hyperparameters
model = Model()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Training: every epoch runs one forward/backward pass over the whole
# training tensor, followed by a single optimizer step.
losses = []
for i in range(epochs):
    # Call the module itself rather than .forward() so that nn.Module's
    # __call__ machinery (hooks) is not bypassed.
    y_pred = model(X_train)
    loss = criterion(y_pred, y_train)
    # Store a plain float; appending the tensor itself would keep a reference
    # to every epoch's autograd graph for as long as `losses` exists.
    loss_value = loss.item()
    losses.append(loss_value)
    print(f'epoch: {i:2} loss: {loss_value:10.8f}')
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
# Evaluation
with torch.no_grad():
    # Score the whole test set in one forward pass; for each row the index of
    # the largest output is the predicted class.
    preds = model(X_test).argmax(dim=1).tolist()

df = pd.DataFrame({'Testing Y': y_test.numpy(), 'Predicted Y': preds})
# 1 where the prediction matches the label, 0 otherwise.
df['Correct'] = (df['Testing Y'] == df['Predicted Y']).astype(int)
# The mean of the 0/1 column is the fraction correct; scale it by 100 so the
# printed number really is a percentage, as the message says.
accuracy = df['Correct'].mean()
print(f"{accuracy * 100:.2f} percent of predictions correct")

# Save the per-sample results to a file.
df.to_csv('neural_network_prediction_results.csv', index=False)

# NOTE(review): this pickles the whole module object rather than its
# state_dict, so loading "model.pkl" presumably needs the Model class to be
# importable at load time - confirm against the consumer of this file.
torch.save(model, "model.pkl")