# File-viewer metadata: 139 lines, 4.3 KiB, Python
import sys
|
|
from urllib.parse import urlparse
|
|
import numpy as np
|
|
import mlflow
|
|
import torch
|
|
import torch.nn as nn
|
|
import torch.nn.functional as F
|
|
from mlflow.experiments import restore_experiment
|
|
from sklearn.preprocessing import LabelEncoder
|
|
import pandas as pd
|
|
|
|
# MLflow configuration: point the client at the tracking server and select
# the experiment that subsequent runs are recorded under.
_TRACKING_URI = "http://172.17.0.1:5000"
_EXPERIMENT_NAME = "444501"

mlflow.set_tracking_uri(_TRACKING_URI)
mlflow.set_experiment(_EXPERIMENT_NAME)
|
|
|
|
|
|
# Command-line parameters: the first argument, when present and numeric, is
# the number of training epochs; otherwise fall back to 100.
try:
    epochs = int(sys.argv[1])
except (IndexError, ValueError):
    # IndexError: no argument given; ValueError: argument is not an integer.
    # Anything else (e.g. KeyboardInterrupt, SystemExit) is left to propagate.
    print('No epoch number passed. Defaulting to 100')
    epochs = 100
|
|
|
|
|
|
# Model
|
|
class Model(nn.Module):
    """Fully connected network with two ReLU-activated hidden layers.

    Args:
        input_features: Width of each input vector.
        hidden_layer1: Number of units in the first hidden layer.
        hidden_layer2: Number of units in the second hidden layer.
        output_features: Number of output scores produced per input.
    """

    def __init__(self, input_features=2, hidden_layer1=60, hidden_layer2=90, output_features=3):
        super().__init__()
        # Layers are created in the order fc1, fc2, out so that parameter
        # names and seeded initialisation stay the same.
        self.fc1 = nn.Linear(in_features=input_features, out_features=hidden_layer1)
        self.fc2 = nn.Linear(in_features=hidden_layer1, out_features=hidden_layer2)
        self.out = nn.Linear(in_features=hidden_layer2, out_features=output_features)

    def forward(self, x):
        """Return the output-layer scores for ``x`` (no final activation is applied)."""
        first_hidden = F.relu(self.fc1(x))
        second_hidden = F.relu(self.fc2(first_hidden))
        return self.out(second_hidden)
|
|
|
|
|
|
def train_main(epochs, run):
    """Train the branch classifier, evaluate it and log the results to MLflow.

    Reads ``d_train.csv`` and ``d_test.csv`` from the working directory,
    trains ``Model`` to predict ``Branch`` from ``Rating`` and
    ``Reviewer_Location``, evaluates it on the test split, logs the
    parameters, metrics and model through ``mlflow``, and writes
    ``neural_network_prediction_results.csv`` and ``model.pkl``.

    Args:
        epochs: Number of training iterations; every iteration uses the whole
            training set. With a non-positive value no training happens and
            the ``final_loss`` metric is not logged.
        run: Accepted for interface compatibility; not used inside the
            function (logging goes through the module-level ``mlflow`` API).

    Raises:
        KeyError: If a ``Branch`` value is not one of the three known parks.
    """
    columns = ['Rating', 'Branch', 'Reviewer_Location']
    feature_columns = ['Rating', 'Reviewer_Location']
    mappings = {
        'Disneyland_California': 0,
        'Disneyland_Paris': 1,
        'Disneyland_HongKong': 2,
    }

    # Load the data. ``.copy()`` makes each frame independent of the full
    # CSV frame, so the column assignments below do not trigger pandas'
    # SettingWithCopyWarning.
    train_set = pd.read_csv('d_train.csv', encoding='latin-1')[columns].copy()
    test_set = pd.read_csv('d_test.csv', encoding='latin-1')[columns].copy()

    # Map the 'Reviewer_Location' column to integers. The encoder is fitted
    # on both splits so that locations occurring only in the test set are known.
    le = LabelEncoder()
    le.fit(pd.concat([train_set['Reviewer_Location'], test_set['Reviewer_Location']]))

    for frame in (train_set, test_set):
        frame['Reviewer_Location'] = le.transform(frame['Reviewer_Location'])
        # Map the 'Branch' column through the fixed table; an unknown branch
        # raises KeyError instead of silently producing a missing label.
        frame['Branch'] = frame['Branch'].apply(lambda branch: mappings[branch])

    # Convert the data to tensors.
    X_train = torch.FloatTensor(train_set[feature_columns].to_numpy())
    X_test = torch.FloatTensor(test_set[feature_columns].to_numpy())
    y_train = torch.LongTensor(train_set['Branch'].to_numpy())
    y_test = torch.LongTensor(test_set['Branch'].to_numpy())

    # Hyperparameters.
    model = Model()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    # Training (full batch).
    losses = []
    for i in range(epochs):
        y_pred = model(X_train)
        loss = criterion(y_pred, y_train)
        # Store a plain float: keeping the tensor itself would keep a
        # reference to every epoch's autograd graph.
        losses.append(loss.item())
        print(f'epoch: {i:2} loss: {losses[-1]:10.8f}')

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Testing: one batched forward pass instead of a per-sample Python loop.
    with torch.no_grad():
        preds = model(X_test).argmax(dim=1).tolist()

    df = pd.DataFrame({'Testing Y': y_test.numpy(), 'Predicted Y': preds})
    df['Correct'] = (df['Testing Y'] == df['Predicted Y']).astype(int)
    # Fraction of correct predictions in [0, 1]; NaN when the test set is empty.
    correct = float(df['Correct'].mean())
    print(f"{correct * 100:.2f} percent of predictions correct")

    # Logging.
    mlflow.log_param("epochs", epochs)
    if losses:
        mlflow.log_metric("final_loss", losses[-1])
    mlflow.log_metric("accuracy", correct)

    signature = mlflow.models.signature.infer_signature(X_train.numpy(), np.array(preds))
    log_kwargs = {'signature': signature, 'input_example': X_test.numpy()}
    # Register the model only when the tracking URI is not a local file store
    # (presumably because the registry is unavailable there - confirm against
    # the MLflow documentation).
    if urlparse(mlflow.get_tracking_uri()).scheme != "file":
        log_kwargs['registered_model_name'] = '444501'
    mlflow.pytorch.log_model(model, '444501', **log_kwargs)

    # Save to file.
    df.to_csv('neural_network_prediction_results.csv', index=False)
    # NOTE: this pickles the whole module object, so loading it later needs
    # the ``Model`` class to be importable.
    torch.save(model, "model.pkl")
|
|
|
|
|
|
# Open an MLflow run, report where it is recorded, then train inside it so
# that everything logged by train_main is attached to this run.
with mlflow.start_run() as run:
    run_info = run.info
    print(f"MLflow run experiment_id: {run_info.experiment_id}")
    print(f"MLflow run artifact_uri: {run_info.artifact_uri}")
    train_main(epochs, run)
|