import sys
from urllib.parse import urlparse

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.preprocessing import LabelEncoder

import mlflow

# MLflow setup
mlflow.set_experiment("s444501")
# mlflow.set_tracking_uri("http://172.17.0.1:5000")

# Command-line parameters
try:
    epochs = int(sys.argv[1])
except (IndexError, ValueError):
    print('No epoch number passed. Defaulting to 100')
    epochs = 100


# Model
class Model(nn.Module):
    def __init__(self, input_features=2, hidden_layer1=60, hidden_layer2=90, output_features=3):
        super().__init__()
        self.fc1 = nn.Linear(input_features, hidden_layer1)
        self.fc2 = nn.Linear(hidden_layer1, hidden_layer2)
        self.out = nn.Linear(hidden_layer2, output_features)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.out(x)
        return x


def train_main(epochs, run):
    # Load the data
    train_set = pd.read_csv('d_train.csv', encoding='latin-1')
    train_set = train_set[['Rating', 'Branch', 'Reviewer_Location']]
    test_set = pd.read_csv('d_test.csv', encoding='latin-1')
    test_set = test_set[['Rating', 'Branch', 'Reviewer_Location']]

    # Encode the 'Reviewer_Location' column as integers
    le = LabelEncoder()
    le.fit(pd.concat([train_set['Reviewer_Location'], test_set['Reviewer_Location']]))
    train_set['Reviewer_Location'] = le.transform(train_set['Reviewer_Location'])
    test_set['Reviewer_Location'] = le.transform(test_set['Reviewer_Location'])

    # Map the 'Branch' column with an explicit dictionary
    mappings = {
        'Disneyland_California': 0,
        'Disneyland_Paris': 1,
        'Disneyland_HongKong': 2
    }
    train_set['Branch'] = train_set['Branch'].apply(lambda x: mappings[x])
    test_set['Branch'] = test_set['Branch'].apply(lambda x: mappings[x])

    # Convert the data to tensors
    X_train = train_set[['Rating', 'Reviewer_Location']].to_numpy()
    X_test = test_set[['Rating', 'Reviewer_Location']].to_numpy()
    y_train = train_set['Branch'].to_numpy()
    y_test = test_set['Branch'].to_numpy()
    X_train = torch.FloatTensor(X_train)
    X_test = torch.FloatTensor(X_test)
    y_train = torch.LongTensor(y_train)
    y_test = torch.LongTensor(y_test)

    # Model, loss and optimizer
    model = Model()
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    # Training
    losses = []
    for i in range(epochs):
        y_pred = model(X_train)
        loss = criterion(y_pred, y_train)
        losses.append(loss.item())
        print(f'epoch: {i:2}  loss: {loss.item():10.8f}')

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Evaluation
    preds = []
    with torch.no_grad():
        for val in X_test:
            y_hat = model(val)
            preds.append(y_hat.argmax().item())

    df = pd.DataFrame({'Testing Y': y_test.numpy(), 'Predicted Y': preds})
    df['Correct'] = [1 if corr == pred else 0 for corr, pred in zip(df['Testing Y'], df['Predicted Y'])]
    accuracy = df['Correct'].sum() / len(df)
    print(f"{accuracy:.2%} of predictions correct")

    # MLflow logging
    mlflow.log_param("epochs", epochs)
    mlflow.log_metric("final_loss", losses[-1])
    mlflow.log_metric("accuracy", accuracy)
    signature = mlflow.models.signature.infer_signature(X_train.numpy(), np.array(preds))

    tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
    if tracking_url_type_store != "file":
        # Model registry is only available with a remote/db-backed tracking server
        mlflow.pytorch.log_model(model, 's444501', registered_model_name='s444501',
                                 signature=signature, input_example=X_test.numpy())
    else:
        mlflow.pytorch.log_model(model, 's444501',
                                 signature=signature, input_example=X_test.numpy())

    # Save predictions and the model to files
    df.to_csv('neural_network_prediction_results.csv', index=False)
    torch.save(model, "model.pkl")


with mlflow.start_run() as run:
    print(f"MLflow run experiment_id: {run.info.experiment_id}")
    print(f"MLflow run artifact_uri: {run.info.artifact_uri}")
    train_main(epochs, run)
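
# Example invocation (the filename below is assumed; use whatever this script is
# actually named in the repository):
#   python train.py 30
# If the epoch argument is missing or not an integer, the script falls back to
# 100 epochs; the run is logged under the "s444501" MLflow experiment.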