pytorch

2022-04-24 02:37:51 +02:00 · 2022-04-24 02:37:51 +02:00 · 84cd3d6fa9
commit 84cd3d6fa9
parent 00de25502f
2 changed files with 95 additions and 0 deletions
--- a/2
+++ b/2
@ -8,11 +8,13 @@ RUN pip3 install pandas
 RUN pip3 install matplotlib
 RUN pip3 install sklearn
 RUN pip3 install kaggle
+RUN pip3 install torch

 WORKDIR /ium

 COPY ./ium-data.py ./
 COPY ./download.sh ./
+COPY ./biblioteki_ml.py ./

 ARG KAGGLE_KEY
 ARG KAGGLE_USERNAME
--- a/biblioteki_ml.py
+++ b/biblioteki_ml.py
@ -0,0 +1,93 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from sklearn.preprocessing import LabelEncoder
+import pandas as pd
+
+
+# Model
+class Model(nn.Module):
+    """Fully connected classifier: Linear -> ReLU -> Linear -> ReLU -> Linear.
+
+    The three layers are exposed as ``fc1``, ``fc2`` and ``out``. The output
+    of ``out`` is returned as-is, with no activation applied to it.
+    """
+
+    def __init__(self, input_features=2, hidden_layer1=60, hidden_layer2=90, output_features=3):
+        """Create the three linear layers with the given widths."""
+        super().__init__()
+        # Layers are created in network order: fc1, fc2, out.
+        self.fc1 = nn.Linear(in_features=input_features, out_features=hidden_layer1)
+        self.fc2 = nn.Linear(in_features=hidden_layer1, out_features=hidden_layer2)
+        self.out = nn.Linear(in_features=hidden_layer2, out_features=output_features)
+
+    def forward(self, x):
+        """Return the output of ``out`` for input ``x``."""
+        first_hidden = F.relu(self.fc1(x))
+        return self.out(F.relu(self.fc2(first_hidden)))
+
+
+# Load the data, keeping only the three columns used below
+train_set = pd.read_csv('d_train.csv', encoding='latin-1')[['Rating', 'Branch', 'Reviewer_Location']]
+test_set = pd.read_csv('d_test.csv', encoding='latin-1')[['Rating', 'Branch', 'Reviewer_Location']]
+
+
+# Map the 'Reviewer_Location' column to integer codes.
+# The encoder is fitted on train and test together so that every location
+# present in either file can be transformed.
+le = LabelEncoder().fit(
+    pd.concat([train_set['Reviewer_Location'], test_set['Reviewer_Location']])
+)
+train_set['Reviewer_Location'] = le.transform(train_set['Reviewer_Location'])
+test_set['Reviewer_Location'] = le.transform(test_set['Reviewer_Location'])
+
+
+# Map the 'Branch' column with an explicit, fixed name -> class-index table
+mappings = {
+    branch: code
+    for code, branch in enumerate(
+        ['Disneyland_California', 'Disneyland_Paris', 'Disneyland_HongKong']
+    )
+}
+# Direct dict lookup: a branch name missing from the table raises KeyError.
+train_set['Branch'] = train_set['Branch'].apply(mappings.__getitem__)
+test_set['Branch'] = test_set['Branch'].apply(mappings.__getitem__)
+
+
+# Convert the data to tensors: float32 features, int64 class labels
+X_train, X_test = (
+    torch.tensor(frame[['Rating', 'Reviewer_Location']].to_numpy(), dtype=torch.float32)
+    for frame in (train_set, test_set)
+)
+y_train, y_test = (
+    torch.tensor(frame['Branch'].to_numpy(), dtype=torch.int64)
+    for frame in (train_set, test_set)
+)
+
+
+# Hyperparameters
+model = Model()
+criterion = nn.CrossEntropyLoss()
+optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
+
+
+# Training: every epoch runs one forward/backward pass over all of X_train
+epochs = 100
+# Per-epoch loss values, stored as plain Python floats. Appending the loss
+# tensor itself would keep a reference to each epoch's autograd graph.
+losses = []
+for i in range(epochs):
+    # Call the module instead of .forward() so that nn.Module.__call__
+    # (and any registered hooks) is used.
+    y_pred = model(X_train)
+    loss = criterion(y_pred, y_train)
+    loss_value = loss.item()
+    losses.append(loss_value)
+    print(f'epoch: {i:2}  loss: {loss_value:10.8f}')
+
+    # Clear gradients from the previous step, backpropagate, update weights.
+    optimizer.zero_grad()
+    loss.backward()
+    optimizer.step()
+
+
+# Evaluation on the test set
+with torch.no_grad():
+    # One batched forward pass; argmax over dim 1 (the class scores) picks
+    # the predicted class index for every row of X_test.
+    preds = model(X_test).argmax(dim=1).tolist()
+
+# Use plain Python ints for the labels rather than handing a tensor to pandas,
+# so the column does not depend on how pandas coerces tensor objects.
+df = pd.DataFrame({'Testing Y': y_test.tolist(), 'Predicted Y': preds})
+df['Correct'] = (df['Testing Y'] == df['Predicted Y']).astype(int)
+# The mean of the 0/1 'Correct' column is a fraction in [0, 1]; scale it by
+# 100 so the printed number matches the "percent" wording.
+print(f"{df['Correct'].mean() * 100:.2f} percent of predictions correct")
+
+
+# Save the results to a file
+df.to_csv('neural_network_prediction_results.csv', index=False)