This commit is contained in:
s444501 2022-04-24 02:37:51 +02:00
parent 00de25502f
commit 84cd3d6fa9
2 changed files with 95 additions and 0 deletions

View File

@ -8,11 +8,13 @@ RUN pip3 install pandas
RUN pip3 install matplotlib RUN pip3 install matplotlib
RUN pip3 install sklearn RUN pip3 install sklearn
RUN pip3 install kaggle RUN pip3 install kaggle
RUN pip3 install torch
WORKDIR /ium WORKDIR /ium
COPY ./ium-data.py ./ COPY ./ium-data.py ./
COPY ./download.sh ./ COPY ./download.sh ./
COPY ./biblioteki_ml.py ./
ARG KAGGLE_KEY ARG KAGGLE_KEY
ARG KAGGLE_USERNAME ARG KAGGLE_USERNAME

93
biblioteki_ml.py Normal file
View File

@ -0,0 +1,93 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.preprocessing import LabelEncoder
import pandas as pd
# Model
class Model(nn.Module):
    """Feed-forward classifier built from three linear layers.

    The input passes through two hidden linear layers, each followed by a
    ReLU, and then through a final linear layer without any activation.

    Args:
        input_features: Size of each input sample.
        hidden_layer1: Width of the first hidden layer.
        hidden_layer2: Width of the second hidden layer.
        output_features: Number of values produced per sample.
    """

    def __init__(
        self,
        input_features: int = 2,
        hidden_layer1: int = 60,
        hidden_layer2: int = 90,
        output_features: int = 3,
    ) -> None:
        super().__init__()
        self.fc1 = nn.Linear(input_features, hidden_layer1)
        self.fc2 = nn.Linear(hidden_layer1, hidden_layer2)
        self.out = nn.Linear(hidden_layer2, output_features)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the output of the final linear layer for ``x``.

        No softmax is applied here; the values are the raw outputs of
        ``self.out``.
        """
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.out(x)
# Load the data, keeping only the three columns used below.
# `.copy()` gives each frame its own storage, so the column assignments further
# down do not write into a selection of the frame returned by read_csv (the
# pattern that makes pandas emit SettingWithCopyWarning).
train_set = pd.read_csv('d_train.csv', encoding='latin-1')
train_set = train_set[['Rating', 'Branch', 'Reviewer_Location']].copy()
test_set = pd.read_csv('d_test.csv', encoding='latin-1')
test_set = test_set[['Rating', 'Branch', 'Reviewer_Location']].copy()

# Map the 'Reviewer_Location' column to integer codes.
# The encoder is fitted on train and test together so that a location that
# appears only in the test set can still be transformed.
le = LabelEncoder()
le.fit(pd.concat([train_set['Reviewer_Location'], test_set['Reviewer_Location']]))
train_set['Reviewer_Location'] = le.transform(train_set['Reviewer_Location'])
test_set['Reviewer_Location'] = le.transform(test_set['Reviewer_Location'])

# Map the 'Branch' column with an explicit dictionary instead.
mappings = {
    'Disneyland_California': 0,
    'Disneyland_Paris': 1,
    'Disneyland_HongKong': 2,
}
# Indexing the dict directly makes an unexpected branch name raise KeyError
# rather than slip through as a missing value.
train_set['Branch'] = train_set['Branch'].apply(lambda x: mappings[x])
test_set['Branch'] = test_set['Branch'].apply(lambda x: mappings[x])

# Convert the data to tensors: float32 features, int64 (long) targets.
X_train = train_set[['Rating', 'Reviewer_Location']].to_numpy()
X_test = test_set[['Rating', 'Reviewer_Location']].to_numpy()
y_train = train_set['Branch'].to_numpy()
y_test = test_set['Branch'].to_numpy()
X_train = torch.tensor(X_train, dtype=torch.float32)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
y_test = torch.tensor(y_test, dtype=torch.long)
# Hyperparameters
model = Model()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# Training: every epoch runs one forward/backward pass over the whole of
# X_train followed by a single optimizer step.
epochs = 100
losses = []
for i in range(epochs):
    # Call the module itself rather than .forward() so that any registered
    # hooks are run as well.
    y_pred = model(X_train)
    loss = criterion(y_pred, y_train)
    # Record a plain Python float: appending the tensor itself would keep a
    # reference to its autograd history for every epoch.
    loss_value = loss.item()
    losses.append(loss_value)
    print(f'epoch: {i:2} loss: {loss_value:10.8f}')
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
# Evaluation
# Predict the whole test set in one batched forward pass; gradients are not
# needed for inference. The predicted class is the index of the largest output.
with torch.no_grad():
    preds = model(X_test).argmax(dim=1).tolist()

# Convert the target tensor to a NumPy array explicitly so the column holds
# plain integers.
df = pd.DataFrame({'Testing Y': y_test.numpy(), 'Predicted Y': preds})
df['Correct'] = (df['Testing Y'] == df['Predicted Y']).astype(int)

# The mean of the 0/1 column is the fraction of correct predictions; scale it
# by 100 so the printed number really is a percentage. Guard against an empty
# test set, whose mean would be NaN.
accuracy = df['Correct'].mean() * 100 if len(df) else 0.0
print(f"{accuracy:.2f} percent of predictions correct")

# Save the results to a file
df.to_csv('neural_network_prediction_results.csv', index=False)