DL library

This commit is contained in:
Maciej Czajka 2022-04-21 00:36:14 +02:00
parent 7bd0c9e8ef
commit 3f29b90a69
5 changed files with 19098 additions and 1 deletions

18801
Biblioteka_DL/all_games.csv Normal file

File diff suppressed because one or more lines are too long

260
Biblioteka_DL/dllib.py Normal file
View File

@ -0,0 +1,260 @@
import numpy as np
import torch
import pandas as pd
from torch import nn
from torch.autograd import Variable
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import torch.nn.functional as F
from datetime import datetime
class Model(nn.Module):
    """Small fully connected classifier with three output classes.

    The network is ``input_dim -> 50 -> 40 -> 3`` with ReLU after the two
    hidden layers and a softmax over the three outputs.

    Args:
        input_dim: Number of features in each input sample.
    """

    def __init__(self, input_dim):
        super().__init__()
        self.layer1 = nn.Linear(input_dim, 50)
        self.layer2 = nn.Linear(50, 40)
        self.layer3 = nn.Linear(40, 3)

    def forward(self, x):
        """Return per-class probabilities for ``x``.

        Args:
            x: Float tensor whose last dimension has ``input_dim`` entries.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 3 that sums to 1.
        """
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        # The class scores live in the last dimension. Passing ``dim``
        # explicitly avoids the deprecated implicit-dimension behaviour
        # (and its warning on every call).
        # NOTE(review): nn.CrossEntropyLoss applies log-softmax itself, so
        # feeding it these probabilities normalises twice - confirm whether
        # forward() should return raw logits instead.
        return F.softmax(self.layer3(x), dim=-1)
# Remove the rows whose platform is "Stadia".
def delete_stadia(games):
    """Drop every row whose ``platform`` value is ``" Stadia"``.

    The rows are selected with a boolean mask, so the function works for
    any index, not only a default ``0..n-1`` one.

    Args:
        games: DataFrame with a ``platform`` column. It is modified in
            place (the matching rows are removed from it).

    Returns:
        A new DataFrame with a fresh ``0..n-1`` index. ``reset_index()``
        keeps the previous index as an extra column (named ``index``, or
        ``level_0`` when ``index`` already exists).
    """
    # Values carry a leading space (" Stadia"), so compare against that.
    stadia_rows = games.index[games["platform"] == " Stadia"]
    games.drop(stadia_rows, inplace=True)
    return games.reset_index()
# Remove the rows that contain "tbd" in the "user_review" column.
def delete_tbd(games):
    """Drop every row whose ``user_review`` value is the string ``"tbd"``.

    The rows are selected with a boolean mask, so the function works for
    any index, not only a default ``0..n-1`` one.

    Args:
        games: DataFrame with a ``user_review`` column. It is modified in
            place (the matching rows are removed from it).

    Returns:
        A new DataFrame with a fresh ``0..n-1`` index. ``reset_index()``
        keeps the previous index as an extra column (named ``index``, or
        ``level_0`` when ``index`` already exists).
    """
    tbd_rows = games.index[games["user_review"] == "tbd"]
    games.drop(tbd_rows, inplace=True)
    return games.reset_index()
def delete_PC(games):
    """Drop every row whose ``platform`` value is ``" PC"``.

    The rows are selected with a boolean mask, so the function works for
    any index, not only a default ``0..n-1`` one.

    Args:
        games: DataFrame with a ``platform`` column. It is modified in
            place (the matching rows are removed from it).

    Returns:
        A new DataFrame with a fresh ``0..n-1`` index. ``reset_index()``
        keeps the previous index as an extra column (named ``index``, or
        ``level_0`` when ``index`` already exists).
    """
    # Values carry a leading space (" PC"), so compare against that.
    pc_rows = games.index[games["platform"] == " PC"]
    games.drop(pc_rows, inplace=True)
    return games.reset_index()
# Convert the "user_review" column from strings to numbers.
def user_review_to_numeric(games):
    """Replace the ``user_review`` column with its numeric conversion.

    Args:
        games: DataFrame with a ``user_review`` column; modified in place.

    Returns:
        The same DataFrame object that was passed in.

    Raises:
        ValueError: Propagated from ``pd.to_numeric`` when a value cannot
            be parsed as a number.
    """
    numeric_reviews = pd.to_numeric(games["user_review"])
    games["user_review"] = numeric_reviews
    return games
# Normalise the values in the "meta_score" and "user_review" columns.
def normalization(games):
    """Scale ``meta_score`` by 1/100 and ``user_review`` by 1/10.

    Args:
        games: DataFrame with numeric ``meta_score`` and ``user_review``
            columns; modified in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    # (column, divisor) pairs, processed in this order.
    divisors = (("meta_score", 100.0), ("user_review", 10.0))
    for column, divisor in divisors:
        games[column] = games[column] / divisor
    return games
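# Legacy per-platform numbering (0-20), used only by the commented-out
# platform_to_number variant below:
# PlayStation - 0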
# PlayStation 2 - 1
# PlayStation 3 - 2
# PlayStation 4 - 3
# PlayStation 5 - 4
# PlayStation Vita - 5
# Xbox - 6
# Xbox 360 - 7
# Xbox Series X - 8
# Nintendo 64 - 9
# GameCube - 10
# DS - 11
# 3DS - 12
# Wii - 13
# Wii U - 14
# Switch - 15
# PC - 16
# Dreamcast - 17
# Game Boy Advance - 18
# PSP - 19
# Xbox One - 20
# def platform_to_number(games):
# for i in range(0, len(games["platform"])):
#
# if games["platform"][i] == " PlayStation":
# games["platform"][i] = 0
# elif games["platform"][i] == " PlayStation 2":
# games["platform"][i] = 1
# elif games["platform"][i] == " PlayStation 3":
# games["platform"][i] = 2
# elif games["platform"][i] == " PlayStation 4":
# games["platform"][i] = 3
# elif games["platform"][i] == " PlayStation 5":
# games["platform"][i] = 4
# elif games["platform"][i] == " PlayStation Vita":
# games["platform"][i] = 5
# elif games["platform"][i] == " Xbox":
# games["platform"][i] = 6
# elif games["platform"][i] == " Xbox 360":
# games["platform"][i] = 7
# elif games["platform"][i] == " Xbox Series X":
# games["platform"][i] = 8
# elif games["platform"][i] == " Nintendo 64":
# games["platform"][i] = 9
# elif games["platform"][i] == " GameCube":
# games["platform"][i] = 10
# elif games["platform"][i] == " DS":
# games["platform"][i] = 11
# elif games["platform"][i] == " 3DS":
# games["platform"][i] = 12
# elif games["platform"][i] == " Wii":
# games["platform"][i] = 13
# elif games["platform"][i] == " Wii U":
# games["platform"][i] = 14
# elif games["platform"][i] == " Switch":
# games["platform"][i] = 15
# elif games["platform"][i] == " PC":
# games["platform"][i] = 16
# elif games["platform"][i] == " Dreamcast":
# games["platform"][i] = 17
# elif games["platform"][i] == " Game Boy Advance":
# games["platform"][i] = 18
# elif games["platform"][i] == " PSP":
# games["platform"][i] = 19
# elif games["platform"][i] == " Xbox One":
# games["platform"][i] = 20
#
# return games
# old - 0
# mid - 1
# new - 2
_GENERATION_BY_PLATFORM = {
    " PlayStation": 0,
    " PlayStation 2": 0,
    " PlayStation 3": 1,
    " PlayStation 4": 2,
    " PlayStation 5": 2,
    " PlayStation Vita": 1,
    " Xbox": 0,
    " Xbox 360": 1,
    " Xbox Series X": 2,
    " Nintendo 64": 0,
    " GameCube": 0,
    " DS": 0,
    " 3DS": 1,
    " Wii": 0,
    " Wii U": 1,
    " Switch": 2,
    " Dreamcast": 0,
    " Game Boy Advance": 0,
    " PSP": 1,
    " Xbox One": 2,
}


def _pc_generation(release_date):
    """Return the generation (0, 1 or 2) of a PC game from its release date."""
    year = datetime.strptime(release_date, '%B %d, %Y').year
    # Open-ended buckets: the original code only recognised 1995-2021 and
    # left any other year as the string " PC", which later breaks the
    # integer conversion of the column.
    if year <= 2005:
        return 0
    if year <= 2016:
        return 1
    return 2


def platform_to_number(games):
    """Replace platform names by a generation label (0 old, 1 mid, 2 new).

    Console platforms are translated through a fixed table. ``" PC"`` rows
    are classified by release year instead: up to 2005 -> 0, 2006-2016 -> 1,
    2017 and later -> 2. Values that are not in the table (for example
    ``" Stadia"``) are left unchanged.

    The whole column is rebuilt and assigned at once, so the result does
    not depend on the index labels and does not rely on chained assignment.

    Args:
        games: DataFrame with a ``platform`` column and, when it contains
            ``" PC"`` rows, a ``release_date`` column formatted like
            ``"November 23, 1998"``. Modified in place.

    Returns:
        The same DataFrame object that was passed in.

    Raises:
        ValueError: If the release date of a PC row does not match the
            expected format.
    """
    platforms = games["platform"].tolist()
    if " PC" in platforms:
        release_dates = games["release_date"].tolist()
    else:
        # release_date is only needed for PC rows.
        release_dates = [None] * len(platforms)

    generations = [
        _pc_generation(release_date)
        if platform == " PC"
        else _GENERATION_BY_PLATFORM.get(platform, platform)
        for platform, release_date in zip(platforms, release_dates)
    ]
    # Keep the object dtype: the column may still hold unmapped strings
    # next to the integer labels.
    games["platform"] = pd.Series(generations, index=games.index, dtype=object)
    return games
# Load the raw data set and run the cleaning steps in their fixed order.
games = pd.read_csv('/dane/all_games.csv', sep=',')
for cleaning_step in (platform_to_number, delete_stadia, delete_tbd,
                      user_review_to_numeric, normalization):
    games = cleaning_step(games)
# delete_stadia and delete_tbd each call reset_index(), which leaves the
# helper columns "index" and "level_0" behind; remove them again.
games.drop(columns=['level_0', 'index'], inplace=True)

# Target: the platform generation as a single-column int64 array.
labels_g = pd.DataFrame(games["platform"], dtype=np.int64).to_numpy()

# Inputs: the two normalised scores as a two-column float64 array.
features_g = pd.DataFrame(
    {'meta_score': games['meta_score'], 'user_review': games['user_review']},
    dtype=np.float64,
).to_numpy()

# Fixed random_state keeps the shuffled split reproducible between runs.
(features_train_g, features_test_g,
 labels_train_g, labels_test_g) = train_test_split(
    features_g, labels_g, random_state=1, shuffle=True)

# Training
model = Model(input_dim=features_train_g.shape[1])
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)
loss_fn = nn.CrossEntropyLoss()
epochs = 1000
def print_(loss):
    """Print ``loss`` to standard output behind a fixed label.

    Args:
        loss: Value to display; it is passed to ``print`` unchanged.
    """
    label = "The loss calculated: "
    print(label, loss)
# Not using dataloader: the whole training set is fed as a single batch.
# Plain tensors are used directly; the torch.autograd.Variable wrapper is
# deprecated and no longer needed for autograd.
x_train = torch.from_numpy(features_train_g).float()
y_train = torch.from_numpy(labels_train_g).long()

for epoch in range(1, epochs + 1):
    print("Epoch #", epoch)
    y_pred = model(x_train)
    # The labels are stored as a single column; the loss expects a 1-D
    # tensor of class indices, hence the squeeze.
    # NOTE(review): nn.CrossEntropyLoss expects raw logits - if
    # Model.forward already applies softmax the scores are normalised
    # twice; confirm and change the model and this loop together.
    loss = loss_fn(y_pred, y_train.squeeze(-1))
    print_(loss.item())

    optimizer.zero_grad()  # Zero gradients
    loss.backward()  # Gradients
    optimizer.step()  # Update

# Prediction
x_test = torch.from_numpy(features_test_g).float()
# No gradients are needed for inference, so skip building the graph.
with torch.no_grad():
    pred = model(x_test).numpy()

# The predicted class is the column with the highest score.
print("The accuracy is", accuracy_score(labels_test_g, np.argmax(pred, axis=1)))
pred = pd.DataFrame(pred)
pred.to_csv('result.csv')

View File

@ -8,6 +8,11 @@ RUN pip3 install kaggle
RUN pip3 install pandas RUN pip3 install pandas
RUN pip3 install matplotlib RUN pip3 install matplotlib
RUN pip3 install sklearn RUN pip3 install sklearn
RUN pip3 install numpy
RUN pip3 install wheel --no-deps -U
RUN pip3 install torch
#RUN pip3 install --ignore-installed --upgrade https://storage.googleapis.com/tensorflow/mac/cpu/#tensorflow-0.12.0-py3-none-any.whl
#RUN pip3 install keras
RUN apt-get install unzip RUN apt-get install unzip
ARG CUTOFF ARG CUTOFF
@ -23,6 +28,8 @@ WORKDIR /app
COPY download.sh . COPY download.sh .
COPY Zajecia_2/main.py . COPY Zajecia_2/main.py .
COPY Biblioteka_DL/dllib.py .
COPY Biblioteka_DL/all_games.csv .
RUN ./download.sh RUN ./download.sh
#CMD ["python3", "./main.py"] #CMD ["python3", "./dllib.py"]

28
Dockerfile_old Executable file
View File

@ -0,0 +1,28 @@
# Image with Python 3, pip and the Python packages installed below; it
# copies download.sh and main.py into /app and runs download.sh at build time.
FROM ubuntu:latest
# Refresh the apt package index before installing anything.
RUN apt-get update
RUN apt-get install -y python3-pip
# Python dependencies, one image layer per package.
RUN pip3 install --upgrade pip
RUN pip3 install --upgrade Pillow
RUN pip3 install kaggle
RUN pip3 install pandas
RUN pip3 install matplotlib
# NOTE(review): the scikit-learn project is published on PyPI as
# `scikit-learn`; confirm that the `sklearn` name still installs.
RUN pip3 install sklearn
# NOTE(review): no `-y` here, unlike the python3-pip install above -
# confirm the build never stops at a confirmation prompt.
RUN apt-get install unzip
# Build-time arguments (pass with `docker build --build-arg NAME=value`).
ARG CUTOFF
ARG KAGGLE_USERNAME
ARG KAGGLE_KEY
# Re-export the build arguments as environment variables so that later
# build steps and the running container can read them.
ENV CUTOFF=${CUTOFF}
ENV KAGGLE_USERNAME=${KAGGLE_USERNAME}
ENV KAGGLE_KEY=${KAGGLE_KEY}
# presumably the directory download.sh writes the data set to - verify
RUN mkdir /dane
WORKDIR /app
COPY download.sh .
COPY Zajecia_2/main.py .
# Executed during the image build, with the ENV values above in scope.
RUN ./download.sh
# No default command is set; the CMD below is intentionally disabled.
#CMD ["python3", "./main.py"]

View File

@ -26,6 +26,7 @@ pipeline {
stage('Script'){ stage('Script'){
steps { steps {
sh 'python3 ./Zajecia_2/main.py' sh 'python3 ./Zajecia_2/main.py'
sh 'python3 ./Biblioteka_DL/dllib.py'
archiveArtifacts artifacts: 'all_games.csv.dev, all_games.csv.test, all_games.csv.train', followSymlinks: false archiveArtifacts artifacts: 'all_games.csv.dev, all_games.csv.test, all_games.csv.train', followSymlinks: false
} }
} }