From 62d35cee502bbd64dacbbab40d8244ab7ad705ce Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Sebastian=20Wa=C5=82=C4=99sa?=
Date: Fri, 6 May 2022 22:45:36 +0200
Subject: [PATCH] =?UTF-8?q?Prze=C5=9Blij=20pliki=20do=20''?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 ml_pytorch_results.ipynb | 351 +++++++++++++++++++++++++++++++++++++++
 ml_pytorch_results.py    | 208 +++++++++++++++++++++++
 2 files changed, 559 insertions(+)
 create mode 100644 ml_pytorch_results.ipynb
 create mode 100644 ml_pytorch_results.py

diff --git a/ml_pytorch_results.ipynb b/ml_pytorch_results.ipynb
new file mode 100644
index 0000000..1f09985
--- /dev/null
+++ b/ml_pytorch_results.ipynb
@@ -0,0 +1,351 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "98cddc6a-2ce1-4933-a2b7-96d2c2d197f4",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/javascript": [
+       "if (window.IPython && IPython.notebook.kernel) IPython.notebook.kernel.execute('jovian.utils.jupyter.get_notebook_name_saved = lambda: \"' + IPython.notebook.notebook_name + '\"')"
+      ],
+      "text/plain": [
+       ""
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "import torch\n",
+    "import jovian\n",
+    "import torchvision\n",
+    "import matplotlib\n",
+    "import torch.nn as nn\n",
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "import seaborn as sns\n",
+    "import torch.nn.functional as F\n",
+    "from torchvision.datasets.utils import download_url\n",
+    "from torch.utils.data import DataLoader, TensorDataset, random_split\n",
+    "import random\n",
+    "import os\n",
+    "import sys\n",
+    "from sklearn.metrics import mean_squared_error\n",
+    "from sklearn.metrics import mean_absolute_error"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "7bb63556-d009-4d9f-9de0-033a30ad3fc4",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(['matches', 'wins', 'draws', 'loses', 'scored', 'missed', 'pts'],\n",
+       " ['position'])"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Load data\n",
+    "dataframe = pd.read_csv(\"understat.csv\")\n",
+    "\n",
+    "# Choose input (feature) and output (target) columns\n",
+    "input_cols=list(dataframe.columns)[4:11]\n",
+    "output_cols = ['position']\n",
+    "input_cols, output_cols"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "c8151c46-c234-42b7-a786-50c73e3aa2f5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def dataframe_to_arrays(dataframe):\n",
+    "    dataframe_loc = dataframe.copy(deep=True)\n",
+    "    inputs_array = dataframe_loc[input_cols].to_numpy()\n",
+    "    targets_array = dataframe_loc[output_cols].to_numpy()\n",
+    "    return inputs_array, targets_array\n",
+    "\n",
+    "inputs_array, targets_array = dataframe_to_arrays(dataframe)\n",
+    "\n",
+    "inputs = torch.from_numpy(inputs_array).type(torch.float)\n",
+    "targets = torch.from_numpy(targets_array).type(torch.float)\n",
+    "\n",
+    "dataset = TensorDataset(inputs, targets)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "8c89947b-c2fe-407d-9588-3f0087df5955",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "train_ds, val_ds = random_split(dataset, [len(dataset) - len(dataset) // 5, len(dataset) // 5])  # 80/20 split\n",
+    "batch_size=50\n",
+    "train_loader = DataLoader(train_ds, batch_size, shuffle=True)\n",
+    "val_loader = DataLoader(val_ds, batch_size)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "3b1426a0-5b15-46f8-aea9-871462ca9467",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class Model_xPosition(nn.Module):\n",
+    "    def __init__(self):\n",
+    "        super().__init__()\n",
+    "        self.linear = nn.Linear(input_size,output_size)\n",
+    "\n",
+    "    def forward(self, xb):\n",
+    "        out = self.linear(xb)\n",
+    "        return out\n",
+    "\n",
+    "    def training_step(self, batch):\n",
+    "        inputs, targets = batch\n",
+    "        # Generate predictions\n",
+    "        out = self(inputs)\n",
+    "        # Calculate loss (L1, i.e. mean absolute error)\n",
+    "        loss = F.l1_loss(out,targets)\n",
+    "        return loss\n",
+    "\n",
+    "    def validation_step(self, batch):\n",
+    "        inputs, targets = batch\n",
+    "        out = self(inputs)\n",
+    "        loss = F.l1_loss(out,targets)\n",
+    "        return {'val_loss': loss.detach()}\n",
+    "\n",
+    "    def validation_epoch_end(self, outputs):\n",
+    "        batch_losses = [x['val_loss'] for x in outputs]\n",
+    "        epoch_loss = torch.stack(batch_losses).mean()\n",
+    "        return {'val_loss': epoch_loss.item()}\n",
+    "\n",
+    "    def epoch_end(self, epoch, result, num_epochs):\n",
+    "        if (epoch+1) % 100 == 0 or epoch == num_epochs-1:\n",
+    "            print(\"Epoch {} loss: {:.4f}\".format(epoch+1, result['val_loss']))\n",
+    "\n",
+    "\n",
+    "def evaluate(model, val_loader):\n",
+    "    outputs = [model.validation_step(batch) for batch in val_loader]\n",
+    "    return model.validation_epoch_end(outputs)\n",
+    "\n",
+    "def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):\n",
+    "    history = []\n",
+    "    optimizer = opt_func(model.parameters(), lr)\n",
+    "    for epoch in range(epochs):\n",
+    "        for batch in train_loader:\n",
+    "            loss = model.training_step(batch)\n",
+    "            loss.backward()\n",
+    "            optimizer.step()\n",
+    "            optimizer.zero_grad()\n",
+    "        result = evaluate(model, val_loader)\n",
+    "        model.epoch_end(epoch, result, epochs)\n",
+    "        history.append(result)\n",
+    "    return history"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "f2e22e9a-8724-4084-b706-0be266846c05",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "input_size = len(input_cols)\n",
+    "output_size = len(output_cols)\n",
+    "model=Model_xPosition()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "efacafe4-797a-4588-b0d8-2e4d883e639a",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch 100 loss: 2.2152\n",
+      "Epoch 200 loss: 1.8737\n",
+      "Epoch 300 loss: 1.8362\n",
+      "Epoch 400 loss: 1.7904\n",
+      "Epoch 500 loss: 1.7507\n",
+      "Epoch 600 loss: 1.7174\n",
+      "Epoch 700 loss: 1.6977\n",
+      "Epoch 800 loss: 1.6847\n",
+      "Epoch 900 loss: 1.6743\n",
+      "Epoch 1000 loss: 1.6645\n"
+     ]
+    }
+   ],
+   "source": [
+    "epochs = 1000\n",
+    "lr = 1e-5\n",
+    "learning_proccess = fit(epochs, lr, model, train_loader, val_loader)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "id": "7007ab5a-dc79-4321-beed-cd54dd197858",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def predict_single(input, target, model):\n",
+    "    inputs = input.unsqueeze(0)\n",
+    "    predictions = model(inputs)\n",
+    "    prediction = predictions[0].detach()\n",
+    "\n",
+    "    return \"Target: \"+str(target)+\" Predicted: \"+str(prediction)+\"\\n\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "c946417a-693a-463a-b123-54348266ff6e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def prediction(input, target, model):\n",
+    "    inputs = input.unsqueeze(0)\n",
+    "    predictions = model(inputs)\n",
+    "    predicted = predictions[0].detach()\n",
+    "    return predicted"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "50c62065-5094-4595-995c-6d0b71f1f28a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with open(\"result.txt\", \"a+\") as file:\n",
+    "    for i in range(0, len(val_ds), 1):\n",
+    "        input_, target = val_ds[i]\n",
+    "        file.write(str(predict_single(input_, target, model)))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 11,
+   "id": "fdd6468c-3d50-4131-b2d2-e3190d8b19e5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "expected = []\n",
+    "predicted = []\n",
+    "for i in range(0, len(val_ds), 1):\n",
+    "    input_, target = val_ds[i]\n",
+    "    expected.append(float(target))\n",
+    "    predicted.append(float(prediction(input_, target, model)))\n",
+    "\n",
+    "MSE = mean_squared_error(expected, predicted)\n",
+    "MAE = mean_absolute_error(expected, predicted)\n",
+    "\n",
+    "with open(\"metrics.txt\", \"a+\") as file:\n",
+    "    file.write(\"Mean squared error: MSE = \"+ str(MSE) + \"\\n\")\n",
+    "    file.write(\"Mean absolute error: MAE = \"+ str(MAE)+ \"\\n\")\n",
+    "\n",
+    "with open(\"MSE.txt\", \"a+\") as file:\n",
+    "    file.write(str(MSE) + \"\\n\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "4a1e5b16-7d80-47b0-8313-e44667687779",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "with open('MSE.txt') as file:\n",
+    "    y_MSE = [float(line) for line in file if line.strip()]  # skip blank/whitespace-only lines\n",
+    "    x_builds = list(range(1, len(y_MSE) + 1))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "id": "1da02242-846a-499c-92eb-4c80fe5f43c4",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "image/png": "\n",
+      "text/plain": [
+       ""
+      ]
+     },
+     "metadata": {
+      "needs_background": "light"
+     },
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "text/plain": [
+       ""
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "plt.xlabel('Number of builds')\n",
+    "plt.ylabel('MSE')\n",
+    "plt.plot(x_builds, y_MSE, label='Mean squared error')\n",
+    "plt.legend()\n",
+    "plt.savefig('RMSplot.png')  # save before show(): afterwards the figure is gone and a blank image is written\n",
+    "plt.show()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8dffe789-1ad5-44f1-8f21-92b9c89ed974",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!jupyter nbconvert --to script ml_pytorch.ipynb"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/ml_pytorch_results.py b/ml_pytorch_results.py
new file mode 100644
index 0000000..d5db0ca
--- /dev/null
+++ b/ml_pytorch_results.py
@@ -0,0 +1,208 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+# In[1]:
+
+
+import torch
+import jovian
+import torchvision
+import matplotlib
+import torch.nn as nn
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+import torch.nn.functional as F
+from torchvision.datasets.utils import download_url
+from torch.utils.data import DataLoader, TensorDataset, random_split
+import random
+import os
+import sys
+from sklearn.metrics import mean_squared_error
+from sklearn.metrics import mean_absolute_error
+
+
+# In[2]:
+
+
+# Load data
+dataframe = pd.read_csv("understat.csv")
+
+# Choose input (feature) and output (target) columns
+input_cols=list(dataframe.columns)[4:11]
+output_cols = ['position']
+input_cols, output_cols
+
+
+# In[3]:
+
+
+def dataframe_to_arrays(dataframe):
+    dataframe_loc = dataframe.copy(deep=True)
+    inputs_array = dataframe_loc[input_cols].to_numpy()
+    targets_array = dataframe_loc[output_cols].to_numpy()
+    return inputs_array, targets_array
+
+inputs_array, targets_array = dataframe_to_arrays(dataframe)
+
+inputs = torch.from_numpy(inputs_array).type(torch.float)
+targets = torch.from_numpy(targets_array).type(torch.float)
+
+dataset = TensorDataset(inputs, targets)
+
+
+# In[4]:
+
+
+train_ds, val_ds = random_split(dataset, [len(dataset) - len(dataset) // 5, len(dataset) // 5])  # 80/20 split
+batch_size=50
+train_loader = DataLoader(train_ds, batch_size, shuffle=True)
+val_loader = DataLoader(val_ds, batch_size)
+
+
+# In[5]:
+
+
+class Model_xPosition(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.linear = nn.Linear(input_size,output_size)
+
+    def forward(self, xb):
+        out = self.linear(xb)
+        return out
+
+    def training_step(self, batch):
+        inputs, targets = batch
+        # Generate predictions
+        out = self(inputs)
+        # Calculate loss (L1, i.e. mean absolute error)
+        loss = F.l1_loss(out,targets)
+        return loss
+
+    def validation_step(self, batch):
+        inputs, targets = batch
+        out = self(inputs)
+        loss = F.l1_loss(out,targets)
+        return {'val_loss': loss.detach()}
+
+    def validation_epoch_end(self, outputs):
+        batch_losses = [x['val_loss'] for x in outputs]
+        epoch_loss = torch.stack(batch_losses).mean()
+        return {'val_loss': epoch_loss.item()}
+
+    def epoch_end(self, epoch, result, num_epochs):
+        if (epoch+1) % 100 == 0 or epoch == num_epochs-1:
+            print("Epoch {} loss: {:.4f}".format(epoch+1, result['val_loss']))
+
+
+def evaluate(model, val_loader):
+    outputs = [model.validation_step(batch) for batch in val_loader]
+    return model.validation_epoch_end(outputs)
+
+def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
+    history = []
+    optimizer = opt_func(model.parameters(), lr)
+    for epoch in range(epochs):
+        for batch in train_loader:
+            loss = model.training_step(batch)
+            loss.backward()
+            optimizer.step()
+            optimizer.zero_grad()
+        result = evaluate(model, val_loader)
+        model.epoch_end(epoch, result, epochs)
+        history.append(result)
+    return history
+
+
+# In[6]:
+
+
+input_size = len(input_cols)
+output_size = len(output_cols)
+model=Model_xPosition()
+
+
+# In[7]:
+
+
+epochs = 1000
+lr = 1e-5
+learning_proccess = fit(epochs, lr, model, train_loader, val_loader)
+
+
+# In[8]:
+
+
+def predict_single(input, target, model):
+    inputs = input.unsqueeze(0)
+    predictions = model(inputs)
+    prediction = predictions[0].detach()
+
+    return "Target: "+str(target)+" Predicted: "+str(prediction)+"\n"
+
+
+# In[9]:
+
+
+def prediction(input, target, model):
+    inputs = input.unsqueeze(0)
+    predictions = model(inputs)
+    predicted = predictions[0].detach()
+    return predicted
+
+
+# In[10]:
+
+
+with open("result.txt", "a+") as file:
+    for i in range(0, len(val_ds), 1):
+        input_, target = val_ds[i]
+        file.write(str(predict_single(input_, target, model)))
+
+
+# In[11]:
+
+
+expected = []
+predicted = []
+for i in range(0, len(val_ds), 1):
+    input_, target = val_ds[i]
+    expected.append(float(target))
+    predicted.append(float(prediction(input_, target, model)))
+
+MSE = mean_squared_error(expected, predicted)
+MAE = mean_absolute_error(expected, predicted)
+
+with open("metrics.txt", "a+") as file:
+    file.write("Mean squared error: MSE = "+ str(MSE) + "\n")
+    file.write("Mean absolute error: MAE = "+ str(MAE)+ "\n")
+
+with open("MSE.txt", "a+") as file:
+    file.write(str(MSE) + "\n")
+
+
+# In[12]:
+
+
+with open('MSE.txt') as file:
+    y_MSE = [float(line) for line in file if line.strip()]  # skip blank/whitespace-only lines
+    x_builds = list(range(1, len(y_MSE) + 1))
+
+
+# In[13]:
+
+
+plt.xlabel('Number of builds')
+plt.ylabel('MSE')
+plt.plot(x_builds, y_MSE, label='Mean squared error')
+plt.legend()
+plt.savefig('RMSplot.png')  # save before show(): afterwards the figure is gone and a blank image is written
+plt.show()
+
+
+# In[ ]:
+
+
+# get_ipython().system('jupyter nbconvert --to script ml_pytorch.ipynb')
+