diff --git a/ml_pytorch.ipynb b/ml_pytorch.ipynb new file mode 100644 index 0000000..943859e --- /dev/null +++ b/ml_pytorch.ipynb @@ -0,0 +1,309 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "98cddc6a-2ce1-4933-a2b7-96d2c2d197f4", + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "if (window.IPython && IPython.notebook.kernel) IPython.notebook.kernel.execute('jovian.utils.jupyter.get_notebook_name_saved = lambda: \"' + IPython.notebook.notebook_name + '\"')" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import torch\n", + "import jovian\n", + "import torchvision\n", + "import matplotlib\n", + "import torch.nn as nn\n", + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "import torch.nn.functional as F\n", + "from torchvision.datasets.utils import download_url\n", + "from torch.utils.data import DataLoader, TensorDataset, random_split\n", + "import random\n", + "import os\n", + "import sys" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "7bb63556-d009-4d9f-9de0-033a30ad3fc4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(['matches', 'wins', 'draws', 'loses', 'scored', 'missed', 'pts'],\n", + " ['position'])" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "#load data\n", + "dataframe = pd.read_csv(\"understat.csv\")\n", + "\n", + "#choose columns\n", + "input_cols=list(dataframe.columns)[4:11]\n", + "output_cols = ['position']\n", + "input_cols, output_cols" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "c8151c46-c234-42b7-a786-50c73e3aa2f5", + "metadata": {}, + "outputs": [], + "source": [ + "def dataframe_to_arrays(dataframe):\n", + " dataframe_loc = dataframe.copy(deep=True)\n", + " inputs_array = dataframe_loc[input_cols].to_numpy()\n", + " targets_array = dataframe_loc[output_cols].to_numpy()\n", + " return inputs_array, targets_array\n", + "\n", + "inputs_array, targets_array = dataframe_to_arrays(dataframe)\n", + "\n", + "inputs = torch.from_numpy(inputs_array).type(torch.float)\n", + "targets = torch.from_numpy(targets_array).type(torch.float)\n", + "\n", + "dataset = TensorDataset(inputs, targets)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "8c89947b-c2fe-407d-9588-3f0087df5955", + "metadata": {}, + "outputs": [], + "source": [ + "train_ds, val_ds = random_split(dataset, [548, 136])\n", + "batch_size=50\n", + "train_loader = DataLoader(train_ds, batch_size, shuffle=True)\n", + "val_loader = DataLoader(val_ds, batch_size)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "3b1426a0-5b15-46f8-aea9-871462ca9467", + "metadata": {}, + "outputs": [], + "source": [ + "class Model_xPosition(nn.Module):\n", + " def __init__(self):\n", + " super().__init__()\n", + " self.linear = nn.Linear(input_size,output_size) \n", + " \n", + " def forward(self, xb): \n", + " out = self.linear(xb)\n", + " return out\n", + " \n", + " def training_step(self, batch):\n", + " inputs, targets = batch \n", + " # Generate predictions\n", + " out = self(inputs) \n", + " # Calcuate loss\n", + " loss = F.l1_loss(out,targets) \n", + " return loss\n", + " \n", + " def validation_step(self, batch):\n", + " inputs, targets = batch\n", + " out = self(inputs)\n", + " loss = F.l1_loss(out,targets) \n", + " return {'val_loss': loss.detach()}\n", + " \n", + " def validation_epoch_end(self, outputs):\n", + " batch_losses = [x['val_loss'] for x in outputs]\n", + " epoch_loss = torch.stack(batch_losses).mean() \n", + " return {'val_loss': epoch_loss.item()}\n", + " \n", + " def epoch_end(self, epoch, result, num_epochs):\n", + " if (epoch+1) % 100 == 0 or epoch == num_epochs-1:\n", + " print(\"Epoch {} loss: {:.4f}\".format(epoch+1, result['val_loss']))\n", + " \n", + " \n", + "def evaluate(model, val_loader):\n", + " outputs = [model.validation_step(batch) for batch in val_loader]\n", + " return model.validation_epoch_end(outputs)\n", + "\n", + "def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):\n", + " history = []\n", + " optimizer = opt_func(model.parameters(), lr)\n", + " for epoch in range(epochs):\n", + " for batch in train_loader:\n", + " loss = model.training_step(batch)\n", + " loss.backward()\n", + " optimizer.step()\n", + " optimizer.zero_grad()\n", + " result = evaluate(model, val_loader)\n", + " model.epoch_end(epoch, result, epochs)\n", + " history.append(result)\n", + " return history" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "f2e22e9a-8724-4084-b706-0be266846c05", + "metadata": {}, + "outputs": [], + "source": [ + "input_size = len(input_cols)\n", + "output_size = len(output_cols)\n", + "model=Model_xPosition()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "efacafe4-797a-4588-b0d8-2e4d883e639a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epoch 100 loss: 6.2637\n", + "Epoch 200 loss: 2.9712\n", + "Epoch 300 loss: 1.9724\n", + "Epoch 400 loss: 1.9376\n", + "Epoch 500 loss: 1.9199\n", + "Epoch 600 loss: 1.9033\n", + "Epoch 700 loss: 1.8863\n", + "Epoch 800 loss: 1.8703\n", + "Epoch 900 loss: 1.8552\n", + "Epoch 1000 loss: 1.8405\n", + "Epoch 1100 loss: 1.8267\n", + "Epoch 1200 loss: 1.8134\n", + "Epoch 1300 loss: 1.8010\n", + "Epoch 1400 loss: 1.7876\n", + "Epoch 1500 loss: 1.7748\n", + "Epoch 1600 loss: 1.7626\n", + "Epoch 1700 loss: 1.7497\n", + "Epoch 1800 loss: 1.7387\n", + "Epoch 1900 loss: 1.7270\n", + "Epoch 2000 loss: 1.7162\n" + ] + } + ], + "source": [ + "epochs = 2000\n", + "lr = 1e-5\n", + "learning_proccess = fit(epochs, lr, model, train_loader, val_loader)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "7007ab5a-dc79-4321-beed-cd54dd197858", + "metadata": {}, + "outputs": [], + "source": [ + "def predict_single(input, target, model):\n", + " inputs = input.unsqueeze(0)\n", + " predictions = model(inputs)\n", + " prediction = predictions[0].detach()\n", + "\n", + " return \"Target: \"+str(target)+\" Predicted: \"+str(prediction)+\"\\n\"" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "1e6ed168-2cdc-45dc-a0ff-e147ac4c46be", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Target: tensor([16.]) Predicted: tensor([13.5861])\n", + "Target: tensor([14.]) Predicted: tensor([10.1553])\n", + "Target: tensor([19.]) Predicted: tensor([16.5709])\n", + "Target: tensor([18.]) Predicted: tensor([18.5809])\n", + "Target: tensor([2.]) Predicted: tensor([2.5676])\n", + "Target: tensor([14.]) Predicted: tensor([13.4065])\n", + "Target: tensor([11.]) Predicted: tensor([11.6196])\n", + "Target: tensor([13.]) Predicted: tensor([13.1022])\n", + "Target: tensor([17.]) Predicted: tensor([14.5672])\n", + "Target: tensor([1.]) Predicted: tensor([-1.9346])\n" + ] + } + ], + "source": [ + "for i in random.sample(range(0, len(val_ds)), 10):\n", + " input_, target = val_ds[i]\n", + " print(predict_single(input_, target, model),end=\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "50c62065-5094-4595-995c-6d0b71f1f28a", + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"result.txt\", \"w+\") as file:\n", + " for i in range(0, len(val_ds), 1):\n", + " input_, target = val_ds[i]\n", + " file.write(str(predict_single(input_, target, model)))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "8dffe789-1ad5-44f1-8f21-92b9c89ed974", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[NbConvertApp] Converting notebook ml_pytorch.ipynb to script\n", + "[NbConvertApp] Writing 3828 bytes to ml_pytorch.py\n" + ] + } + ], + "source": [ + "!jupyter nbconvert --to script ml_pytorch.ipynb" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/ml_pytorch.py b/ml_pytorch.py index 1c803da..9f0b449 100644 --- a/ml_pytorch.py +++ b/ml_pytorch.py @@ -1,30 +1,74 @@ #!/usr/bin/env python # coding: utf-8 -# In[233]: +# In[1]: import torch +import jovian +import torchvision +import matplotlib import torch.nn as nn -import torch.nn.functional as F import pandas as pd -import numpy as np -import random -from sklearn.model_selection import train_test_split +import matplotlib.pyplot as plt +import seaborn as sns +import torch.nn.functional as F +from torchvision.datasets.utils import download_url from torch.utils.data import DataLoader, TensorDataset, random_split -from sklearn import preprocessing +import random +import os +import sys -class Model(nn.Module): + +# In[2]: + + +#load data +dataframe = pd.read_csv("understat.csv") + +#choose columns +input_cols=list(dataframe.columns)[4:11] +output_cols = ['position'] +input_cols, output_cols + + +# In[4]: + + +def dataframe_to_arrays(dataframe): + dataframe_loc = dataframe.copy(deep=True) + inputs_array = dataframe_loc[input_cols].to_numpy() + targets_array = dataframe_loc[output_cols].to_numpy() + return inputs_array, targets_array + +inputs_array, targets_array = dataframe_to_arrays(dataframe) + +inputs = torch.from_numpy(inputs_array).type(torch.float) +targets = torch.from_numpy(targets_array).type(torch.float) + +dataset = TensorDataset(inputs, targets) + + +# In[7]: + + +train_ds, val_ds = random_split(dataset, [548, 136]) +batch_size=50 +train_loader = DataLoader(train_ds, batch_size, shuffle=True) +val_loader = DataLoader(val_ds, batch_size) + + +# In[8]: + + +class Model_xPosition(nn.Module): def __init__(self): super().__init__() - # self.fc1 = nn.Linear(2, 60) - # self.fc2 = nn.Linear(60, 30) - # self.out = nn.Linear(30, 1) - self.linear = nn.Linear(2, 616) - - def forward(self, x): - out = torch.sigmoid(self.linear(x)) - return out + self.linear = nn.Linear(input_size,output_size) + + def forward(self, xb): + out = self.linear(xb) + return out def training_step(self, batch): inputs, targets = batch @@ -36,9 +80,7 @@ class Model(nn.Module): def validation_step(self, batch): inputs, targets = batch - # Generate predictions out = self(inputs) - # Calculate loss loss = F.l1_loss(out,targets) return {'val_loss': loss.detach()} @@ -48,73 +90,10 @@ class Model(nn.Module): return {'val_loss': epoch_loss.item()} def epoch_end(self, epoch, result, num_epochs): - # Print result every 100th epoch if (epoch+1) % 100 == 0 or epoch == num_epochs-1: - print("Epoch [{}], val_loss: {:.4f}".format(epoch+1, result['val_loss'])) - - -# In[234]: - - -data = pd.read_csv('understat.csv') - - -# In[235]: - - -training_data = data.sample(frac=0.9, random_state=25) -testing_data = data.drop(training_data.index) - - -# In[236]: - - -train_set = training_data[['matches', 'wins', 'position']] -test_set = testing_data[['matches', 'wins', 'position']] - - -# In[237]: - - -# Zamiana danych na tensory -X_train = train_set[['matches', 'wins']].to_numpy() -X_test = test_set[['matches', 'wins']].to_numpy() -y_train = train_set['position'].to_numpy() -y_test = test_set['position'].to_numpy() - -X_train = torch.FloatTensor(X_train) -X_test = torch.FloatTensor(X_test) -y_train = torch.LongTensor(y_train) -y_test = torch.LongTensor(y_test) - - -# In[238]: - - -train_dataset = TensorDataset(X_train, y_train) -test_dataset = TensorDataset(X_test, y_test) - - -# In[239]: - - -batch_size=50 -train_loader = DataLoader(train_dataset, batch_size, shuffle=True) -test_loader = DataLoader(test_dataset, batch_size) - - -# In[240]: - - -# Hiperparametry -model = Model() -criterion = nn.CrossEntropyLoss() -optimizer = torch.optim.Adam(model.parameters(), lr=0.01) - - -# In[241]: - - + print("Epoch {} loss: {:.4f}".format(epoch+1, result['val_loss'])) + + def evaluate(model, val_loader): outputs = [model.validation_step(batch) for batch in val_loader] return model.validation_epoch_end(outputs) @@ -134,34 +113,23 @@ def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD): return history -# In[242]: +# In[9]: -epochs = 1000 - -def print_(loss): - print ("The loss calculated: ", loss) - +input_size = len(input_cols) +output_size = len(output_cols) +model=Model_xPosition() -# In[243]: +# In[11]: -for epoch in range(1, epochs+1): - - y_pred = model(X_train) - loss = loss_fn(y_pred, y_train) - if epoch%100 == 0: - print ("Epoch #",epoch) - print_(loss.item()) - - # Zero gradients - optimizer.zero_grad() - loss.backward() # Gradients - optimizer.step() # Update +epochs = 2000 +lr = 1e-5 +learning_proccess = fit(epochs, lr, model, train_loader, val_loader) -# In[244]: +# In[13]: def predict_single(input, target, model): @@ -169,23 +137,23 @@ def predict_single(input, target, model): predictions = model(inputs) prediction = predictions[0].detach() - return "Target: "+str(target)+"----- Prediction: "+str(prediction)+"\n" + return "Target: "+str(target)+" Predicted: "+str(prediction)+"\n" -# In[245]: +# In[14]: -for i in random.sample(range(0, len(test_dataset)), 10): - input_, target = test_dataset[i] +for i in random.sample(range(0, len(val_ds)), 10): + input_, target = val_ds[i] print(predict_single(input_, target, model),end="") - -# In[246]: +# In[15]: with open("result.txt", "w+") as file: - for i in range(0, len(test_dataset), 1): - input_, target = test_dataset[i] + for i in range(0, len(val_ds), 1): + input_, target = val_ds[i] file.write(str(predict_single(input_, target, model))) +