update ml_pytorch

This commit is contained in:
Sebastian 2022-04-26 16:52:42 +02:00
parent eb6d2ec4d0
commit c29b8ef427
2 changed files with 388 additions and 111 deletions

309
ml_pytorch.ipynb Normal file
View File

@ -0,0 +1,309 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "98cddc6a-2ce1-4933-a2b7-96d2c2d197f4",
"metadata": {},
"outputs": [
{
"data": {
"application/javascript": [
"if (window.IPython && IPython.notebook.kernel) IPython.notebook.kernel.execute('jovian.utils.jupyter.get_notebook_name_saved = lambda: \"' + IPython.notebook.notebook_name + '\"')"
],
"text/plain": [
"<IPython.core.display.Javascript object>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import torch\n",
"import jovian\n",
"import torchvision\n",
"import matplotlib\n",
"import torch.nn as nn\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"import torch.nn.functional as F\n",
"from torchvision.datasets.utils import download_url\n",
"from torch.utils.data import DataLoader, TensorDataset, random_split\n",
"import random\n",
"import os\n",
"import sys"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "7bb63556-d009-4d9f-9de0-033a30ad3fc4",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(['matches', 'wins', 'draws', 'loses', 'scored', 'missed', 'pts'],\n",
" ['position'])"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"#load data\n",
"dataframe = pd.read_csv(\"understat.csv\")\n",
"\n",
"#choose columns\n",
"input_cols=list(dataframe.columns)[4:11]\n",
"output_cols = ['position']\n",
"input_cols, output_cols"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "c8151c46-c234-42b7-a786-50c73e3aa2f5",
"metadata": {},
"outputs": [],
"source": [
"def dataframe_to_arrays(dataframe):\n",
" dataframe_loc = dataframe.copy(deep=True)\n",
" inputs_array = dataframe_loc[input_cols].to_numpy()\n",
" targets_array = dataframe_loc[output_cols].to_numpy()\n",
" return inputs_array, targets_array\n",
"\n",
"inputs_array, targets_array = dataframe_to_arrays(dataframe)\n",
"\n",
"inputs = torch.from_numpy(inputs_array).type(torch.float)\n",
"targets = torch.from_numpy(targets_array).type(torch.float)\n",
"\n",
"dataset = TensorDataset(inputs, targets)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "8c89947b-c2fe-407d-9588-3f0087df5955",
"metadata": {},
"outputs": [],
"source": [
"train_ds, val_ds = random_split(dataset, [548, 136])\n",
"batch_size=50\n",
"train_loader = DataLoader(train_ds, batch_size, shuffle=True)\n",
"val_loader = DataLoader(val_ds, batch_size)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "3b1426a0-5b15-46f8-aea9-871462ca9467",
"metadata": {},
"outputs": [],
"source": [
"class Model_xPosition(nn.Module):\n",
" def __init__(self):\n",
" super().__init__()\n",
" self.linear = nn.Linear(input_size,output_size) \n",
" \n",
" def forward(self, xb): \n",
" out = self.linear(xb)\n",
" return out\n",
" \n",
" def training_step(self, batch):\n",
" inputs, targets = batch \n",
" # Generate predictions\n",
" out = self(inputs) \n",
" # Calculate loss\n",
" loss = F.l1_loss(out,targets) \n",
" return loss\n",
" \n",
" def validation_step(self, batch):\n",
" inputs, targets = batch\n",
" out = self(inputs)\n",
" loss = F.l1_loss(out,targets) \n",
" return {'val_loss': loss.detach()}\n",
" \n",
" def validation_epoch_end(self, outputs):\n",
" batch_losses = [x['val_loss'] for x in outputs]\n",
" epoch_loss = torch.stack(batch_losses).mean() \n",
" return {'val_loss': epoch_loss.item()}\n",
" \n",
" def epoch_end(self, epoch, result, num_epochs):\n",
" if (epoch+1) % 100 == 0 or epoch == num_epochs-1:\n",
" print(\"Epoch {} loss: {:.4f}\".format(epoch+1, result['val_loss']))\n",
" \n",
" \n",
"def evaluate(model, val_loader):\n",
" outputs = [model.validation_step(batch) for batch in val_loader]\n",
" return model.validation_epoch_end(outputs)\n",
"\n",
"def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):\n",
" history = []\n",
" optimizer = opt_func(model.parameters(), lr)\n",
" for epoch in range(epochs):\n",
" for batch in train_loader:\n",
" loss = model.training_step(batch)\n",
" loss.backward()\n",
" optimizer.step()\n",
" optimizer.zero_grad()\n",
" result = evaluate(model, val_loader)\n",
" model.epoch_end(epoch, result, epochs)\n",
" history.append(result)\n",
" return history"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "f2e22e9a-8724-4084-b706-0be266846c05",
"metadata": {},
"outputs": [],
"source": [
"input_size = len(input_cols)\n",
"output_size = len(output_cols)\n",
"model=Model_xPosition()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "efacafe4-797a-4588-b0d8-2e4d883e639a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 100 loss: 6.2637\n",
"Epoch 200 loss: 2.9712\n",
"Epoch 300 loss: 1.9724\n",
"Epoch 400 loss: 1.9376\n",
"Epoch 500 loss: 1.9199\n",
"Epoch 600 loss: 1.9033\n",
"Epoch 700 loss: 1.8863\n",
"Epoch 800 loss: 1.8703\n",
"Epoch 900 loss: 1.8552\n",
"Epoch 1000 loss: 1.8405\n",
"Epoch 1100 loss: 1.8267\n",
"Epoch 1200 loss: 1.8134\n",
"Epoch 1300 loss: 1.8010\n",
"Epoch 1400 loss: 1.7876\n",
"Epoch 1500 loss: 1.7748\n",
"Epoch 1600 loss: 1.7626\n",
"Epoch 1700 loss: 1.7497\n",
"Epoch 1800 loss: 1.7387\n",
"Epoch 1900 loss: 1.7270\n",
"Epoch 2000 loss: 1.7162\n"
]
}
],
"source": [
"epochs = 2000\n",
"lr = 1e-5\n",
"learning_proccess = fit(epochs, lr, model, train_loader, val_loader)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "7007ab5a-dc79-4321-beed-cd54dd197858",
"metadata": {},
"outputs": [],
"source": [
"def predict_single(input, target, model):\n",
" inputs = input.unsqueeze(0)\n",
" predictions = model(inputs)\n",
" prediction = predictions[0].detach()\n",
"\n",
" return \"Target: \"+str(target)+\" Predicted: \"+str(prediction)+\"\\n\""
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "1e6ed168-2cdc-45dc-a0ff-e147ac4c46be",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Target: tensor([16.]) Predicted: tensor([13.5861])\n",
"Target: tensor([14.]) Predicted: tensor([10.1553])\n",
"Target: tensor([19.]) Predicted: tensor([16.5709])\n",
"Target: tensor([18.]) Predicted: tensor([18.5809])\n",
"Target: tensor([2.]) Predicted: tensor([2.5676])\n",
"Target: tensor([14.]) Predicted: tensor([13.4065])\n",
"Target: tensor([11.]) Predicted: tensor([11.6196])\n",
"Target: tensor([13.]) Predicted: tensor([13.1022])\n",
"Target: tensor([17.]) Predicted: tensor([14.5672])\n",
"Target: tensor([1.]) Predicted: tensor([-1.9346])\n"
]
}
],
"source": [
"for i in random.sample(range(0, len(val_ds)), 10):\n",
" input_, target = val_ds[i]\n",
" print(predict_single(input_, target, model),end=\"\")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "50c62065-5094-4595-995c-6d0b71f1f28a",
"metadata": {},
"outputs": [],
"source": [
"with open(\"result.txt\", \"w+\") as file:\n",
" for i in range(0, len(val_ds), 1):\n",
" input_, target = val_ds[i]\n",
" file.write(str(predict_single(input_, target, model)))"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "8dffe789-1ad5-44f1-8f21-92b9c89ed974",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"[NbConvertApp] Converting notebook ml_pytorch.ipynb to script\n",
"[NbConvertApp] Writing 3828 bytes to ml_pytorch.py\n"
]
}
],
"source": [
"!jupyter nbconvert --to script ml_pytorch.ipynb"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -1,29 +1,73 @@
#!/usr/bin/env python
# coding: utf-8
# In[233]:
# In[1]:
import torch
import jovian
import torchvision
import matplotlib
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import numpy as np
import random
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
import torch.nn.functional as F
from torchvision.datasets.utils import download_url
from torch.utils.data import DataLoader, TensorDataset, random_split
from sklearn import preprocessing
import random
import os
import sys
class Model(nn.Module):
# In[2]:
# Load the data set from the CSV file in the current working directory.
dataframe = pd.read_csv("understat.csv")
# Choose columns: the CSV columns at positions 4..10 are the model inputs,
# and 'position' is the single target column.
input_cols=list(dataframe.columns)[4:11]
output_cols = ['position']
# Bare expression left over from the notebook cell (it displays the selection
# there); it has no effect when run as a script.
input_cols, output_cols
# In[4]:
def dataframe_to_arrays(dataframe, input_columns=None, output_columns=None):
    """Split a DataFrame into NumPy arrays of model inputs and targets.

    Args:
        dataframe: pandas DataFrame holding both the input and target columns.
        input_columns: Column labels to use as inputs. Defaults to the
            module-level ``input_cols``.
        output_columns: Column labels to use as targets. Defaults to the
            module-level ``output_cols``.

    Returns:
        Tuple ``(inputs_array, targets_array)`` of 2-D NumPy arrays with one
        row per DataFrame row.

    Raises:
        KeyError: If a requested column is missing from ``dataframe``.
    """
    if input_columns is None:
        input_columns = input_cols
    if output_columns is None:
        output_columns = output_cols
    # Selecting with a list of labels already produces a new frame, so the
    # caller's DataFrame is never modified and no defensive deep copy of the
    # whole table is needed.
    inputs_array = dataframe[list(input_columns)].to_numpy()
    targets_array = dataframe[list(output_columns)].to_numpy()
    return inputs_array, targets_array
# Convert the selected columns to float32 tensors and pair every input row
# with its target row in a single TensorDataset.
inputs_array, targets_array = dataframe_to_arrays(dataframe)
inputs = torch.from_numpy(inputs_array).type(torch.float)
targets = torch.from_numpy(targets_array).type(torch.float)
dataset = TensorDataset(inputs, targets)
# In[7]:
# Hold out one fifth of the samples for validation. The sizes are derived from
# the dataset length instead of being hard-coded, because random_split raises
# when the lengths do not sum to len(dataset); for the 684-row data set this
# still yields the original 548 / 136 split.
val_size = len(dataset) // 5
train_size = len(dataset) - val_size
train_ds, val_ds = random_split(dataset, [train_size, val_size])
batch_size=50
# Only the training batches are shuffled; validation is read in a fixed order.
train_loader = DataLoader(train_ds, batch_size, shuffle=True)
val_loader = DataLoader(val_ds, batch_size)
# In[8]:
class Model_xPosition(nn.Module):
def __init__(self):
super().__init__()
# self.fc1 = nn.Linear(2, 60)
# self.fc2 = nn.Linear(60, 30)
# self.out = nn.Linear(30, 1)
self.linear = nn.Linear(2, 616)
self.linear = nn.Linear(input_size,output_size)
def forward(self, x):
out = torch.sigmoid(self.linear(x))
def forward(self, xb):
out = self.linear(xb)
return out
def training_step(self, batch):
@ -36,9 +80,7 @@ class Model(nn.Module):
def validation_step(self, batch):
inputs, targets = batch
# Generate predictions
out = self(inputs)
# Calculate loss
loss = F.l1_loss(out,targets)
return {'val_loss': loss.detach()}
@ -48,71 +90,8 @@ class Model(nn.Module):
return {'val_loss': epoch_loss.item()}
def epoch_end(self, epoch, result, num_epochs):
# Print result every 100th epoch
if (epoch+1) % 100 == 0 or epoch == num_epochs-1:
print("Epoch [{}], val_loss: {:.4f}".format(epoch+1, result['val_loss']))
# In[234]:
data = pd.read_csv('understat.csv')
# In[235]:
training_data = data.sample(frac=0.9, random_state=25)
testing_data = data.drop(training_data.index)
# In[236]:
train_set = training_data[['matches', 'wins', 'position']]
test_set = testing_data[['matches', 'wins', 'position']]
# In[237]:
# Zamiana danych na tensory
X_train = train_set[['matches', 'wins']].to_numpy()
X_test = test_set[['matches', 'wins']].to_numpy()
y_train = train_set['position'].to_numpy()
y_test = test_set['position'].to_numpy()
X_train = torch.FloatTensor(X_train)
X_test = torch.FloatTensor(X_test)
y_train = torch.LongTensor(y_train)
y_test = torch.LongTensor(y_test)
# In[238]:
train_dataset = TensorDataset(X_train, y_train)
test_dataset = TensorDataset(X_test, y_test)
# In[239]:
batch_size=50
train_loader = DataLoader(train_dataset, batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size)
# In[240]:
# Hiperparametry
model = Model()
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# In[241]:
print("Epoch {} loss: {:.4f}".format(epoch+1, result['val_loss']))
def evaluate(model, val_loader):
@ -134,34 +113,23 @@ def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
return history
# In[242]:
# In[9]:
epochs = 1000
def print_(loss):
print ("The loss calculated: ", loss)
# Model_xPosition.__init__ reads input_size and output_size as module-level
# names, so both must be defined before the model is instantiated.
input_size = len(input_cols)
output_size = len(output_cols)
model=Model_xPosition()
# In[243]:
# In[11]:
for epoch in range(1, epochs+1):
y_pred = model(X_train)
loss = loss_fn(y_pred, y_train)
if epoch%100 == 0:
print ("Epoch #",epoch)
print_(loss.item())
# Zero gradients
optimizer.zero_grad()
loss.backward() # Gradients
optimizer.step() # Update
# Training hyperparameters. fit() is called without opt_func, so its default
# optimizer (torch.optim.SGD) is used.
epochs = 2000
lr = 1e-5
# Keep the history returned by fit() for later inspection.
learning_proccess = fit(epochs, lr, model, train_loader, val_loader)
# In[244]:
# In[13]:
def predict_single(input, target, model):
@ -169,23 +137,23 @@ def predict_single(input, target, model):
predictions = model(inputs)
prediction = predictions[0].detach()
return "Target: "+str(target)+"----- Prediction: "+str(prediction)+"\n"
return "Target: "+str(target)+" Predicted: "+str(prediction)+"\n"
# In[245]:
# In[14]:
for i in random.sample(range(0, len(test_dataset)), 10):
input_, target = test_dataset[i]
for i in random.sample(range(0, len(val_ds)), 10):
input_, target = val_ds[i]
print(predict_single(input_, target, model),end="")
# In[246]:
# In[15]:
with open("result.txt", "w+") as file:
for i in range(0, len(test_dataset), 1):
input_, target = test_dataset[i]
for i in range(0, len(val_ds), 1):
input_, target = val_ds[i]
file.write(str(predict_single(input_, target, model)))