update ml_pytorch
parent eb6d2ec4d0
commit c29b8ef427
309 ml_pytorch.ipynb Normal file
@@ -0,0 +1,309 @@
{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "98cddc6a-2ce1-4933-a2b7-96d2c2d197f4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/javascript": [
       "if (window.IPython && IPython.notebook.kernel) IPython.notebook.kernel.execute('jovian.utils.jupyter.get_notebook_name_saved = lambda: \"' + IPython.notebook.notebook_name + '\"')"
      ],
      "text/plain": [
       "<IPython.core.display.Javascript object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "import torch\n",
    "import jovian\n",
    "import torchvision\n",
    "import matplotlib\n",
    "import torch.nn as nn\n",
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import torch.nn.functional as F\n",
    "from torchvision.datasets.utils import download_url\n",
    "from torch.utils.data import DataLoader, TensorDataset, random_split\n",
    "import random\n",
    "import os\n",
    "import sys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "7bb63556-d009-4d9f-9de0-033a30ad3fc4",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(['matches', 'wins', 'draws', 'loses', 'scored', 'missed', 'pts'],\n",
       " ['position'])"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#load data\n",
    "dataframe = pd.read_csv(\"understat.csv\")\n",
    "\n",
    "#choose columns\n",
    "input_cols=list(dataframe.columns)[4:11]\n",
    "output_cols = ['position']\n",
    "input_cols, output_cols"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "c8151c46-c234-42b7-a786-50c73e3aa2f5",
   "metadata": {},
   "outputs": [],
   "source": [
    "def dataframe_to_arrays(dataframe):\n",
    "    dataframe_loc = dataframe.copy(deep=True)\n",
    "    inputs_array = dataframe_loc[input_cols].to_numpy()\n",
    "    targets_array = dataframe_loc[output_cols].to_numpy()\n",
    "    return inputs_array, targets_array\n",
    "\n",
    "inputs_array, targets_array = dataframe_to_arrays(dataframe)\n",
    "\n",
    "inputs = torch.from_numpy(inputs_array).type(torch.float)\n",
    "targets = torch.from_numpy(targets_array).type(torch.float)\n",
    "\n",
    "dataset = TensorDataset(inputs, targets)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "8c89947b-c2fe-407d-9588-3f0087df5955",
   "metadata": {},
   "outputs": [],
   "source": [
    "train_ds, val_ds = random_split(dataset, [548, 136])\n",
    "batch_size=50\n",
    "train_loader = DataLoader(train_ds, batch_size, shuffle=True)\n",
    "val_loader = DataLoader(val_ds, batch_size)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "3b1426a0-5b15-46f8-aea9-871462ca9467",
   "metadata": {},
   "outputs": [],
   "source": [
    "class Model_xPosition(nn.Module):\n",
    "    def __init__(self):\n",
    "        super().__init__()\n",
    "        self.linear = nn.Linear(input_size,output_size)\n",
    "\n",
    "    def forward(self, xb):\n",
    "        out = self.linear(xb)\n",
    "        return out\n",
    "\n",
    "    def training_step(self, batch):\n",
    "        inputs, targets = batch\n",
    "        # Generate predictions\n",
    "        out = self(inputs)\n",
    "        # Calculate loss\n",
    "        loss = F.l1_loss(out,targets)\n",
    "        return loss\n",
    "\n",
    "    def validation_step(self, batch):\n",
    "        inputs, targets = batch\n",
    "        out = self(inputs)\n",
    "        loss = F.l1_loss(out,targets)\n",
    "        return {'val_loss': loss.detach()}\n",
    "\n",
    "    def validation_epoch_end(self, outputs):\n",
    "        batch_losses = [x['val_loss'] for x in outputs]\n",
    "        epoch_loss = torch.stack(batch_losses).mean()\n",
    "        return {'val_loss': epoch_loss.item()}\n",
    "\n",
    "    def epoch_end(self, epoch, result, num_epochs):\n",
    "        if (epoch+1) % 100 == 0 or epoch == num_epochs-1:\n",
    "            print(\"Epoch {} loss: {:.4f}\".format(epoch+1, result['val_loss']))\n",
    "\n",
    "\n",
    "def evaluate(model, val_loader):\n",
    "    outputs = [model.validation_step(batch) for batch in val_loader]\n",
    "    return model.validation_epoch_end(outputs)\n",
    "\n",
    "def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):\n",
    "    history = []\n",
    "    optimizer = opt_func(model.parameters(), lr)\n",
    "    for epoch in range(epochs):\n",
    "        for batch in train_loader:\n",
    "            loss = model.training_step(batch)\n",
    "            loss.backward()\n",
    "            optimizer.step()\n",
    "            optimizer.zero_grad()\n",
    "        result = evaluate(model, val_loader)\n",
    "        model.epoch_end(epoch, result, epochs)\n",
    "        history.append(result)\n",
    "    return history"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "f2e22e9a-8724-4084-b706-0be266846c05",
   "metadata": {},
   "outputs": [],
   "source": [
    "input_size = len(input_cols)\n",
    "output_size = len(output_cols)\n",
    "model=Model_xPosition()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "efacafe4-797a-4588-b0d8-2e4d883e639a",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Epoch 100 loss: 6.2637\n",
      "Epoch 200 loss: 2.9712\n",
      "Epoch 300 loss: 1.9724\n",
      "Epoch 400 loss: 1.9376\n",
      "Epoch 500 loss: 1.9199\n",
      "Epoch 600 loss: 1.9033\n",
      "Epoch 700 loss: 1.8863\n",
      "Epoch 800 loss: 1.8703\n",
      "Epoch 900 loss: 1.8552\n",
      "Epoch 1000 loss: 1.8405\n",
      "Epoch 1100 loss: 1.8267\n",
      "Epoch 1200 loss: 1.8134\n",
      "Epoch 1300 loss: 1.8010\n",
      "Epoch 1400 loss: 1.7876\n",
      "Epoch 1500 loss: 1.7748\n",
      "Epoch 1600 loss: 1.7626\n",
      "Epoch 1700 loss: 1.7497\n",
      "Epoch 1800 loss: 1.7387\n",
      "Epoch 1900 loss: 1.7270\n",
      "Epoch 2000 loss: 1.7162\n"
     ]
    }
   ],
   "source": [
    "epochs = 2000\n",
    "lr = 1e-5\n",
    "learning_process = fit(epochs, lr, model, train_loader, val_loader)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "7007ab5a-dc79-4321-beed-cd54dd197858",
   "metadata": {},
   "outputs": [],
   "source": [
    "def predict_single(input, target, model):\n",
    "    inputs = input.unsqueeze(0)\n",
    "    predictions = model(inputs)\n",
    "    prediction = predictions[0].detach()\n",
    "\n",
    "    return \"Target: \"+str(target)+\" Predicted: \"+str(prediction)+\"\\n\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "1e6ed168-2cdc-45dc-a0ff-e147ac4c46be",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Target: tensor([16.]) Predicted: tensor([13.5861])\n",
      "Target: tensor([14.]) Predicted: tensor([10.1553])\n",
      "Target: tensor([19.]) Predicted: tensor([16.5709])\n",
      "Target: tensor([18.]) Predicted: tensor([18.5809])\n",
      "Target: tensor([2.]) Predicted: tensor([2.5676])\n",
      "Target: tensor([14.]) Predicted: tensor([13.4065])\n",
      "Target: tensor([11.]) Predicted: tensor([11.6196])\n",
      "Target: tensor([13.]) Predicted: tensor([13.1022])\n",
      "Target: tensor([17.]) Predicted: tensor([14.5672])\n",
      "Target: tensor([1.]) Predicted: tensor([-1.9346])\n"
     ]
    }
   ],
   "source": [
    "for i in random.sample(range(0, len(val_ds)), 10):\n",
    "    input_, target = val_ds[i]\n",
    "    print(predict_single(input_, target, model),end=\"\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "50c62065-5094-4595-995c-6d0b71f1f28a",
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(\"result.txt\", \"w+\") as file:\n",
    "    for i in range(0, len(val_ds), 1):\n",
    "        input_, target = val_ds[i]\n",
    "        file.write(str(predict_single(input_, target, model)))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "id": "8dffe789-1ad5-44f1-8f21-92b9c89ed974",
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[NbConvertApp] Converting notebook ml_pytorch.ipynb to script\n",
      "[NbConvertApp] Writing 3828 bytes to ml_pytorch.py\n"
     ]
    }
   ],
   "source": [
    "!jupyter nbconvert --to script ml_pytorch.ipynb"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
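The notebook hard-codes the 548/136 split, which only adds up while understat.csv has exactly 684 rows. A minimal sketch of a fraction-based alternative (the val_frac value and the synthetic stand-in data are assumptions for illustration, not part of this commit):

import torch
from torch.utils.data import TensorDataset, random_split

# Hypothetical stand-in for the notebook's understat data:
# 684 rows, 7 input columns, 1 target column.
inputs = torch.randn(684, 7)
targets = torch.randn(684, 1)
dataset = TensorDataset(inputs, targets)

val_frac = 0.2                           # assumed hold-out fraction
val_size = int(len(dataset) * val_frac)  # 136 of 684 rows
train_size = len(dataset) - val_size     # the remaining 548 rows
train_ds, val_ds = random_split(dataset, [train_size, val_size])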
182 ml_pytorch.py
@@ -1,29 +1,73 @@
 #!/usr/bin/env python
 # coding: utf-8
 
-# In[233]:
+# In[1]:
 
 
 import torch
+import jovian
+import torchvision
+import matplotlib
 import torch.nn as nn
-import torch.nn.functional as F
 import pandas as pd
-import numpy as np
-import random
-from sklearn.model_selection import train_test_split
-from sklearn import preprocessing
+import matplotlib.pyplot as plt
+import seaborn as sns
+import torch.nn.functional as F
+from torchvision.datasets.utils import download_url
+from torch.utils.data import DataLoader, TensorDataset, random_split
+import random
+import os
+import sys
 
 
-class Model(nn.Module):
+# In[2]:
+
+
+#load data
+dataframe = pd.read_csv("understat.csv")
+
+#choose columns
+input_cols=list(dataframe.columns)[4:11]
+output_cols = ['position']
+input_cols, output_cols
+
+
+# In[4]:
+
+
+def dataframe_to_arrays(dataframe):
+    dataframe_loc = dataframe.copy(deep=True)
+    inputs_array = dataframe_loc[input_cols].to_numpy()
+    targets_array = dataframe_loc[output_cols].to_numpy()
+    return inputs_array, targets_array
+
+inputs_array, targets_array = dataframe_to_arrays(dataframe)
+
+inputs = torch.from_numpy(inputs_array).type(torch.float)
+targets = torch.from_numpy(targets_array).type(torch.float)
+
+dataset = TensorDataset(inputs, targets)
+
+
+# In[7]:
+
+
+train_ds, val_ds = random_split(dataset, [548, 136])
+batch_size=50
+train_loader = DataLoader(train_ds, batch_size, shuffle=True)
+val_loader = DataLoader(val_ds, batch_size)
+
+
+# In[8]:
+
+
+class Model_xPosition(nn.Module):
     def __init__(self):
         super().__init__()
-#        self.fc1 = nn.Linear(2, 60)
-#        self.fc2 = nn.Linear(60, 30)
-#        self.out = nn.Linear(30, 1)
-        self.linear = nn.Linear(2, 616)
+        self.linear = nn.Linear(input_size,output_size)
 
-    def forward(self, x):
-        out = torch.sigmoid(self.linear(x))
+    def forward(self, xb):
+        out = self.linear(xb)
         return out
 
     def training_step(self, batch):
@@ -36,9 +80,7 @@ class Model(nn.Module):
 
     def validation_step(self, batch):
         inputs, targets = batch
-        # Generate predictions
         out = self(inputs)
-        # Calculate loss
         loss = F.l1_loss(out,targets)
         return {'val_loss': loss.detach()}
 
@@ -48,71 +90,8 @@ class Model(nn.Module):
         return {'val_loss': epoch_loss.item()}
 
     def epoch_end(self, epoch, result, num_epochs):
-        # Print result every 100th epoch
         if (epoch+1) % 100 == 0 or epoch == num_epochs-1:
-            print("Epoch [{}], val_loss: {:.4f}".format(epoch+1, result['val_loss']))
-
-
-# In[234]:
-
-
-data = pd.read_csv('understat.csv')
-
-
-# In[235]:
-
-
-training_data = data.sample(frac=0.9, random_state=25)
-testing_data = data.drop(training_data.index)
-
-
-# In[236]:
-
-
-train_set = training_data[['matches', 'wins', 'position']]
-test_set = testing_data[['matches', 'wins', 'position']]
-
-
-# In[237]:
-
-
-# Convert the data to tensors
-X_train = train_set[['matches', 'wins']].to_numpy()
-X_test = test_set[['matches', 'wins']].to_numpy()
-y_train = train_set['position'].to_numpy()
-y_test = test_set['position'].to_numpy()
-
-X_train = torch.FloatTensor(X_train)
-X_test = torch.FloatTensor(X_test)
-y_train = torch.LongTensor(y_train)
-y_test = torch.LongTensor(y_test)
-
-
-# In[238]:
-
-
-train_dataset = TensorDataset(X_train, y_train)
-test_dataset = TensorDataset(X_test, y_test)
-
-
-# In[239]:
-
-
-batch_size=50
-train_loader = DataLoader(train_dataset, batch_size, shuffle=True)
-test_loader = DataLoader(test_dataset, batch_size)
-
-
-# In[240]:
-
-
-# Hyperparameters
-model = Model()
-criterion = nn.CrossEntropyLoss()
-optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
-
-
-# In[241]:
+            print("Epoch {} loss: {:.4f}".format(epoch+1, result['val_loss']))
 
 
 def evaluate(model, val_loader):
@@ -134,34 +113,23 @@ def fit(epochs, lr, model, train_loader, val_loader, opt_func=torch.optim.SGD):
     return history
 
 
-# In[242]:
+# In[9]:
 
 
-epochs = 1000
-
-def print_(loss):
-    print ("The loss calculated: ", loss)
+input_size = len(input_cols)
+output_size = len(output_cols)
+model=Model_xPosition()
 
 
-# In[243]:
+# In[11]:
 
 
-for epoch in range(1, epochs+1):
-
-    y_pred = model(X_train)
-    loss = loss_fn(y_pred, y_train)
-    if epoch%100 == 0:
-        print ("Epoch #",epoch)
-        print_(loss.item())
-
-    # Zero gradients
-    optimizer.zero_grad()
-    loss.backward() # Gradients
-    optimizer.step() # Update
+epochs = 2000
+lr = 1e-5
+learning_process = fit(epochs, lr, model, train_loader, val_loader)
 
 
-# In[244]:
+# In[13]:
 
 
 def predict_single(input, target, model):
@@ -169,23 +137,23 @@ def predict_single(input, target, model):
     predictions = model(inputs)
     prediction = predictions[0].detach()
 
-    return "Target: "+str(target)+"----- Prediction: "+str(prediction)+"\n"
+    return "Target: "+str(target)+" Predicted: "+str(prediction)+"\n"
 
 
-# In[245]:
+# In[14]:
 
 
-for i in random.sample(range(0, len(test_dataset)), 10):
-    input_, target = test_dataset[i]
+for i in random.sample(range(0, len(val_ds)), 10):
+    input_, target = val_ds[i]
     print(predict_single(input_, target, model),end="")
 
 
-# In[246]:
+# In[15]:
 
 
 with open("result.txt", "w+") as file:
-    for i in range(0, len(test_dataset), 1):
-        input_, target = test_dataset[i]
+    for i in range(0, len(val_ds), 1):
+        input_, target = val_ds[i]
         file.write(str(predict_single(input_, target, model)))
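Both versions of the script train on raw feature scales, which is why fit() needs lr = 1e-5 and the stdout above still spends roughly 300 epochs getting the validation loss below 2. A hedged sketch of standardizing the inputs before building the dataset; the names mirror the notebook's cells, but the normalization step itself is not part of this commit:

# Standardize each input column to zero mean and unit variance.
# With scaled features, a larger learning rate (e.g. 1e-2) typically
# converges in far fewer epochs; inputs/targets are the tensors
# produced by dataframe_to_arrays above.
mean = inputs.mean(dim=0, keepdim=True)
std = inputs.std(dim=0, keepdim=True)
dataset = TensorDataset((inputs - mean) / std, targets)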
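Because the model is an unconstrained linear regression, predict_single can return values outside the valid range of league positions; the sample output above shows -1.9346 against a target of 1. A small post-processing sketch, assuming a 20-team league (the helper name and the n_teams default are hypothetical, not part of the commit):

import torch

def to_position(prediction: torch.Tensor, n_teams: int = 20) -> int:
    # Round the raw regression output, then clamp it into the
    # valid 1..n_teams range of league positions.
    return int(prediction.round().clamp(1, n_teams).item())

# e.g. to_position(torch.tensor([-1.9346])) returns 1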