Jenkins-2 Task 1.
parent 8a09b3e485, commit 38d765d1f2
.gitignore (vendored, new file)
@@ -0,0 +1,3 @@
venv
.ipynb_checkpoints
.vscode
Dockerfile (new file)
@@ -0,0 +1,27 @@
# Our image will inherit from the latest Ubuntu image
FROM ubuntu:focal

# Install the required dependencies. Note the "-y" flag (assume yes)
RUN apt update
RUN apt install -y python3 python3-pip dos2unix git
RUN pip3 install kaggle
RUN apt install -y unzip
RUN mkdir /.kaggle
RUN chmod -R 777 /.kaggle
#RUN export KAGGLE_CONFIG_DIR=~/.kaggle
COPY ./requirments.txt ./
RUN pip3 install -r requirments.txt
RUN pip3 install torch==1.8.1+cpu torchvision==0.9.1+cpu torchaudio==0.8.1 -f https://download.pytorch.org/whl/torch_stable.html

# Create the /app directory in the container (if it does not exist) and switch to it (all subsequent RUN, CMD, ENTRYPOINT, COPY and ADD commands will run there)
WORKDIR /app

# Copy our script into the /app directory in the container
# COPY ./skrypt.sh ./
# RUN chmod +x skrypt.sh
# RUN dos2unix skrypt.sh

COPY ./dlgssdpytorch.py ./
RUN chmod +x dlgssdpytorch.py
COPY ./create_dataset.py ./
RUN chmod +x create_dataset.py
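Not part of the commit, but for orientation: a minimal sketch of building and exercising this image locally, assuming the rokoch/ium:01 tag used by the Jenkinsfiles; the KAGGLE_* values are placeholders.

# build the image from the repository root
docker build -t rokoch/ium:01 .
# run the dataset script inside it; credentials are supplied via environment variables
docker run --rm -e KAGGLE_USERNAME=<your-username> -e KAGGLE_KEY=<your-key> rokoch/ium:01 python3 /app/create_dataset.py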
Global_Superstore2.csv (new file, 51291 lines)
File diff suppressed because it is too large.
Global_Superstore2.csv.zip (new binary file)
Binary file not shown.
Jenkins_train (new file)
@@ -0,0 +1,29 @@
pipeline {
    agent any
    stages {
        stage('checkout') {
            steps {
                checkout([$class: 'GitSCM', branches: [[name: '*/master']], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[url: 'https://git.wmi.amu.edu.pl/s426206/ium_426206.git']]])
            }
        }

        stage('docker') {
            steps {
                script {
                    def img = docker.build('rokoch/ium:01')
                    img.inside {
                        sh 'chmod +x dlgssdpytorch.py'
                        sh 'python3 ./dlgssdpytorch.py'
                    }
                }
            }
        }

        stage('end') {
            steps {
                // Archive the result
                archiveArtifacts 'model.pt'
            }
        }
    }
}
Jenkinsfile (vendored)
@@ -1,46 +1,51 @@
 pipeline {
     agent any
     parameters {
         string(
-            defaultValue: '0',
-            description: 'Umożliwia zdefiniowanie wielkości odcięcia zbioru danych.',
-            name: 'CUTOFF',
+            defaultValue: '',
+            description: 'Parametry trenowania.',
+            name: 'PARAMETRY',
             trim: false
         )
         string(
             defaultValue: 'rokoch',
             description: 'Kaggle username',
             name: 'KAGGLE_USERNAME',
             trim: false
         )
         password(
             defaultValue: '',
             description: 'Kaggle token taken from kaggle.json file, as described in https://github.com/Kaggle/kaggle-api#api-credentials',
             name: 'KAGGLE_KEY'
         )
     }
     stages {
         stage('checkout') {
             steps {
                 checkout([$class: 'GitSCM', branches: [[name: '*/master']], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[url: 'https://git.wmi.amu.edu.pl/s426206/ium_426206.git']]])
             }
         }

-        stage('sh') {
+        stage('docker') {
             steps {
                 withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
                          "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
-                    sh "chmod +x skrypt.sh"
-                    sh "./skrypt.sh ${params.CUTOFF} | tee output.txt"
+                    script {
+                        def img = docker.build('rokoch/ium:01')
+                        img.inside {
+                            sh 'chmod +x create_dataset.py'
+                            sh 'python3 ./create_dataset.py $PARAMETRY'
+                        }
+                    }
                 }
             }
         }

         stage('end') {
             steps {
                 // Archive the result
-                archiveArtifacts 'output.txt,Global_Superstore22.csv,Global_Superstore2.csv.dev,Global_Superstore2.csv.test,Global_Superstore2.csv.train'
+                archiveArtifacts 'train_dataset.pt,val_dataset.pt'
             }
         }
     }
 }
Jenkinsfile_stats (new file)
@@ -0,0 +1,32 @@
pipeline {
    //agent { docker {image 'rokoch/ium:01' }}
    agent any
    parameters {
        buildSelector(
            defaultSelector: lastSuccessful(),
            description: 'Which build to use for copying artifacts',
            name: 'BUILD_SELECTOR')
    }
    stages {
        stage('Copy artifact') {
            steps {
                copyArtifacts filter: 'Global_Superstore22.csv,Global_Superstore2.csv.dev,Global_Superstore2.csv.test,Global_Superstore2.csv.train', fingerprintArtifacts: false, projectName: 's426206-create-dataset', selector: buildParameter('BUILD_SELECTOR')
            }
        }
        stage('Clone repo') {
            steps {
                script {
                    //docker.withRegistry("https://hub.docker.com/r/rokoch/ium"){
                    docker.image("rokoch/ium:01").inside {
                        sh 'rm -rf ium_426206'
                        sh 'git clone https://git.wmi.amu.edu.pl/s426206/ium_426206.git'
                        sh "chmod +x ium_426206/stats.sh"
                        sh "ium_426206/stats.sh | tee output.txt"
                        archiveArtifacts 'output.txt'
                    }
                    //}
                }
            }
        }
    }
}
@@ -1,3 +1,3 @@
 # ium_426206

 Inżynieria Uczenia Maszynowego
create_dataset.py (new file)
@@ -0,0 +1,59 @@
import zipfile
import torch
import pandas as pd
import datetime
import numpy as np

from kaggle.api.kaggle_api_extended import KaggleApi
import torch.nn as nn
from torch.utils.data.dataset import random_split
from torch.utils.data import Dataset, TensorDataset
from sklearn import preprocessing


api = KaggleApi()
api.authenticate()
api.dataset_download_file('apoorvaappz/global-super-store-dataset',
                          file_name='Global_Superstore2.csv', path='./')

with zipfile.ZipFile('Global_Superstore2.csv.zip', 'r') as zipref:
    zipref.extractall('.')

data = pd.read_csv("Global_Superstore2.csv", header=0, sep=',')

data["Order Date"] = pd.to_datetime(data["Order Date"])
data = data.sort_values(by="Order Date")

#print(data)

byMonthsYears = {}
for index, row in data.iterrows():
    #datee = datetime.datetime.strptime(row['Order Date'], "%d-%m-%Y")
    #byMonthsYears.setdefault(datee.strftime("%m-%Y"), 0)
    #byMonthsYears[datee.strftime("%m-%Y")] += row['Sales']
    byMonthsYears.setdefault(row['Order Date'].strftime("%d-%m-%Y"), 0)
    byMonthsYears[row['Order Date'].strftime("%d-%m-%Y")] += row['Sales']
df = data.groupby('Order Date').agg({'Customer Name':'count', 'Sales': 'sum'}).reset_index().rename(columns={'Sales':'Sales sum', 'Customer Name':'Sales count'})

# data normalization
flcols = df[['Sales count', 'Sales sum']].columns
x = df[['Sales count', 'Sales sum']].values
# min_max_scaler = preprocessing.MinMaxScaler()
max_abs_scaler = preprocessing.MaxAbsScaler()
# x_scaled = min_max_scaler.fit_transform(x)
x_scaled = max_abs_scaler.fit_transform(x)
normcols = pd.DataFrame(x_scaled, columns=flcols)
for col in flcols:
    df[col] = normcols[col]
#df.to_csv('mms_norm.csv')

x_tensor = torch.tensor(df['Sales sum'].values).float()
y_tensor = torch.tensor(df['Sales count'].values).float()

dataset = TensorDataset(x_tensor, y_tensor)

lengths = [int(len(dataset)*0.8), int(len(dataset)*0.2)]
train_dataset, val_dataset = random_split(dataset, lengths)

torch.save(train_dataset, 'train_dataset.pt')
torch.save(val_dataset, 'val_dataset.pt')
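The Kaggle client used above reads its credentials from the environment, which is how the Jenkinsfile supplies them via withEnv. A local run therefore looks roughly like this (credential values are placeholders, not part of the commit):

export KAGGLE_USERNAME=<your-kaggle-username>
export KAGGLE_KEY=<your-kaggle-key>
python3 create_dataset.py    # downloads the dataset and writes train_dataset.pt and val_dataset.pt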
dlgssdpytorch copy.py (new file)
@@ -0,0 +1,180 @@
import zipfile
import torch
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import datetime
import numpy as np

from kaggle.api.kaggle_api_extended import KaggleApi
import torch.nn as nn
import torch.optim as optim
from torch.utils.data.dataset import random_split
from torch.utils.data import Dataset, TensorDataset, DataLoader
from torchviz import make_dot
from sklearn import preprocessing


# api = KaggleApi()
# api.authenticate()
# api.dataset_download_file('apoorvaappz/global-super-store-dataset',
#                           file_name='Global_Superstore2.csv', path='./')

# with zipfile.ZipFile('Global_Superstore2.csv.zip', 'r') as zipref:
#     zipref.extractall('.')

data = pd.read_csv("Global_Superstore2.csv", header=0, sep=',')

data["Order Date"] = pd.to_datetime(data["Order Date"])
data = data.sort_values(by="Order Date")

#print(data)

byMonthsYears = {}
for index, row in data.iterrows():
    #datee = datetime.datetime.strptime(row['Order Date'], "%d-%m-%Y")
    #byMonthsYears.setdefault(datee.strftime("%m-%Y"), 0)
    #byMonthsYears[datee.strftime("%m-%Y")] += row['Sales']
    byMonthsYears.setdefault(row['Order Date'].strftime("%d-%m-%Y"), 0)
    byMonthsYears[row['Order Date'].strftime("%d-%m-%Y")] += row['Sales']
df = data.groupby('Order Date').agg({'Customer Name':'count', 'Sales': 'sum'}).reset_index().rename(columns={'Sales':'Sales sum', 'Customer Name':'Sales count'})
# data normalization
flcols = df[['Sales count', 'Sales sum']].columns
x = df[['Sales count', 'Sales sum']].values
# min_max_scaler = preprocessing.MinMaxScaler()
max_abs_scaler = preprocessing.MaxAbsScaler()
# x_scaled = min_max_scaler.fit_transform(x)
x_scaled = max_abs_scaler.fit_transform(x)
normcols = pd.DataFrame(x_scaled, columns=flcols)
for col in flcols:
    df[col] = normcols[col]
df.to_csv('mms_norm.csv')
exit()
# fig, ax = plt.subplots()
# fig.set_figheight(15)
# fig.set_figwidth(20)
# ax.scatter(df['Month and Year'], df['Sum of sales'])
#plt.show()
# # Data Generation
# np.random.seed(42)
# x = np.random.rand(100, 1)
# y = 1 + 2 * x + .1 * np.random.randn(100, 1)

# # Shuffles the indices
# idx = np.arange(100)
# np.random.shuffle(idx)

# # Uses first 80 random indices for train
# train_idx = idx[:80]
# # Uses the remaining indices for validation
# val_idx = idx[80:]

# # Generates train and validation sets
# x_train, y_train = x[train_idx], y[train_idx]
# x_val, y_val = x[val_idx], y[val_idx]
# x_tensor = torch.from_numpy(x_train).float()
# y_tensor = torch.from_numpy(y_train).float()

x_tensor = torch.tensor(df['Sales sum'].values).float()
y_tensor = torch.tensor(df['Sales count'].values).float()

dataset = TensorDataset(x_tensor, y_tensor)


#torch.manual_seed(42)
lengths = [int(len(dataset)*0.8), int(len(dataset)*0.2)]
train_dataset, val_dataset = random_split(dataset, lengths)

train_loader = DataLoader(dataset=train_dataset)
val_loader = DataLoader(dataset=val_dataset)


class LayerLinearRegression(nn.Module):
    def __init__(self):
        super().__init__()
        # Instead of our custom parameters, we use a Linear layer with single input and single output
        self.linear = nn.Linear(1, 1)

    def forward(self, x):
        # Now it only takes a call to the layer to make predictions
        return self.linear(x)

model = LayerLinearRegression()
# Checks model's parameters
#print(model.state_dict())

lr = 1e-3
n_epochs = 100

loss_fn = nn.MSELoss(reduction='mean')
optimizer = optim.SGD(model.parameters(), lr=lr)

def make_train_step(model, loss_fn, optimizer):
    # Builds function that performs a step in the train loop
    def train_step(x, y):
        # Sets model to TRAIN mode
        model.train()
        # Makes predictions
        yhat = model(x)
        # Computes loss
        loss = loss_fn(y, yhat)
        # Computes gradients
        loss.backward()
        # Updates parameters and zeroes gradients
        optimizer.step()
        optimizer.zero_grad()
        # Returns the loss
        return loss.item()

    # Returns the function that will be called inside the train loop
    return train_step

# Creates the train_step function for our model, loss function and optimizer
train_step = make_train_step(model, loss_fn, optimizer)
training_losses = []
validation_losses = []
print(model.state_dict())
# For each epoch...
for epoch in range(n_epochs):
    losses = []
    # Uses loader to fetch one mini-batch for training
    for x_batch, y_batch in train_loader:
        # NOW, sends the mini-batch data to the device
        # so it matches location of the MODEL
        # x_batch = x_batch.to(device)
        # y_batch = y_batch.to(device)
        # One step of training
        loss = train_step(x_batch, y_batch)
        losses.append(loss)
    training_loss = np.mean(losses)
    training_losses.append(training_loss)

    # After finishing training steps for all mini-batches,
    # it is time for evaluation!

    # We tell PyTorch to NOT use autograd...
    # Do you remember why?
    with torch.no_grad():
        val_losses = []
        # Uses loader to fetch one mini-batch for validation
        for x_val, y_val in val_loader:
            # Again, sends data to same device as model
            # x_val = x_val.to(device)
            # y_val = y_val.to(device)

            # What is that?!
            model.eval()
            # Makes predictions
            yhat = model(x_val)
            # Computes validation loss
            val_loss = loss_fn(y_val, yhat)
            val_losses.append(val_loss.item())
        validation_loss = np.mean(val_losses)
        validation_losses.append(validation_loss)

    print(f"[{epoch+1}] Training loss: {training_loss:.3f}\t Validation loss: {validation_loss:.3f}")

# Checks model's parameters
print(model.state_dict())
print(np.mean(losses))
print(np.mean(val_losses))
dlgssdpytorch.py (new file)
@@ -0,0 +1,115 @@
import torch
import numpy as np

import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, TensorDataset, DataLoader
import argparse

parser = argparse.ArgumentParser(description='Program do uczenia modelu')
parser.add_argument('-l', '--lr', type=float, default=1e-3, help="Współczynik uczenia (lr)", required=False)
parser.add_argument('-e', '--epochs', type=int, default=100, help="Liczba epok", required=False)
args = parser.parse_args()

lr = args.lr
n_epochs = args.epochs

train_dataset = torch.load('train_dataset.pt')
val_dataset = torch.load('val_dataset.pt')

train_loader = DataLoader(dataset=train_dataset)
val_loader = DataLoader(dataset=val_dataset)

class LayerLinearRegression(nn.Module):
    def __init__(self):
        super().__init__()
        # Instead of our custom parameters, we use a Linear layer with single input and single output
        self.linear = nn.Linear(1, 1)

    def forward(self, x):
        # Now it only takes a call to the layer to make predictions
        return self.linear(x)

model = LayerLinearRegression()
# Checks model's parameters
#print(model.state_dict())

loss_fn = nn.MSELoss(reduction='mean')
optimizer = optim.SGD(model.parameters(), lr=lr)

def make_train_step(model, loss_fn, optimizer):
    # Builds function that performs a step in the train loop
    def train_step(x, y):
        # Sets model to TRAIN mode
        model.train()
        # Makes predictions
        yhat = model(x)
        # Computes loss
        loss = loss_fn(y, yhat)
        # Computes gradients
        loss.backward()
        # Updates parameters and zeroes gradients
        optimizer.step()
        optimizer.zero_grad()
        # Returns the loss
        return loss.item()

    # Returns the function that will be called inside the train loop
    return train_step

# Creates the train_step function for our model, loss function and optimizer
train_step = make_train_step(model, loss_fn, optimizer)
training_losses = []
validation_losses = []
#print(model.state_dict())
# For each epoch...
for epoch in range(n_epochs):
    losses = []
    # Uses loader to fetch one mini-batch for training
    for x_batch, y_batch in train_loader:
        # NOW, sends the mini-batch data to the device
        # so it matches location of the MODEL
        # x_batch = x_batch.to(device)
        # y_batch = y_batch.to(device)
        # One step of training
        loss = train_step(x_batch, y_batch)
        losses.append(loss)
    training_loss = np.mean(losses)
    training_losses.append(training_loss)

    # After finishing training steps for all mini-batches,
    # it is time for evaluation!

    # We tell PyTorch to NOT use autograd...
    # Do you remember why?
    with torch.no_grad():
        val_losses = []
        # Uses loader to fetch one mini-batch for validation
        for x_val, y_val in val_loader:
            # Again, sends data to same device as model
            # x_val = x_val.to(device)
            # y_val = y_val.to(device)

            model.eval()
            # Makes predictions
            yhat = model(x_val)
            # Computes validation loss
            val_loss = loss_fn(y_val, yhat)
            val_losses.append(val_loss.item())
        validation_loss = np.mean(val_losses)
        validation_losses.append(validation_loss)

    print(f"[{epoch+1}] Training loss: {training_loss:.3f}\t Validation loss: {validation_loss:.3f}")

# Checks model's parameters
print("Model's state_dict:")
for param_tensor in model.state_dict():
    print(param_tensor, "\t", model.state_dict()[param_tensor])

# Print optimizer's state_dict
print("Optimizer's state_dict:")
for var_name in optimizer.state_dict():
    print(var_name, "\t", optimizer.state_dict()[var_name])
print("Mean squared error for training: ", np.mean(losses))
print("Mean squared error for validating: ", np.mean(val_losses))
torch.save(model, 'model.pt')
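Given the argparse flags above and the datasets written by create_dataset.py, a typical invocation looks something like this (the flag values are only examples):

python3 create_dataset.py                            # produces train_dataset.pt and val_dataset.pt
python3 dlgssdpytorch.py --lr 0.001 --epochs 100     # trains the linear model and saves model.pt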
mms.csv.bak (new file, 1431 lines)
File diff suppressed because it is too large.
mms_norm.csv (new file, 1431 lines)
File diff suppressed because it is too large.
mms_norm.csv.bak (new file, 1431 lines)
File diff suppressed because it is too large.
requirments.txt (new file)
@@ -0,0 +1,27 @@
certifi==2020.12.5
chardet==4.0.0
cycler==0.10.0
graphviz==0.16
idna==2.10
joblib==1.0.1
kaggle==1.5.12
kiwisolver==1.3.1
matplotlib==3.4.1
numpy==1.20.2
pandas==1.2.4
Pillow==8.2.0
pyparsing==2.4.7
python-dateutil==2.8.1
python-slugify==4.0.1
pytz==2021.1
requests==2.25.1
scikit-learn==0.24.1
scipy==1.6.2
six==1.15.0
sklearn==0.0
text-unidecode==1.3
threadpoolctl==2.1.0
torchviz==0.0.2
tqdm==4.60.0
typing-extensions==3.7.4.3
urllib3==1.26.4
@@ -1,7 +1,7 @@
 #!/bin/bash
 kaggle datasets download -d apoorvaappz/global-super-store-dataset
 unzip -o global-super-store-dataset.zip
-if [ $1 = "0" ]; then
+if [[ $1 = "0" ]]; then
 CUTOFF=51291
 cp Global_Superstore2.csv Global_Superstore22.csv
 else
@@ -16,4 +16,4 @@ head -n $((prop1*2)) Global_Superstore2.csv.shuf | tail -n $prop1 > Global_Super
 tail -n +$((prop1*2+1)) Global_Superstore2.csv.shuf > Global_Superstore2.csv.train
 rm Global_Superstore2.csv.shuf
 # Let's check that the sizes match:
 wc -l Global_Superstore2*
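The visible change in the first hunk is `[ $1 = "0" ]` becoming `[[ $1 = "0" ]]`: the double-bracket test survives an empty or missing first argument, while the single-bracket form does not. A minimal illustration (hypothetical snippet, not part of the commit):

set -- ""              # simulate the script being called with an empty first argument
[ $1 = "0" ]           # single brackets: $1 word-splits away -> "[: =: unary operator expected"
[[ $1 = "0" ]]         # double brackets: no word splitting, simply evaluates to false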
(Jupyter notebook) — in the remaining hunks only the auto-generated cell IDs and the recorded Python version change:
@@ -3,7 +3,7 @@
-    "id": "eligible-business",
+    "id": "strange-teens",
@@ -18,7 +18,7 @@
-    "id": "limited-memorial",
+    "id": "another-accessory",
@@ -28,7 +28,7 @@
-    "id": "aware-allah",
+    "id": "valid-malta",
@@ -48,7 +48,7 @@
-    "id": "drawn-financing",
+    "id": "noble-compilation",
@@ -448,7 +448,7 @@
-    "id": "boring-consumption",
+    "id": "multiple-council",
@@ -458,7 +458,7 @@
-    "id": "cathedral-frank",
+    "id": "green-trunk",
@@ -468,7 +468,7 @@
-    "id": "satisfactory-venice",
+    "id": "operating-catalyst",
@@ -489,7 +489,7 @@
-    "id": "united-climate",
+    "id": "female-landscape",
@@ -510,7 +510,7 @@
-    "id": "institutional-corpus",
+    "id": "thirty-auckland",
@@ -531,7 +531,7 @@
-    "id": "caroline-shannon",
+    "id": "mysterious-alignment",
@@ -552,7 +552,7 @@
-    "id": "sublime-quarter",
+    "id": "stone-combining",
@@ -950,7 +950,7 @@
-    "id": "committed-disease",
+    "id": "demanding-milwaukee",
@@ -990,7 +990,7 @@
-    "id": "corporate-fisher",
+    "id": "above-script",
@@ -1026,7 +1026,7 @@
-    "id": "revised-study",
+    "id": "abroad-durham",
@@ -1047,7 +1047,7 @@
-    "id": "checked-thought",
+    "id": "centered-realtor",
@@ -1058,7 +1058,7 @@
-    "id": "assigned-mobility",
+    "id": "relevant-receptor",
@@ -1076,7 +1076,7 @@
-    "id": "stunning-metallic",
+    "id": "informal-unemployment",
@@ -1474,7 +1474,7 @@
-    "id": "boolean-calgary",
+    "id": "reserved-cookie",
@@ -1496,7 +1496,7 @@
-    "version": "3.6.9"
+    "version": "3.8.5"