Compare commits

..

No commits in common. "master" and "feature/dvc" have entirely different histories.

13 changed files with 57 additions and 9301 deletions

View File

@ -1,5 +1,6 @@
[core] [core]
remote = ium_ssh_remote remote = ium_ssh_remote
['remote "my_local_remote"']
url = /Users/adamwojdyla/Documents/Studia/Magisterskie/1_sem/IUM/ium_444507/dvcstore
['remote "ium_ssh_remote"'] ['remote "ium_ssh_remote"']
url = ssh://tzietkiewicz.vm.wmi.amu.edu.pl:/home/ium-sftp url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl
user = ium-sftp

2
.gitignore vendored
View File

@ -153,9 +153,11 @@ fabric.properties
# kaggle # kaggle
kaggle.json kaggle.json
Car_Prices_Poland_Kaggle*.csv Car_Prices_Poland_Kaggle*.csv
CarPrices*
IUM08/* IUM08/*
.DS_store .DS_store
*.db *.db
mlruns mlruns
my_model my_model
dvcstore dvcstore
/prediction_results.csv

Binary file not shown.

View File

@ -10,9 +10,9 @@ pipeline {
stages { stages {
stage('DVC') { stage('DVC') {
steps { steps {
checkout([$class: 'GitSCM', branches: [[name: '*/master']], extensions: [], userRemoteConfigs: [[credentialsId: 'S444507_cred', url: 'https://git.wmi.amu.edu.pl/s444507/ium_444507.git']]])
withCredentials( withCredentials(
[sshUserPrivateKey(credentialsId: '48ac7004-216e-4260-abba-1fe5db753e18', keyFileVariable: 'IUM_SFTP_KEY', passphraseVariable: '', usernameVariable: 'USER')]) { [sshUserPrivateKey(credentialsId: '48ac7004-216e-4260-abba-1fe5db753e18', keyFileVariable: 'IUM_SFTP_KEY', passphraseVariable: '', usernameVariable: '')]) {
sh 'dvc remote add -d ium_ssh_remote ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl/ium-sftp'
sh 'dvc remote modify --local ium_ssh_remote keyfile $IUM_SFTP_KEY' sh 'dvc remote modify --local ium_ssh_remote keyfile $IUM_SFTP_KEY'
sh 'dvc pull' sh 'dvc pull'
sh 'dvc repro' sh 'dvc repro'
@ -20,4 +20,9 @@ pipeline {
} }
} }
} }
post {
success {
archiveArtifacts artifacts: 'prediction_results.csv, *.pkl', followSymlinks: false
}
}
} }

4
data/.gitignore vendored
View File

@ -1,5 +1 @@
/prepared /prepared
/Car_Prices_Poland_Kaggle_test.csv
/Car_Prices_Poland_Kaggle_train.csv
/Car_Prices_Poland_Kaggle_dev.csv
/Car_Prices_Poland_Kaggle.csv

0
data/Car_Prices_Poland_Kaggle.csv.dvc Normal file → Executable file
View File

View File

@ -1,4 +0,0 @@
outs:
- md5: 099e43435758084862777c03cc2feb02
size: 1648018
path: Car_Prices_Poland_Kaggle_dev.csv

View File

@ -1,4 +0,0 @@
outs:
- md5: 099e43435758084862777c03cc2feb02
size: 1648018
path: Car_Prices_Poland_Kaggle_test.csv

View File

@ -1,4 +0,0 @@
outs:
- md5: 47632fa91312b676baf3053a7a1b4f2b
size: 6598832
path: Car_Prices_Poland_Kaggle_train.csv

View File

@ -9,21 +9,38 @@ stages:
- path: script_prepare.py - path: script_prepare.py
md5: f1dfe33a503f5acc687c53dee448f71b md5: f1dfe33a503f5acc687c53dee448f71b
size: 1899 size: 1899
outs:
- path: data/Car_Prices_Poland_Kaggle_dev.csv
md5: cf9355749edc79f588e264de5b2bf1f0
size: 1648309
- path: data/Car_Prices_Poland_Kaggle_test.csv
md5: cf9355749edc79f588e264de5b2bf1f0
size: 1648309
- path: data/Car_Prices_Poland_Kaggle_train.csv
md5: 8818f758e2de344a4b9ad712379b81e1
size: 6597472
train: train:
cmd: python3 lab05_deepLearning.py 70 cmd: python3 lab05_deepLearning.py 50
deps: deps:
- path: data/Car_Prices_Poland_Kaggle_dev.csv - path: data/Car_Prices_Poland_Kaggle_dev.csv
md5: 113fad808a17e2aa5131832ecaa2e640 md5: cf9355749edc79f588e264de5b2bf1f0
size: 1647374 size: 1648309
- path: data/Car_Prices_Poland_Kaggle_test.csv - path: data/Car_Prices_Poland_Kaggle_test.csv
md5: 113fad808a17e2aa5131832ecaa2e640 md5: cf9355749edc79f588e264de5b2bf1f0
size: 1647374 size: 1648309
- path: data/Car_Prices_Poland_Kaggle_train.csv - path: data/Car_Prices_Poland_Kaggle_train.csv
md5: dd41429d2b3285cc85b94a9b0ec8cf91 md5: 8818f758e2de344a4b9ad712379b81e1
size: 6597186 size: 6597472
outs:
- path: CarPrices_pytorch_model.pkl
md5: cff6a79945bbf839058a4fd1b2dcc98f
size: 30039
- path: prediction_results.csv
md5: 62b9e54cdfebc7f1dfb060e18e9b8738
size: 585197
evaluate: evaluate:
cmd: python3 lab10_evaluate.py cmd: python3 lab10_evaluate.py
deps: deps:
- path: CarPrices_pytorch_model.pkl - path: CarPrices_pytorch_model.pkl
md5: a73485a169b6185b0161d75bc5c883a3 md5: cff6a79945bbf839058a4fd1b2dcc98f
size: 30039 size: 30039

View File

@ -4,13 +4,20 @@ stages:
deps: deps:
- data/Car_Prices_Poland_Kaggle.csv - data/Car_Prices_Poland_Kaggle.csv
- script_prepare.py - script_prepare.py
outs:
- data/Car_Prices_Poland_Kaggle_dev.csv
- data/Car_Prices_Poland_Kaggle_train.csv
- data/Car_Prices_Poland_Kaggle_test.csv
train: train:
cmd: python3 lab05_deepLearning.py 70 cmd: python3 lab05_deepLearning.py 70
deps: deps:
- data/Car_Prices_Poland_Kaggle_dev.csv - data/Car_Prices_Poland_Kaggle_dev.csv
- data/Car_Prices_Poland_Kaggle_train.csv - data/Car_Prices_Poland_Kaggle_train.csv
- data/Car_Prices_Poland_Kaggle_test.csv - data/Car_Prices_Poland_Kaggle_test.csv
outs:
- CarPrices_pytorch_model.pkl
- prediction_results.csv
evaluate: evaluate:
cmd: python3 lab10_evaluate.py cmd: python3 lab10_evaluate.py
deps: deps:
- CarPrices_pytorch_model.pkl - CarPrices_pytorch_model.pkl

View File

@ -11,12 +11,7 @@ import torch.nn.functional as F
import pandas as pd import pandas as pd
from sklearn import preprocessing from sklearn import preprocessing
import sys import sys
import os
path = '.'
files = os.listdir(".")
if not "Car_Prices_Poland_Kaggle.csv" in files:
path = "data"
class Model(nn.Module): class Model(nn.Module):
def __init__(self, input_dim): def __init__(self, input_dim):
@ -34,15 +29,15 @@ class Model(nn.Module):
def load_dataset_raw(): def load_dataset_raw():
""" Load data from .csv file. """ """ Load data from .csv file. """
cars = pd.read_csv(f'{path}/Car_Prices_Poland_Kaggle.csv', usecols=[1, 4, 5, 6, 10], sep=',') cars = pd.read_csv('./Car_Prices_Poland_Kaggle.csv', usecols=[1, 4, 5, 6, 10], sep=',')
return cars return cars
def load_dataset_files(): def load_dataset_files():
""" Load shuffled, splitted dev and train files from .csv files. """ """ Load shuffled, splitted dev and train files from .csv files. """
cars_dev = pd.read_csv(f'{path}/Car_Prices_Poland_Kaggle_dev.csv', usecols=[1, 4, 5, 6, 10], sep=',', names= [str(i) for i in range(5)]) cars_dev = pd.read_csv('./Car_Prices_Poland_Kaggle_dev.csv', usecols=[1, 4, 5, 6, 10], sep=',', names= [str(i) for i in range(5)])
cars_train = pd.read_csv(f'{path}/Car_Prices_Poland_Kaggle_train.csv', usecols=[1, 4, 5, 6, 10], sep=',', names= [str(i) for i in range(5)]) cars_train = pd.read_csv('./Car_Prices_Poland_Kaggle_train.csv', usecols=[1, 4, 5, 6, 10], sep=',', names= [str(i) for i in range(5)])
return cars_dev, cars_train return cars_dev, cars_train
@ -70,6 +65,15 @@ def prepare_labels_features(dataset):
return lab, feat return lab, feat
# def draw_plot(lbl):
# need to import matplotlib to work
# plt.hist(lbl, bins=[i for i in range(len(set(lbl)))], edgecolor="black")
# plt.xticks(np.arange(0, len(set(lbl)), 1))
# plt.show()
# Prepare dataset # Prepare dataset
print("Loading dataset...") print("Loading dataset...")
dev, train = load_dataset_files() dev, train = load_dataset_files()

File diff suppressed because it is too large Load Diff