Compare commits

..

No commits in common. "master" and "feature/dvc" have entirely different histories.

13 changed files with 57 additions and 9301 deletions

View File

@ -1,5 +1,6 @@
[core]
remote = ium_ssh_remote
['remote "my_local_remote"']
url = /Users/adamwojdyla/Documents/Studia/Magisterskie/1_sem/IUM/ium_444507/dvcstore
['remote "ium_ssh_remote"']
url = ssh://tzietkiewicz.vm.wmi.amu.edu.pl:/home/ium-sftp
user = ium-sftp
url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl

2
.gitignore vendored
View File

@ -153,9 +153,11 @@ fabric.properties
# kaggle
kaggle.json
Car_Prices_Poland_Kaggle*.csv
CarPrices*
IUM08/*
.DS_store
*.db
mlruns
my_model
dvcstore
/prediction_results.csv

Binary file not shown.

View File

@ -10,9 +10,9 @@ pipeline {
stages {
stage('DVC') {
steps {
checkout([$class: 'GitSCM', branches: [[name: '*/master']], extensions: [], userRemoteConfigs: [[credentialsId: 'S444507_cred', url: 'https://git.wmi.amu.edu.pl/s444507/ium_444507.git']]])
withCredentials(
[sshUserPrivateKey(credentialsId: '48ac7004-216e-4260-abba-1fe5db753e18', keyFileVariable: 'IUM_SFTP_KEY', passphraseVariable: '', usernameVariable: 'USER')]) {
[sshUserPrivateKey(credentialsId: '48ac7004-216e-4260-abba-1fe5db753e18', keyFileVariable: 'IUM_SFTP_KEY', passphraseVariable: '', usernameVariable: '')]) {
sh 'dvc remote add -d ium_ssh_remote ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl/ium-sftp'
sh 'dvc remote modify --local ium_ssh_remote keyfile $IUM_SFTP_KEY'
sh 'dvc pull'
sh 'dvc repro'
@ -20,4 +20,9 @@ pipeline {
}
}
}
post {
success {
archiveArtifacts artifacts: 'prediction_results.csv, *.pkl', followSymlinks: false
}
}
}

4
data/.gitignore vendored
View File

@ -1,5 +1 @@
/prepared
/Car_Prices_Poland_Kaggle_test.csv
/Car_Prices_Poland_Kaggle_train.csv
/Car_Prices_Poland_Kaggle_dev.csv
/Car_Prices_Poland_Kaggle.csv

0
data/Car_Prices_Poland_Kaggle.csv.dvc Normal file → Executable file
View File

View File

@ -1,4 +0,0 @@
outs:
- md5: 099e43435758084862777c03cc2feb02
size: 1648018
path: Car_Prices_Poland_Kaggle_dev.csv

View File

@ -1,4 +0,0 @@
outs:
- md5: 099e43435758084862777c03cc2feb02
size: 1648018
path: Car_Prices_Poland_Kaggle_test.csv

View File

@ -1,4 +0,0 @@
outs:
- md5: 47632fa91312b676baf3053a7a1b4f2b
size: 6598832
path: Car_Prices_Poland_Kaggle_train.csv

View File

@ -9,21 +9,38 @@ stages:
- path: script_prepare.py
md5: f1dfe33a503f5acc687c53dee448f71b
size: 1899
outs:
- path: data/Car_Prices_Poland_Kaggle_dev.csv
md5: cf9355749edc79f588e264de5b2bf1f0
size: 1648309
- path: data/Car_Prices_Poland_Kaggle_test.csv
md5: cf9355749edc79f588e264de5b2bf1f0
size: 1648309
- path: data/Car_Prices_Poland_Kaggle_train.csv
md5: 8818f758e2de344a4b9ad712379b81e1
size: 6597472
train:
cmd: python3 lab05_deepLearning.py 70
cmd: python3 lab05_deepLearning.py 50
deps:
- path: data/Car_Prices_Poland_Kaggle_dev.csv
md5: 113fad808a17e2aa5131832ecaa2e640
size: 1647374
md5: cf9355749edc79f588e264de5b2bf1f0
size: 1648309
- path: data/Car_Prices_Poland_Kaggle_test.csv
md5: 113fad808a17e2aa5131832ecaa2e640
size: 1647374
md5: cf9355749edc79f588e264de5b2bf1f0
size: 1648309
- path: data/Car_Prices_Poland_Kaggle_train.csv
md5: dd41429d2b3285cc85b94a9b0ec8cf91
size: 6597186
md5: 8818f758e2de344a4b9ad712379b81e1
size: 6597472
outs:
- path: CarPrices_pytorch_model.pkl
md5: cff6a79945bbf839058a4fd1b2dcc98f
size: 30039
- path: prediction_results.csv
md5: 62b9e54cdfebc7f1dfb060e18e9b8738
size: 585197
evaluate:
cmd: python3 lab10_evaluate.py
deps:
- path: CarPrices_pytorch_model.pkl
md5: a73485a169b6185b0161d75bc5c883a3
md5: cff6a79945bbf839058a4fd1b2dcc98f
size: 30039

View File

@ -4,13 +4,20 @@ stages:
deps:
- data/Car_Prices_Poland_Kaggle.csv
- script_prepare.py
outs:
- data/Car_Prices_Poland_Kaggle_dev.csv
- data/Car_Prices_Poland_Kaggle_train.csv
- data/Car_Prices_Poland_Kaggle_test.csv
train:
cmd: python3 lab05_deepLearning.py 70
deps:
- data/Car_Prices_Poland_Kaggle_dev.csv
- data/Car_Prices_Poland_Kaggle_train.csv
- data/Car_Prices_Poland_Kaggle_test.csv
outs:
- CarPrices_pytorch_model.pkl
- prediction_results.csv
evaluate:
cmd: python3 lab10_evaluate.py
deps:
- CarPrices_pytorch_model.pkl
- CarPrices_pytorch_model.pkl

View File

@ -11,12 +11,7 @@ import torch.nn.functional as F
import pandas as pd
from sklearn import preprocessing
import sys
import os
path = '.'
files = os.listdir(".")
if not "Car_Prices_Poland_Kaggle.csv" in files:
path = "data"
class Model(nn.Module):
def __init__(self, input_dim):
@ -34,15 +29,15 @@ class Model(nn.Module):
def load_dataset_raw():
""" Load data from .csv file. """
cars = pd.read_csv(f'{path}/Car_Prices_Poland_Kaggle.csv', usecols=[1, 4, 5, 6, 10], sep=',')
cars = pd.read_csv('./Car_Prices_Poland_Kaggle.csv', usecols=[1, 4, 5, 6, 10], sep=',')
return cars
def load_dataset_files():
""" Load shuffled, splitted dev and train files from .csv files. """
cars_dev = pd.read_csv(f'{path}/Car_Prices_Poland_Kaggle_dev.csv', usecols=[1, 4, 5, 6, 10], sep=',', names= [str(i) for i in range(5)])
cars_train = pd.read_csv(f'{path}/Car_Prices_Poland_Kaggle_train.csv', usecols=[1, 4, 5, 6, 10], sep=',', names= [str(i) for i in range(5)])
cars_dev = pd.read_csv('./Car_Prices_Poland_Kaggle_dev.csv', usecols=[1, 4, 5, 6, 10], sep=',', names= [str(i) for i in range(5)])
cars_train = pd.read_csv('./Car_Prices_Poland_Kaggle_train.csv', usecols=[1, 4, 5, 6, 10], sep=',', names= [str(i) for i in range(5)])
return cars_dev, cars_train
@ -70,6 +65,15 @@ def prepare_labels_features(dataset):
return lab, feat
# def draw_plot(lbl):
# need to import matplotlib to work
# plt.hist(lbl, bins=[i for i in range(len(set(lbl)))], edgecolor="black")
# plt.xticks(np.arange(0, len(set(lbl)), 1))
# plt.show()
# Prepare dataset
print("Loading dataset...")
dev, train = load_dataset_files()

File diff suppressed because it is too large Load Diff