Compare commits
No commits in common. "master" and "feature/dvc" have entirely different histories.
master
...
feature/dv
@ -1,5 +1,6 @@
|
||||
[core]
|
||||
remote = ium_ssh_remote
|
||||
['remote "my_local_remote"']
|
||||
url = /Users/adamwojdyla/Documents/Studia/Magisterskie/1_sem/IUM/ium_444507/dvcstore
|
||||
['remote "ium_ssh_remote"']
|
||||
url = ssh://tzietkiewicz.vm.wmi.amu.edu.pl:/home/ium-sftp
|
||||
user = ium-sftp
|
||||
url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl
|
||||
|
2
.gitignore
vendored
2
.gitignore
vendored
@ -153,9 +153,11 @@ fabric.properties
|
||||
# kaggle
|
||||
kaggle.json
|
||||
Car_Prices_Poland_Kaggle*.csv
|
||||
CarPrices*
|
||||
IUM08/*
|
||||
.DS_store
|
||||
*.db
|
||||
mlruns
|
||||
my_model
|
||||
dvcstore
|
||||
/prediction_results.csv
|
||||
|
Binary file not shown.
@ -10,9 +10,9 @@ pipeline {
|
||||
stages {
|
||||
stage('DVC') {
|
||||
steps {
|
||||
checkout([$class: 'GitSCM', branches: [[name: '*/master']], extensions: [], userRemoteConfigs: [[credentialsId: 'S444507_cred', url: 'https://git.wmi.amu.edu.pl/s444507/ium_444507.git']]])
|
||||
withCredentials(
|
||||
[sshUserPrivateKey(credentialsId: '48ac7004-216e-4260-abba-1fe5db753e18', keyFileVariable: 'IUM_SFTP_KEY', passphraseVariable: '', usernameVariable: 'USER')]) {
|
||||
[sshUserPrivateKey(credentialsId: '48ac7004-216e-4260-abba-1fe5db753e18', keyFileVariable: 'IUM_SFTP_KEY', passphraseVariable: '', usernameVariable: '')]) {
|
||||
sh 'dvc remote add -d ium_ssh_remote ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl/ium-sftp'
|
||||
sh 'dvc remote modify --local ium_ssh_remote keyfile $IUM_SFTP_KEY'
|
||||
sh 'dvc pull'
|
||||
sh 'dvc repro'
|
||||
@ -20,4 +20,9 @@ pipeline {
|
||||
}
|
||||
}
|
||||
}
|
||||
post {
|
||||
success {
|
||||
archiveArtifacts artifacts: 'prediction_results.csv, *.pkl', followSymlinks: false
|
||||
}
|
||||
}
|
||||
}
|
4
data/.gitignore
vendored
4
data/.gitignore
vendored
@ -1,5 +1 @@
|
||||
/prepared
|
||||
/Car_Prices_Poland_Kaggle_test.csv
|
||||
/Car_Prices_Poland_Kaggle_train.csv
|
||||
/Car_Prices_Poland_Kaggle_dev.csv
|
||||
/Car_Prices_Poland_Kaggle.csv
|
0
data/Car_Prices_Poland_Kaggle.csv.dvc
Normal file → Executable file
0
data/Car_Prices_Poland_Kaggle.csv.dvc
Normal file → Executable file
@ -1,4 +0,0 @@
|
||||
outs:
|
||||
- md5: 099e43435758084862777c03cc2feb02
|
||||
size: 1648018
|
||||
path: Car_Prices_Poland_Kaggle_dev.csv
|
@ -1,4 +0,0 @@
|
||||
outs:
|
||||
- md5: 099e43435758084862777c03cc2feb02
|
||||
size: 1648018
|
||||
path: Car_Prices_Poland_Kaggle_test.csv
|
@ -1,4 +0,0 @@
|
||||
outs:
|
||||
- md5: 47632fa91312b676baf3053a7a1b4f2b
|
||||
size: 6598832
|
||||
path: Car_Prices_Poland_Kaggle_train.csv
|
33
dvc.lock
33
dvc.lock
@ -9,21 +9,38 @@ stages:
|
||||
- path: script_prepare.py
|
||||
md5: f1dfe33a503f5acc687c53dee448f71b
|
||||
size: 1899
|
||||
outs:
|
||||
- path: data/Car_Prices_Poland_Kaggle_dev.csv
|
||||
md5: cf9355749edc79f588e264de5b2bf1f0
|
||||
size: 1648309
|
||||
- path: data/Car_Prices_Poland_Kaggle_test.csv
|
||||
md5: cf9355749edc79f588e264de5b2bf1f0
|
||||
size: 1648309
|
||||
- path: data/Car_Prices_Poland_Kaggle_train.csv
|
||||
md5: 8818f758e2de344a4b9ad712379b81e1
|
||||
size: 6597472
|
||||
train:
|
||||
cmd: python3 lab05_deepLearning.py 70
|
||||
cmd: python3 lab05_deepLearning.py 50
|
||||
deps:
|
||||
- path: data/Car_Prices_Poland_Kaggle_dev.csv
|
||||
md5: 113fad808a17e2aa5131832ecaa2e640
|
||||
size: 1647374
|
||||
md5: cf9355749edc79f588e264de5b2bf1f0
|
||||
size: 1648309
|
||||
- path: data/Car_Prices_Poland_Kaggle_test.csv
|
||||
md5: 113fad808a17e2aa5131832ecaa2e640
|
||||
size: 1647374
|
||||
md5: cf9355749edc79f588e264de5b2bf1f0
|
||||
size: 1648309
|
||||
- path: data/Car_Prices_Poland_Kaggle_train.csv
|
||||
md5: dd41429d2b3285cc85b94a9b0ec8cf91
|
||||
size: 6597186
|
||||
md5: 8818f758e2de344a4b9ad712379b81e1
|
||||
size: 6597472
|
||||
outs:
|
||||
- path: CarPrices_pytorch_model.pkl
|
||||
md5: cff6a79945bbf839058a4fd1b2dcc98f
|
||||
size: 30039
|
||||
- path: prediction_results.csv
|
||||
md5: 62b9e54cdfebc7f1dfb060e18e9b8738
|
||||
size: 585197
|
||||
evaluate:
|
||||
cmd: python3 lab10_evaluate.py
|
||||
deps:
|
||||
- path: CarPrices_pytorch_model.pkl
|
||||
md5: a73485a169b6185b0161d75bc5c883a3
|
||||
md5: cff6a79945bbf839058a4fd1b2dcc98f
|
||||
size: 30039
|
||||
|
9
dvc.yaml
9
dvc.yaml
@ -4,13 +4,20 @@ stages:
|
||||
deps:
|
||||
- data/Car_Prices_Poland_Kaggle.csv
|
||||
- script_prepare.py
|
||||
outs:
|
||||
- data/Car_Prices_Poland_Kaggle_dev.csv
|
||||
- data/Car_Prices_Poland_Kaggle_train.csv
|
||||
- data/Car_Prices_Poland_Kaggle_test.csv
|
||||
train:
|
||||
cmd: python3 lab05_deepLearning.py 70
|
||||
deps:
|
||||
- data/Car_Prices_Poland_Kaggle_dev.csv
|
||||
- data/Car_Prices_Poland_Kaggle_train.csv
|
||||
- data/Car_Prices_Poland_Kaggle_test.csv
|
||||
outs:
|
||||
- CarPrices_pytorch_model.pkl
|
||||
- prediction_results.csv
|
||||
evaluate:
|
||||
cmd: python3 lab10_evaluate.py
|
||||
deps:
|
||||
- CarPrices_pytorch_model.pkl
|
||||
- CarPrices_pytorch_model.pkl
|
||||
|
@ -11,12 +11,7 @@ import torch.nn.functional as F
|
||||
import pandas as pd
|
||||
from sklearn import preprocessing
|
||||
import sys
|
||||
import os
|
||||
|
||||
path = '.'
|
||||
files = os.listdir(".")
|
||||
if not "Car_Prices_Poland_Kaggle.csv" in files:
|
||||
path = "data"
|
||||
|
||||
class Model(nn.Module):
|
||||
def __init__(self, input_dim):
|
||||
@ -34,15 +29,15 @@ class Model(nn.Module):
|
||||
|
||||
def load_dataset_raw():
|
||||
""" Load data from .csv file. """
|
||||
cars = pd.read_csv(f'{path}/Car_Prices_Poland_Kaggle.csv', usecols=[1, 4, 5, 6, 10], sep=',')
|
||||
cars = pd.read_csv('./Car_Prices_Poland_Kaggle.csv', usecols=[1, 4, 5, 6, 10], sep=',')
|
||||
return cars
|
||||
|
||||
|
||||
def load_dataset_files():
|
||||
""" Load shuffled, splitted dev and train files from .csv files. """
|
||||
|
||||
cars_dev = pd.read_csv(f'{path}/Car_Prices_Poland_Kaggle_dev.csv', usecols=[1, 4, 5, 6, 10], sep=',', names= [str(i) for i in range(5)])
|
||||
cars_train = pd.read_csv(f'{path}/Car_Prices_Poland_Kaggle_train.csv', usecols=[1, 4, 5, 6, 10], sep=',', names= [str(i) for i in range(5)])
|
||||
cars_dev = pd.read_csv('./Car_Prices_Poland_Kaggle_dev.csv', usecols=[1, 4, 5, 6, 10], sep=',', names= [str(i) for i in range(5)])
|
||||
cars_train = pd.read_csv('./Car_Prices_Poland_Kaggle_train.csv', usecols=[1, 4, 5, 6, 10], sep=',', names= [str(i) for i in range(5)])
|
||||
|
||||
return cars_dev, cars_train
|
||||
|
||||
@ -70,6 +65,15 @@ def prepare_labels_features(dataset):
|
||||
return lab, feat
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
# def draw_plot(lbl):
|
||||
# need to import matplotlib to work
|
||||
# plt.hist(lbl, bins=[i for i in range(len(set(lbl)))], edgecolor="black")
|
||||
# plt.xticks(np.arange(0, len(set(lbl)), 1))
|
||||
# plt.show()
|
||||
|
||||
# Prepare dataset
|
||||
print("Loading dataset...")
|
||||
dev, train = load_dataset_files()
|
||||
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user