13 changed files with 57 additions and 9301 deletions
--- a/.dvc/config
+++ b/.dvc/config
@ -1,5 +1,6 @@
 [core]
    remote = ium_ssh_remote
+['remote "my_local_remote"']
+    url = /Users/adamwojdyla/Documents/Studia/Magisterskie/1_sem/IUM/ium_444507/dvcstore
 ['remote "ium_ssh_remote"']
-    url = ssh://tzietkiewicz.vm.wmi.amu.edu.pl:/home/ium-sftp
-    user = ium-sftp
+    url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl
--- a/.gitignore
+++ b/.gitignore
@ -153,9 +153,11 @@ fabric.properties
 # kaggle
 kaggle.json
 Car_Prices_Poland_Kaggle*.csv
+CarPrices*
 IUM08/*
 .DS_store
 *.db
 mlruns
 my_model
 dvcstore
+/prediction_results.csv
--- a/CarPrices_pytorch_model.pkl
+++ b/CarPrices_pytorch_model.pkl
--- a/9
+++ b/9
@ -10,9 +10,9 @@ pipeline {
    stages {
        stage('DVC') {
            steps {
-                checkout([$class: 'GitSCM', branches: [[name: '*/master']], extensions: [], userRemoteConfigs: [[credentialsId: 'S444507_cred', url: 'https://git.wmi.amu.edu.pl/s444507/ium_444507.git']]])
                withCredentials(
-                    [sshUserPrivateKey(credentialsId: '48ac7004-216e-4260-abba-1fe5db753e18', keyFileVariable: 'IUM_SFTP_KEY', passphraseVariable: '', usernameVariable: 'USER')]) {
+                    [sshUserPrivateKey(credentialsId: '48ac7004-216e-4260-abba-1fe5db753e18', keyFileVariable: 'IUM_SFTP_KEY', passphraseVariable: '', usernameVariable: '')]) {
+                        sh 'dvc remote add -d ium_ssh_remote ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl/ium-sftp'
                        sh 'dvc remote modify --local ium_ssh_remote keyfile $IUM_SFTP_KEY'
                        sh 'dvc pull'
                        sh 'dvc repro'
@ -20,4 +20,9 @@ pipeline {
            }
        }
    }
+    post {
+        success {
+            archiveArtifacts artifacts: 'prediction_results.csv, *.pkl', followSymlinks: false
+        }
+    }
 }
--- a/data/.gitignore
+++ b/data/.gitignore
@ -1,5 +1 @@
 /prepared
-/Car_Prices_Poland_Kaggle_test.csv
-/Car_Prices_Poland_Kaggle_train.csv
-/Car_Prices_Poland_Kaggle_dev.csv
-/Car_Prices_Poland_Kaggle.csv
--- a/data/Car_Prices_Poland_Kaggle.csv.dvc
+++ b/data/Car_Prices_Poland_Kaggle.csv.dvc
--- a/data/Car_Prices_Poland_Kaggle_dev.csv.dvc
+++ b/data/Car_Prices_Poland_Kaggle_dev.csv.dvc
@ -1,4 +0,0 @@
-outs:
- md5: 099e43435758084862777c03cc2feb02
-  size: 1648018
-  path: Car_Prices_Poland_Kaggle_dev.csv
--- a/data/Car_Prices_Poland_Kaggle_test.csv.dvc
+++ b/data/Car_Prices_Poland_Kaggle_test.csv.dvc
@ -1,4 +0,0 @@
-outs:
- md5: 099e43435758084862777c03cc2feb02
-  size: 1648018
-  path: Car_Prices_Poland_Kaggle_test.csv
--- a/data/Car_Prices_Poland_Kaggle_train.csv.dvc
+++ b/data/Car_Prices_Poland_Kaggle_train.csv.dvc
@ -1,4 +0,0 @@
-outs:
- md5: 47632fa91312b676baf3053a7a1b4f2b
-  size: 6598832
-  path: Car_Prices_Poland_Kaggle_train.csv
--- a/dvc.lock
+++ b/dvc.lock
@ -9,21 +9,38 @@ stages:
    - path: script_prepare.py
      md5: f1dfe33a503f5acc687c53dee448f71b
      size: 1899
+    outs:
+    - path: data/Car_Prices_Poland_Kaggle_dev.csv
+      md5: cf9355749edc79f588e264de5b2bf1f0
+      size: 1648309
+    - path: data/Car_Prices_Poland_Kaggle_test.csv
+      md5: cf9355749edc79f588e264de5b2bf1f0
+      size: 1648309
+    - path: data/Car_Prices_Poland_Kaggle_train.csv
+      md5: 8818f758e2de344a4b9ad712379b81e1
+      size: 6597472
  train:
-    cmd: python3 lab05_deepLearning.py 70
+    cmd: python3 lab05_deepLearning.py 50
    deps:
    - path: data/Car_Prices_Poland_Kaggle_dev.csv
-      md5: 113fad808a17e2aa5131832ecaa2e640
-      size: 1647374
+      md5: cf9355749edc79f588e264de5b2bf1f0
+      size: 1648309
    - path: data/Car_Prices_Poland_Kaggle_test.csv
-      md5: 113fad808a17e2aa5131832ecaa2e640
-      size: 1647374
+      md5: cf9355749edc79f588e264de5b2bf1f0
+      size: 1648309
    - path: data/Car_Prices_Poland_Kaggle_train.csv
-      md5: dd41429d2b3285cc85b94a9b0ec8cf91
-      size: 6597186
+      md5: 8818f758e2de344a4b9ad712379b81e1
+      size: 6597472
+    outs:
+    - path: CarPrices_pytorch_model.pkl
+      md5: cff6a79945bbf839058a4fd1b2dcc98f
+      size: 30039
+    - path: prediction_results.csv
+      md5: 62b9e54cdfebc7f1dfb060e18e9b8738
+      size: 585197
  evaluate:
    cmd: python3 lab10_evaluate.py
    deps:
    - path: CarPrices_pytorch_model.pkl
-      md5: a73485a169b6185b0161d75bc5c883a3
+      md5: cff6a79945bbf839058a4fd1b2dcc98f
      size: 30039
--- a/dvc.yaml
+++ b/dvc.yaml
@ -4,13 +4,20 @@ stages:
    deps:
    - data/Car_Prices_Poland_Kaggle.csv
    - script_prepare.py
+    outs:
+    - data/Car_Prices_Poland_Kaggle_dev.csv
+    - data/Car_Prices_Poland_Kaggle_train.csv
+    - data/Car_Prices_Poland_Kaggle_test.csv
  train:
    cmd:  python3 lab05_deepLearning.py 70
    deps:
    - data/Car_Prices_Poland_Kaggle_dev.csv
    - data/Car_Prices_Poland_Kaggle_train.csv
    - data/Car_Prices_Poland_Kaggle_test.csv
+    outs:
+    - CarPrices_pytorch_model.pkl
+    - prediction_results.csv
  evaluate:
    cmd:  python3 lab10_evaluate.py
    deps:
-    - CarPrices_pytorch_model.pkl
+    - CarPrices_pytorch_model.pkl
--- a/lab05_deepLearning.py
+++ b/lab05_deepLearning.py
@ -11,12 +11,7 @@ import torch.nn.functional as F
 import pandas as pd
 from sklearn import preprocessing
 import sys
-import os

-path = '.'
-files = os.listdir(".")
-if not "Car_Prices_Poland_Kaggle.csv" in files:
-    path = "data"

 class Model(nn.Module):
    def __init__(self, input_dim):
@ -34,15 +29,15 @@ class Model(nn.Module):

 def load_dataset_raw():
    """ Load data from .csv file. """
-    cars = pd.read_csv(f'{path}/Car_Prices_Poland_Kaggle.csv', usecols=[1, 4, 5, 6, 10], sep=',')
+    cars = pd.read_csv('./Car_Prices_Poland_Kaggle.csv', usecols=[1, 4, 5, 6, 10], sep=',')
    return cars


 def load_dataset_files():
    """ Load the shuffled, split dev and train sets from .csv files. """

-    cars_dev = pd.read_csv(f'{path}/Car_Prices_Poland_Kaggle_dev.csv', usecols=[1, 4, 5, 6, 10], sep=',', names= [str(i) for i in range(5)])
-    cars_train = pd.read_csv(f'{path}/Car_Prices_Poland_Kaggle_train.csv', usecols=[1, 4, 5, 6, 10], sep=',', names= [str(i) for i in range(5)])
+    cars_dev = pd.read_csv('./Car_Prices_Poland_Kaggle_dev.csv', usecols=[1, 4, 5, 6, 10], sep=',', names= [str(i) for i in range(5)])
+    cars_train = pd.read_csv('./Car_Prices_Poland_Kaggle_train.csv', usecols=[1, 4, 5, 6, 10], sep=',', names= [str(i) for i in range(5)])

    return cars_dev, cars_train

@ -70,6 +65,15 @@ def prepare_labels_features(dataset):
    return lab, feat


+
+
+
+# def draw_plot(lbl):
+# requires matplotlib.pyplot imported as plt (and numpy as np) to work
+#     plt.hist(lbl, bins=[i for i in range(len(set(lbl)))], edgecolor="black")
+#     plt.xticks(np.arange(0, len(set(lbl)), 1))
+#     plt.show()
+
 # Prepare dataset
 print("Loading dataset...")
 dev, train = load_dataset_files()
--- a/prediction_results.csv
+++ b/prediction_results.csv