NOTE(review): line structure and indentation were reconstructed from a whitespace-collapsed copy of this patch.
The blob ids on the `index` lines and the md5/size entries for dvc_prepare_data.py and dvc_train.py in dvc.lock
predate the fixes below; regenerate them (re-commit, then `dvc repro`) before relying on them.

diff --git a/.dvc/config b/.dvc/config
index e69de29..c02d6a2 100644
--- a/.dvc/config
+++ b/.dvc/config
@@ -0,0 +1,4 @@
+[core]
+    remote = ium_ssh_remote
+['remote "ium_ssh_remote"']
+    url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl/ium-sftp
diff --git a/.gitignore b/.gitignore
index 2e491f2..95148b2 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,5 @@
 .ipynb_checkpoints/
 *.zip
 /country_vaccinations.csv
+/train.csv
+/train_output.txt
diff --git a/Jenkinsfile_dvc b/Jenkinsfile_dvc
new file mode 100644
index 0000000..3d9e6dd
--- /dev/null
+++ b/Jenkinsfile_dvc
@@ -0,0 +1,44 @@
+
+pipeline {
+    agent {
+        docker {
+            image 's434804/ium:0.5'
+            args '-v /tmp/mlruns:/tmp/mlruns -v /mlruns:/mlruns '
+        }
+    }
+    stages {
+        stage('checkout: Check out from version control'){
+            steps{
+                checkout([$class: 'GitSCM', branches: [[name: '*/master']], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: '87e24204-a0e1-4840-b235-2b993c922d83', url: 'https://git.wmi.amu.edu.pl/s434804/ium_434804.git']]])
+            }
+        }
+        stage('install dependencies') {
+            steps {
+                sh 'python -m pip install dvc'
+                sh 'python -m pip install dvc[ssh] paramiko'
+            }
+        }
+        stage('DVC') {
+            steps {
+                withCredentials([string(credentialsId: 'ium-sftp-password', variable: 'IUM_SFTP_PASS')]) {
+                    sh "dvc init -f"
+                    sh "dvc remote add -d ium_ssh_remote ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl/ium-sftp"
+                    // Single-quoted so the shell, not Groovy, expands the secret;
+                    // Groovy interpolation would embed the password in the command string.
+                    sh 'dvc remote modify --local ium_ssh_remote password "$IUM_SFTP_PASS"'
+                    sh "dvc pull"
+                    // The DVC subcommand that runs the pipeline is "repro".
+                    sh "dvc repro"
+                }
+            }
+            post {
+                success {
+                    // Post conditions take steps directly; a nested stage is not allowed here.
+                    emailext body: currentBuild.result ?: 'DVC SUCCESS',
+                        subject: 's434804',
+                        to: '26ab8f35.uam.onmicrosoft.com@emea.teams.ms'
+                }
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/dvc.lock b/dvc.lock
new file mode 100644
index 0000000..b8969b0
--- /dev/null
+++ b/dvc.lock
@@ -0,0 +1,28 @@
+schema: '2.0'
+stages:
+  split:
+    cmd: python dvc_prepare_data.py
+    deps:
+    - path: country_vaccinations.csv
+      md5: e72f519f36732ded275a723c55edb82d
+      size: 2563179
+    - path: dvc_prepare_data.py
+      md5: b8e80295cfddfb448198dbe18dd23695
+      size: 546
+    outs:
+    - path: train.csv
+      md5: 998c91b0e0e0d29c2760b14102ee0ca5
+      size: 1573494
+  train:
+    cmd: python dvc_train.py
+    deps:
+    - path: dvc_train.py
+      md5: b76f1bc15969023aa0d1779bd81c8c0f
+      size: 1528
+    - path: train.csv
+      md5: 998c91b0e0e0d29c2760b14102ee0ca5
+      size: 1573494
+    outs:
+    - path: train_output.txt
+      md5: a0eda36e44d7151af605c6cb32bb3a50
+      size: 21157
diff --git a/dvc.yaml b/dvc.yaml
new file mode 100644
index 0000000..2eb7b2e
--- /dev/null
+++ b/dvc.yaml
@@ -0,0 +1,15 @@
+stages:
+  split:
+    cmd: python dvc_prepare_data.py
+    deps:
+    - country_vaccinations.csv
+    - dvc_prepare_data.py
+    outs:
+    - train.csv
+  train:
+    cmd: python dvc_train.py
+    deps:
+    - dvc_train.py
+    - train.csv
+    outs:
+    - train_output.txt
diff --git a/dvc_prepare_data.py b/dvc_prepare_data.py
new file mode 100644
index 0000000..ac8cf40
--- /dev/null
+++ b/dvc_prepare_data.py
@@ -0,0 +1,17 @@
+import numpy as np
+import pandas as pd
+import wget
+from sklearn import preprocessing
+
+url = 'https://git.wmi.amu.edu.pl/s434804/ium_434804/raw/branch/master/country_vaccinations.csv'
+wget.download(url, out='country_vaccinations.csv', bar=None)
+
+df = pd.read_csv('country_vaccinations.csv')
+# Split the data into train/validate/test (6:2:2) using numpy and pandas.
+# A fixed random_state keeps the shuffle, and therefore the DVC-tracked
+# train.csv, identical between runs on the same input.
+train, validate, test = np.split(df.sample(frac=1, random_state=42), [int(.6*len(df)), int(.8*len(df))])
+
+train.to_csv("train.csv")
+validate.to_csv("validate.csv")
+test.to_csv("test.csv")
\ No newline at end of file
diff --git a/dvc_train.py b/dvc_train.py
new file mode 100644
index 0000000..56b7b0f
--- /dev/null
+++ b/dvc_train.py
@@ -0,0 +1,46 @@
+import contextlib
+import numpy as np
+import pandas as pd
+import tensorflow as tf
+import sys
+import wget
+from tensorflow import keras
+from sklearn.metrics import r2_score, mean_squared_error
+from math import sqrt
+from sklearn.model_selection import train_test_split
+from sklearn import preprocessing
+# Importing the dataset
+
+df = pd.read_csv('train.csv').dropna()
+
+# Everything written to stdout below goes to train_output.txt (the output of
+# the DVC "train" stage). The context managers close the file and restore
+# stdout even if training raises.
+with open("train_output.txt", "w") as log_file, contextlib.redirect_stdout(log_file):
+    dataset = df.iloc[:, 3:-3]
+    print(dataset.head())
+    # numeric_only=True keeps non-numeric columns out of the per-country sums,
+    # so the feature matrix stays numeric on pandas >= 2.0 as well.
+    dataset = df.groupby(by=["country"], dropna=True).sum(numeric_only=True)
+    X = dataset.loc[:,dataset.columns != "daily_vaccinations"]
+    y = dataset.loc[:,dataset.columns == "daily_vaccinations"]
+
+    # Splitting the dataset into the Training set and Test set
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)
+
+    # Model definition: a stack of fully connected layers ending in one linear output
+    model = keras.Sequential([
+        keras.layers.Dense(512,input_dim = X_train.shape[1],kernel_initializer='normal', activation='relu'),
+        keras.layers.Dense(512,kernel_initializer='normal', activation='relu'),
+        keras.layers.Dense(256,kernel_initializer='normal', activation='relu'),
+        keras.layers.Dense(256,kernel_initializer='normal', activation='relu'),
+        keras.layers.Dense(128,kernel_initializer='normal', activation='relu'),
+        keras.layers.Dense(1,kernel_initializer='normal', activation='linear'),
+    ])
+
+    model.compile(loss='mean_absolute_error', optimizer='adam', metrics=['mean_absolute_error'])
+
+    model.fit(X_train, y_train, epochs=50, validation_split = 0.3)
+
+    prediction = model.predict(X_test)
+    print(prediction)
\ No newline at end of file