dvc configuration

This commit is contained in:
Dawid 2021-06-12 18:15:43 +02:00
parent 47c71091a1
commit 32fedbee37
7 changed files with 147 additions and 0 deletions

View File

@ -0,0 +1,4 @@
# DVC repository configuration (.dvc/config).
[core]
# Name of the remote that DVC commands use when none is given explicitly.
remote = ium_ssh_remote
# Definition of the remote named above. Only the URL is stored here;
# no credentials are present in this file.
['remote "ium_ssh_remote"']
url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl/ium-sftp

2
.gitignore vendored
View File

@ -1,3 +1,5 @@
# Jupyter autosave directories.
.ipynb_checkpoints/
# Archives.
*.zip
# Raw dataset (kept out of Git).
/country_vaccinations.csv
# Outputs of the `split` and `train` stages declared in dvc.yaml.
/train.csv
/train_output.txt

43
Jenkinsfile_dvc Normal file
View File

@ -0,0 +1,43 @@
// Declarative pipeline that checks out the project, installs DVC, pulls the
// data from the SSH remote and reproduces the DVC pipeline (dvc.yaml).
// A notification e-mail is sent when the DVC stage succeeds.
pipeline {
    agent {
        docker {
            image 's434804/ium:0.5'
            args '-v /tmp/mlruns:/tmp/mlruns -v /mlruns:/mlruns '
        }
    }
    stages {
        stage('checkout: Check out from version control') {
            steps {
                checkout([$class: 'GitSCM', branches: [[name: '*/master']], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[credentialsId: '87e24204-a0e1-4840-b235-2b993c922d83', url: 'https://git.wmi.amu.edu.pl/s434804/ium_434804.git']]])
            }
        }
        stage('install dependencies') {
            steps {
                sh 'python -m pip install dvc'
                // Quoted so the shell never treats "[ssh]" as a glob pattern.
                sh 'python -m pip install "dvc[ssh]" paramiko'
            }
        }
        // This stage must live inside `stages`; previously `stages` was closed
        // before it, which left the stage orphaned and the pipeline unbalanced.
        stage('DVC') {
            steps {
                withCredentials([string(credentialsId: 'ium-sftp-password', variable: 'IUM_SFTP_PASS')]) {
                    // NOTE(review): `dvc init -f` re-creates .dvc, so the remote is
                    // added again right after it -- confirm this reset is intended.
                    sh 'dvc init -f'
                    sh 'dvc remote add -d ium_ssh_remote ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl/ium-sftp'
                    // Single-quoted Groovy string: the secret is expanded by the
                    // shell from the environment instead of being interpolated by
                    // Groovy into the command line.
                    sh 'dvc remote modify --local ium_ssh_remote password "$IUM_SFTP_PASS"'
                    sh 'dvc pull'
                    // The DVC command is `repro`; `reproduce` does not exist.
                    sh 'dvc repro'
                }
            }
            post {
                // A post condition contains steps directly; nesting a `stage`
                // block here is not valid declarative syntax.
                success {
                    emailext body: currentBuild.result ?: 'DVC SUCCESS',
                        subject: 's434804',
                        to: '26ab8f35.uam.onmicrosoft.com@emea.teams.ms'
                }
            }
        }
    }
}

28
dvc.lock Normal file
View File

@ -0,0 +1,28 @@
# Lock file recording the hashes and sizes of every stage's dependencies and
# outputs. Values below are unchanged; only the mapping nesting is restored.
schema: '2.0'
stages:
  split:
    cmd: python dvc_prepare_data.py
    deps:
    - path: country_vaccinations.csv
      md5: e72f519f36732ded275a723c55edb82d
      size: 2563179
    - path: dvc_prepare_data.py
      md5: b8e80295cfddfb448198dbe18dd23695
      size: 546
    outs:
    - path: train.csv
      md5: 998c91b0e0e0d29c2760b14102ee0ca5
      size: 1573494
  train:
    cmd: python dvc_train.py
    deps:
    - path: dvc_train.py
      md5: b76f1bc15969023aa0d1779bd81c8c0f
      size: 1528
    - path: train.csv
      md5: 998c91b0e0e0d29c2760b14102ee0ca5
      size: 1573494
    outs:
    - path: train_output.txt
      md5: a0eda36e44d7151af605c6cb32bb3a50
      size: 21157

15
dvc.yaml Normal file
View File

@ -0,0 +1,15 @@
# DVC pipeline definition: two stages, `split` followed by `train`
# (`train` depends on the train.csv produced by `split`).
stages:
  # Reads country_vaccinations.csv and writes the training split.
  split:
    cmd: python dvc_prepare_data.py
    deps:
    - country_vaccinations.csv
    - dvc_prepare_data.py
    outs:
    - train.csv
    # NOTE(review): dvc_prepare_data.py also writes validate.csv and test.csv,
    # which are not declared as outputs here -- confirm whether they should be.
  # Trains the model on train.csv; everything the script prints is captured
  # in train_output.txt.
  train:
    cmd: python dvc_train.py
    deps:
    - dvc_train.py
    - train.csv
    outs:
    - train_output.txt

15
dvc_prepare_data.py Normal file
View File

@ -0,0 +1,15 @@
import numpy as np
import pandas as pd
import wget
from sklearn import preprocessing
import os

# Location of the raw dataset and the local file name it is stored under.
url = 'https://git.wmi.amu.edu.pl/s434804/ium_434804/raw/branch/master/country_vaccinations.csv'
raw_csv = 'country_vaccinations.csv'

# Download only when the file is missing: it is a declared dependency of the
# DVC `split` stage, so a copy that is already present must be used as-is
# rather than triggering another download.
if not os.path.exists(raw_csv):
    wget.download(url, out=raw_csv, bar=None)

df = pd.read_csv(raw_csv)

# Split the data into train/validate/test (6:2:2).
# The shuffle uses a fixed seed so repeated runs produce identical files,
# which keeps the stage outputs reproducible.
shuffled = df.sample(frac=1, random_state=42)
train_end = int(.6 * len(df))
validate_end = int(.8 * len(df))

train = shuffled.iloc[:train_end]
validate = shuffled.iloc[train_end:validate_end]
test = shuffled.iloc[validate_end:]

train.to_csv("train.csv")
validate.to_csv("validate.csv")
test.to_csv("test.csv")

40
dvc_train.py Normal file
View File

@ -0,0 +1,40 @@
import numpy as np
import pandas as pd
import tensorflow as tf
import sys
import wget
from tensorflow import keras
from sklearn.metrics import r2_score, mean_squared_error
from math import sqrt
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
# Load the training split; rows containing any missing value are dropped.
df = pd.read_csv('train.csv').dropna()
preview = df.iloc[:, 3:-3]

# From here on everything written to sys.stdout goes to train_output.txt.
# The original stream is restored and the file closed even if training fails.
output_file = open("train_output.txt", "w")
original_stdout = sys.stdout
sys.stdout = output_file
try:
    print(preview.head())

    # Aggregate per country. numeric_only=True keeps the result independent of
    # the pandas version's default handling of non-numeric columns.
    dataset = df.groupby(by=["country"], dropna=True).sum(numeric_only=True)
    X = dataset.loc[:, dataset.columns != "daily_vaccinations"]
    y = dataset.loc[:, dataset.columns == "daily_vaccinations"]

    # Splitting the dataset into the Training set and Test set
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Model definition (no feature scaling is applied to the inputs).
    model = keras.Sequential([
        keras.layers.Dense(512, input_dim=X_train.shape[1], kernel_initializer='normal', activation='relu'),
        keras.layers.Dense(512, kernel_initializer='normal', activation='relu'),
        keras.layers.Dense(256, kernel_initializer='normal', activation='relu'),
        keras.layers.Dense(256, kernel_initializer='normal', activation='relu'),
        keras.layers.Dense(128, kernel_initializer='normal', activation='relu'),
        keras.layers.Dense(1, kernel_initializer='normal', activation='linear'),
    ])
    model.compile(loss='mean_absolute_error', optimizer='adam', metrics=['mean_absolute_error'])
    model.fit(X_train, y_train, epochs=50, validation_split=0.3)

    prediction = model.predict(X_test)
    print(prediction)
finally:
    sys.stdout = original_stdout
    output_file.close()