Modify Jenkinsfile and Dockerfile for the s444498-dvc pipeline

This commit is contained in:
Wirusik 2022-06-05 18:59:04 +02:00
parent d4d89d25a8
commit 89d8c6e9e9
10 changed files with 127 additions and 12 deletions

View File

@ -1,5 +1,5 @@
[core] [core]
autostage = true
remote = ium_ssh_remote remote = ium_ssh_remote
['remote "ium_ssh_remote"'] ['remote "ium_ssh_remote"']
url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl url = ssh://tzietkiewicz.vm.wmi.amu.edu.pl:/home/ium-sftp
user = ium-sftp

3
.gitignore vendored
View File

@ -1,6 +1,7 @@
*.csv *.csv
*.zip
*.png *.png
*.txt *.txt
__pycache__ __pycache__
/prepared /prepared
model.zip
sacred_runs/1/model.zip

View File

@ -19,6 +19,8 @@ RUN pip3 install matplotlib
RUN pip3 install torchvision RUN pip3 install torchvision
RUN pip3 install sacred RUN pip3 install sacred
RUN pip3 install pymongo RUN pip3 install pymongo
RUN pip3 install dvc
RUN pip3 install 'dvc[ssh]' paramiko
# Args # Args
ARG KAGGLE_USERNAME ARG KAGGLE_USERNAME
@ -31,5 +33,8 @@ WORKDIR /app
# Copy everything from jenkins to /app # Copy everything from jenkins to /app
COPY . . COPY . .
# Create user
RUN useradd -r -u 111 jenkins
# Create kaggle catalog for authenticate # Create kaggle catalog for authenticate
RUN mkdir /.kaggle/ && chmod o+w /.kaggle RUN mkdir /.kaggle/ && chmod o+w /.kaggle

42
Jenkinsfile-dvc Normal file
View File

@ -0,0 +1,42 @@
// Declarative pipeline that builds the agent image from the repository's
// Dockerfile, checks out the project, configures the DVC SSH remote with a
// Jenkins-managed private key, pulls the tracked data and reproduces the
// DVC pipeline.
pipeline {
    agent {
        dockerfile {
            // Double quotes are required here: a single-quoted Groovy string is
            // never interpolated, so the container would receive the literal
            // text "${params.KAGGLE_USERNAME}" instead of the parameter value.
            args "-e KAGGLE_USERNAME=${params.KAGGLE_USERNAME} -e KAGGLE_KEY=${params.KAGGLE_KEY}"
        }
    }
    parameters {
        string (
            defaultValue: 'wirus006',
            description: 'Kaggle username',
            name: 'KAGGLE_USERNAME',
            trim: false
        )
        password (
            defaultValue: '',
            description: 'Kaggle token taken from kaggle.json file, as described in https://github.com/Kaggle/kaggle-api#api-credentials',
            name: 'KAGGLE_KEY'
        )
    }
    stages {
        stage("Git clone") {
            steps {
                checkout([$class: 'GitSCM', branches: [[name: '*/master']], extensions: [], userRemoteConfigs: [[credentialsId: 's444498', url: 'https://git.wmi.amu.edu.pl/s444498/ium_444498.git']]])
            }
        }
        stage("Run DVC") {
            steps {
                withCredentials(
                    [sshUserPrivateKey(credentialsId: '48ac7004-216e-4260-abba-1fe5db753e18', keyFileVariable: 'IUM_SFTP_KEY', passphraseVariable: '', usernameVariable: 'USER')]) {
                    // Single quotes on purpose: $IUM_SFTP_KEY must be expanded by
                    // the shell, not by Groovy, so the secret path stays masked.
                    sh 'dvc remote modify --local ium_ssh_remote keyfile $IUM_SFTP_KEY'
                    // NOTE(review): hard-coded remote password committed to the
                    // repository. Move it to a Jenkins credential and bind it in
                    // withCredentials once such a credential exists.
                    sh 'dvc remote modify --local ium_ssh_remote password IUM@2021'
                    sh 'dvc remote list'
                    sh 'cat .dvc/config'
                    // .dvc/config.local is intentionally NOT printed: it holds the
                    // key file path and the password written by the steps above.
                    sh 'dvc pull'
                    sh 'ls -al'
                    sh 'dvc repro'
                }
            }
        }
    }
}

BIN
atp-and-wta-tennis-data.zip Normal file

Binary file not shown.

View File

@ -1,4 +1,4 @@
outs: outs:
- md5: 16cefb2b04f963bcf0fbb6f256496219 - md5: d32a6cf1889199066cace68f8f56890b
size: 2466716 size: 2431316
path: atp_dev.csv path: atp_dev.csv

View File

@ -1,4 +1,4 @@
outs: outs:
- md5: b5b50c11ef644df2ef799ca56e7d1ced - md5: 389fd474d4db00db1c113683177d5880
size: 2466156 size: 2430180
path: atp_test.csv path: atp_test.csv

View File

@ -1,4 +1,4 @@
outs: outs:
- md5: 314cd14a051bd61bf7e1f3a160c02dd2 - md5: 50969b14a70db98c17a62cf7d99edb5a
size: 7408451 size: 7302503
path: atp_train.csv path: atp_train.csv

View File

@ -1,5 +1,5 @@
stages: stages:
prepare:
cmd: python init.py
train: train:
cmd: python neutral_network.py cmd: python3 neutral_network.py
prepare:
cmd: python3 init2.py

67
init2.py Normal file
View File

@ -0,0 +1,67 @@
import subprocess
from os.path import exists
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import matplotlib
from pathlib import Path
import math
# Data initialisation: extract the dataset archive only when the CSV is not
# already present in the working directory.
file_exists = exists("./df_atp.csv")
if not file_exists:
    # check=True makes a failed extraction stop the script here instead of
    # surfacing later as a missing-file error from read_csv.
    subprocess.run(["unzip", "-o", "atp-and-wta-tennis-data.zip"], check=True)
atp_data = pd.read_csv("df_atp.csv")

# Summary statistics for games won in the first set by the match winner.
# NOTE(review): "Winner" presumably holds player names (it is used as a
# group-by key below). mean/std/median are therefore restricted to numeric
# columns explicitly; without numeric_only=True they raise TypeError on
# pandas >= 2.0 when a selected column is non-numeric.
first_set = atp_data[["Winner", "W1"]]
# Mean
print(first_set.mean(numeric_only=True))
# Minimum
print(first_set.min())
# Maximum
print(first_set.max())
# Standard deviation
print(first_set.std(numeric_only=True))
# Median
print(first_set.median(numeric_only=True))

# Give the unnamed index column from the CSV an explicit name.
atp_data.rename(columns={"Unnamed: 0": "ID"}, inplace=True)

# How many distinct matches each player won.
print(atp_data.groupby("Winner")["ID"].nunique())

# Normalise round labels to integer codes.
# Negative codes: -1 final, -2 semifinal, -3 quarterfinal, -4 round robin.
# Positive codes: the ordinal of the numbered round (1st..4th).
# Labels not listed here are left unchanged.
round_codes = {
    "The Final": -1,
    "Semifinals": -2,
    "Quarterfinals": -3,
    "Round Robin": -4,
    "1st Round": 1,
    "2nd Round": 2,
    "3rd Round": 3,
    "4th Round": 4,
}
for label, code in round_codes.items():
    atp_data.loc[atp_data["Round"] == label, "Round"] = code
print(atp_data["Round"])

# Cleaning: replace the "########" placeholder in the date column with an
# empty string.
atp_data.loc[atp_data["Date"] == "########", "Date"] = ""
print(atp_data["Date"])

# Split into train / dev / test subsets in a 6:2:2 ratio: first hold out 40%,
# then halve the held-out part. random_state is fixed for reproducibility.
atp_train, atp_holdout = train_test_split(atp_data, test_size=0.4, random_state=1)
atp_dev, atp_test = train_test_split(atp_holdout, test_size=0.5, random_state=1)

# Sizes of the full set and of each subset.
print("\nElements of total set: " + str(len(atp_data)))
print("\nElements of test set: " + str(len(atp_test)))
print("\nElements of dev set: " + str(len(atp_dev)))
print("\nElements of train set: " + str(len(atp_train)))

# Write each subset to its own CSV file.
atp_test.to_csv("atp_test.csv", encoding="utf-8", index=False)
atp_dev.to_csv("atp_dev.csv", encoding="utf-8", index=False)
atp_train.to_csv("atp_train.csv", encoding="utf-8", index=False)