Modify Jenkinsfile and Dockerfile for the s444498-dvc pipeline
This commit is contained in:
parent
d4d89d25a8
commit
89d8c6e9e9
@ -1,5 +1,5 @@
|
|||||||
[core]
|
[core]
|
||||||
autostage = true
|
|
||||||
remote = ium_ssh_remote
|
remote = ium_ssh_remote
|
||||||
['remote "ium_ssh_remote"']
|
['remote "ium_ssh_remote"']
|
||||||
url = ssh://ium-sftp@tzietkiewicz.vm.wmi.amu.edu.pl
|
url = ssh://tzietkiewicz.vm.wmi.amu.edu.pl:/home/ium-sftp
|
||||||
|
user = ium-sftp
|
3
.gitignore
vendored
3
.gitignore
vendored
@ -1,6 +1,7 @@
|
|||||||
*.csv
|
*.csv
|
||||||
*.zip
|
|
||||||
*.png
|
*.png
|
||||||
*.txt
|
*.txt
|
||||||
__pycache__
|
__pycache__
|
||||||
/prepared
|
/prepared
|
||||||
|
model.zip
|
||||||
|
sacred_runs/1/model.zip
|
||||||
|
@ -19,6 +19,8 @@ RUN pip3 install matplotlib
|
|||||||
RUN pip3 install torchvision
|
RUN pip3 install torchvision
|
||||||
RUN pip3 install sacred
|
RUN pip3 install sacred
|
||||||
RUN pip3 install pymongo
|
RUN pip3 install pymongo
|
||||||
|
RUN pip3 install dvc
|
||||||
|
RUN pip3 install 'dvc[ssh]' paramiko
|
||||||
|
|
||||||
# Args
|
# Args
|
||||||
ARG KAGGLE_USERNAME
|
ARG KAGGLE_USERNAME
|
||||||
@ -31,5 +33,8 @@ WORKDIR /app
|
|||||||
# Copy everything from jenkins to /app
|
# Copy everything from jenkins to /app
|
||||||
COPY . .
|
COPY . .
|
||||||
|
|
||||||
|
# Create user
|
||||||
|
RUN useradd -r -u 111 jenkins
|
||||||
|
|
||||||
# Create the Kaggle directory used for authentication
|
# Create the Kaggle directory used for authentication
|
||||||
RUN mkdir /.kaggle/ && chmod o+w /.kaggle
|
RUN mkdir /.kaggle/ && chmod o+w /.kaggle
|
||||||
|
42
Jenkinsfile-dvc
Normal file
42
Jenkinsfile-dvc
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
pipeline {
|
||||||
|
agent {
|
||||||
|
dockerfile {
|
||||||
|
// Double quotes are required: Groovy only interpolates ${params.*} inside double-quoted strings, so the single-quoted form passed the placeholder text literally to the container.
args "-e KAGGLE_USERNAME=${params.KAGGLE_USERNAME} -e KAGGLE_KEY=${params.KAGGLE_KEY}"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
parameters {
|
||||||
|
string (
|
||||||
|
defaultValue: 'wirus006',
|
||||||
|
description: 'Kaggle username',
|
||||||
|
name: 'KAGGLE_USERNAME',
|
||||||
|
trim: false
|
||||||
|
)
|
||||||
|
password (
|
||||||
|
defaultValue: '',
|
||||||
|
description: 'Kaggle token taken from kaggle.json file, as described in https://github.com/Kaggle/kaggle-api#api-credentials',
|
||||||
|
name: 'KAGGLE_KEY'
|
||||||
|
)
|
||||||
|
}
|
||||||
|
stages {
|
||||||
|
stage("Git clone") {
|
||||||
|
steps {
|
||||||
|
checkout([$class: 'GitSCM', branches: [[name: '*/master']], extensions: [], userRemoteConfigs: [[credentialsId: 's444498', url: 'https://git.wmi.amu.edu.pl/s444498/ium_444498.git']]])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stage("Run DVC") {
|
||||||
|
steps{
|
||||||
|
withCredentials(
|
||||||
|
[sshUserPrivateKey(credentialsId: '48ac7004-216e-4260-abba-1fe5db753e18', keyFileVariable: 'IUM_SFTP_KEY', passphraseVariable: '', usernameVariable: 'USER')]) {
|
||||||
|
sh 'dvc remote modify --local ium_ssh_remote keyfile $IUM_SFTP_KEY'
|
||||||
|
// NOTE(review): plaintext password committed to the repository — move it into a Jenkins credential (bound via withCredentials) and rotate it.
sh 'dvc remote modify --local ium_ssh_remote password IUM@2021'
|
||||||
|
sh 'dvc remote list'
|
||||||
|
sh 'cat .dvc/config'
|
||||||
|
// NOTE(review): config.local receives the key-file path and password set by the two `dvc remote modify --local` steps, so this prints them to the build log — drop once debugging is done.
sh 'cat .dvc/config.local'
|
||||||
|
sh 'dvc pull'
|
||||||
|
sh 'ls -al'
|
||||||
|
sh 'dvc repro'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
BIN
atp-and-wta-tennis-data.zip
Normal file
BIN
atp-and-wta-tennis-data.zip
Normal file
Binary file not shown.
@ -1,4 +1,4 @@
|
|||||||
outs:
|
outs:
|
||||||
- md5: 16cefb2b04f963bcf0fbb6f256496219
|
- md5: d32a6cf1889199066cace68f8f56890b
|
||||||
size: 2466716
|
size: 2431316
|
||||||
path: atp_dev.csv
|
path: atp_dev.csv
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
outs:
|
outs:
|
||||||
- md5: b5b50c11ef644df2ef799ca56e7d1ced
|
- md5: 389fd474d4db00db1c113683177d5880
|
||||||
size: 2466156
|
size: 2430180
|
||||||
path: atp_test.csv
|
path: atp_test.csv
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
outs:
|
outs:
|
||||||
- md5: 314cd14a051bd61bf7e1f3a160c02dd2
|
- md5: 50969b14a70db98c17a62cf7d99edb5a
|
||||||
size: 7408451
|
size: 7302503
|
||||||
path: atp_train.csv
|
path: atp_train.csv
|
||||||
|
6
dvc.yaml
6
dvc.yaml
@ -1,5 +1,5 @@
|
|||||||
stages:
|
stages:
|
||||||
prepare:
|
|
||||||
cmd: python init.py
|
|
||||||
train:
|
train:
|
||||||
cmd: python neutral_network.py
|
cmd: python3 neutral_network.py
|
||||||
|
prepare:
|
||||||
|
cmd: python3 init2.py
|
67
init2.py
Normal file
67
init2.py
Normal file
@ -0,0 +1,67 @@
|
|||||||
|
import subprocess
|
||||||
|
from os.path import exists
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
import matplotlib
|
||||||
|
from pathlib import Path
|
||||||
|
import math
|
||||||
|
|
||||||
|
# Data initialization
|
||||||
|
file_exists = exists("./df_atp.csv")
|
||||||
|
if not file_exists:
|
||||||
|
# check=True makes a failed extraction raise here instead of surfacing later as a missing df_atp.csv.
subprocess.run(["unzip", "-o", "atp-and-wta-tennis-data.zip"], check=True)
|
||||||
|
|
||||||
|
atp_data = pd.read_csv("df_atp.csv")
|
||||||
|
|
||||||
|
# Mean number of games won in the first set by match winners
|
||||||
|
# numeric_only=True restricts the reduction to numeric columns; pandas 2.x raises TypeError when asked to
# average a non-numeric column (presumably "Winner" holds player names — confirm against the dataset).
print(atp_data[["Winner", "W1"]].mean(numeric_only=True))
|
||||||
|
|
||||||
|
# Minimum number of games won in the first set by match winners
|
||||||
|
print(atp_data[["Winner", "W1"]].min())
|
||||||
|
|
||||||
|
# Maximum number of games won in the first set by match winners
|
||||||
|
print(atp_data[["Winner", "W1"]].max())
|
||||||
|
|
||||||
|
# Standard deviation of games won in the first set by match winners
|
||||||
|
# numeric_only=True restricts the reduction to numeric columns; pandas 2.x raises TypeError when asked for
# the standard deviation of a non-numeric column (presumably "Winner" holds player names — confirm).
print(atp_data[["Winner", "W1"]].std(numeric_only=True))
|
||||||
|
|
||||||
|
# Median number of games won in the first set by match winners
|
||||||
|
# numeric_only=True restricts the reduction to numeric columns; pandas 2.x raises TypeError when asked for
# the median of a non-numeric column (presumably "Winner" holds player names — confirm).
print(atp_data[["Winner", "W1"]].median(numeric_only=True))
|
||||||
|
|
||||||
|
# Rename the unnamed column
|
||||||
|
atp_data.rename(columns={"Unnamed: 0": "ID"}, inplace=True)
|
||||||
|
|
||||||
|
# How often each player was the winner
|
||||||
|
print(atp_data.groupby("Winner")["ID"].nunique())
|
||||||
|
|
||||||
|
# Round normalization: -1: final, -2: semifinal, -3: quarterfinal, -4: round robin
|
||||||
|
# 1: first round, 2: second round, 3: third round, 4: fourth round
|
||||||
|
atp_data.loc[atp_data["Round"] == "The Final", "Round"] = -1
|
||||||
|
atp_data.loc[atp_data["Round"] == "Semifinals", "Round"] = -2
|
||||||
|
atp_data.loc[atp_data["Round"] == "Quarterfinals", "Round"] = -3
|
||||||
|
atp_data.loc[atp_data["Round"] == "Round Robin", "Round"] = -4
|
||||||
|
atp_data.loc[atp_data["Round"] == "1st Round", "Round"] = 1
|
||||||
|
atp_data.loc[atp_data["Round"] == "2nd Round", "Round"] = 2
|
||||||
|
atp_data.loc[atp_data["Round"] == "3rd Round", "Round"] = 3
|
||||||
|
atp_data.loc[atp_data["Round"] == "4th Round", "Round"] = 4
|
||||||
|
print(atp_data["Round"])
|
||||||
|
|
||||||
|
# Cleaning: replace ######## in the date field with an empty string
|
||||||
|
atp_data.loc[atp_data["Date"] == "########", "Date"] = ""
|
||||||
|
print(atp_data["Date"])
|
||||||
|
|
||||||
|
# Split into training, test, and validation subsets in a 6:2:2 ratio
|
||||||
|
atp_train, atp_test = train_test_split(atp_data, test_size=0.4, random_state=1)
|
||||||
|
atp_dev, atp_test = train_test_split(atp_test, test_size=0.5, random_state=1)
|
||||||
|
|
||||||
|
# Sizes of the full set and of the subsets
|
||||||
|
print("\nElements of total set: " + str(len(atp_data)))
|
||||||
|
print("\nElements of test set: " + str(len(atp_test)))
|
||||||
|
print("\nElements of dev set: " + str(len(atp_dev)))
|
||||||
|
print("\nElements of train set: " + str(len(atp_train)))
|
||||||
|
|
||||||
|
# Create the files with the training, validation, and test data
|
||||||
|
atp_test.to_csv("atp_test.csv", encoding="utf-8", index=False)
|
||||||
|
atp_dev.to_csv("atp_dev.csv", encoding="utf-8", index=False)
|
||||||
|
atp_train.to_csv("atp_train.csv", encoding="utf-8", index=False)
|
Loading…
Reference in New Issue
Block a user