only dockerfile true
All checks were successful
s444498-training/pipeline/head This commit looks good
This commit is contained in:
parent
39f02f99ff
commit
f7c9671206
20 Dockerfile
@@ -2,6 +2,7 @@
 FROM ubuntu:latest
 
 # Install required dependencies
+RUN export PATH="$PATH:/root/.local/bin"
 RUN apt update
 RUN apt-get update
 RUN apt install -y figlet
@@ -13,19 +14,18 @@ RUN pip3 install pandas
 RUN pip3 install pillow --global-option="build_ext" --global-option="--disable-zlib" --global-option="--disable-jpeg"
 RUN pip3 install scikit-learn
 RUN pip3 install matplotlib
+RUN pip3 install torchvision
+
+# Args
+ARG KAGGLE_USERNAME
+ARG KAGGLE_KEY
+ENV IS_DOCKER=True
 
 # Create app directory in image
 WORKDIR /app
 
+# Copy everything from jenkins to /app
 COPY . .
-ARG KAGGLE_USERNAME
-ARG KAGGLE_KEY
 
-# Download kaggle dataset
-RUN kaggle datasets download -d hakeem/atp-and-wta-tennis-data
-RUN unzip -o atp-and-wta-tennis-data.zip
-
-# Script executed after docker run
-RUN python3 ./init.py
-RUN chmod a+rwx -R *
-RUN ls -la
+# Create kaggle catalog for authenticate
+RUN mkdir /.kaggle/ && chmod o+w /.kaggle
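The build-time dataset download is gone: the image now only prepares a writable /.kaggle directory, and credentials arrive at run time through KAGGLE_USERNAME/KAGGLE_KEY. A minimal sketch of the run-time flow this assumes (the kaggle CLI falls back to these environment variables when no kaggle.json is present; the placeholder key is illustrative, and in the pipeline Jenkins injects both values via the agent's `-e` args):

    import os
    import subprocess

    # Illustrative placeholders; Jenkins normally injects these via
    # args '-e KAGGLE_USERNAME=... -e KAGGLE_KEY=...'.
    os.environ.setdefault("KAGGLE_USERNAME", "wirus006")
    os.environ.setdefault("KAGGLE_KEY", "<api-key>")

    # With the variables set, the kaggle CLI authenticates without a
    # kaggle.json file; it only needs the writable config directory that
    # the Dockerfile creates with `mkdir /.kaggle/ && chmod o+w /.kaggle`.
    subprocess.run(
        ["kaggle", "datasets", "download", "-d", "hakeem/atp-and-wta-tennis-data"],
        check=True,
    )
    subprocess.run(["unzip", "-o", "atp-and-wta-tennis-data.zip"], check=True)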
33 Jenkinsfile vendored
@@ -1,4 +1,10 @@
 pipeline {
+    agent {
+        dockerfile {
+            additionalBuildArgs '-t ium'
+            args '-e KAGGLE_USERNAME=${params.KAGGLE_USERNAME} -e KAGGLE_KEY=${params.KAGGLE_KEY}'
+        }
+    }
     parameters {
         string (
             defaultValue: 'wirus006',
@@ -12,22 +18,23 @@ pipeline {
             name: 'KAGGLE_KEY'
         )
     }
-    agent {
-        dockerfile {
-            additionalBuildArgs "--build-arg KAGGLE_USERNAME=${params.KAGGLE_USERNAME} --build-arg KAGGLE_KEY=${params.KAGGLE_KEY} -t s444498-create-dataset"
-        }
+    options {
+        copyArtifactPermission('s444498-training');
     }
     stages {
-        stage('Archive dataset') {
+        stage('Init datasets') {
             steps {
-                withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
-                "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
-                    sh 'echo hello world | figlet'
-                    sh 'chmod a+rwx -R *'
-                    sh 'pwd && ls'
-                    sh 'ls /app/data/'
-                    archiveArtifacts artifacts: '/app/data/*', onlyIfSuccessful: true
-                }
+                sh 'python3 init.py'
+            }
+        }
+        stage('Archive datasets') {
+            steps {
+                archiveArtifacts artifacts: 'atp_test.csv, atp_train.csv', onlyIfSuccessful: true
+            }
+        }
+        stage('Run training job') {
+            steps {
+                build job: "s444498-training/master"
             }
         }
     }
 }
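The dataset job now does its work at run time: 'Init datasets' runs init.py inside the container, 'Archive datasets' publishes the two CSVs from the workspace root (the old stage archived /app/data/*), and the training job is triggered afterwards. A tiny sanity-check sketch of the contract between the first two stages (the assertion is illustrative, not part of the pipeline):

    from pathlib import Path

    # After `sh 'python3 init.py'`, archiveArtifacts expects these files
    # in the workspace root rather than under /app/data/.
    for name in ("atp_test.csv", "atp_train.csv"):
        assert Path(name).exists(), f"{name} missing; 'Archive datasets' would fail"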
@@ -1,4 +1,8 @@
 pipeline {
+    agent {
+        dockerfile true
+    }
+
     parameters {
         string(
             defaultValue: '64',
@@ -12,7 +16,6 @@ pipeline {
             name: 'EPOCHS',
             trim: true
         )
-        gitParameter branchFilter: 'origin/(.*)', defaultValue: 'main', name: 'BRANCH', type: 'PT_BRANCH'
         buildSelector(
             defaultSelector: lastSuccessful(),
             description: 'Which build to use for copying artifacts',
@@ -20,39 +23,40 @@ pipeline {
         )
     }
 
-    agent {
-        docker {
-            image 's444498-create-dataset'
-        }
-    }
-
     stages {
+        stage('Copy artifacts') {
+            steps {
+                copyArtifacts fingerprintArtifacts: true, projectName: 's444498-create-dataset', selector: buildParameter('BUILD_SELECTOR')
+            }
+        }
         stage('Train model') {
             steps {
-                sh "python neutral_network.py -e ${params.EPOCHS} -b ${params.BATCHSIZE}"
+                sh "chmod u+x ./neutral_network.py"
+                sh "python3 neutral_network.py -e ${params.EPOCHS} -b ${params.BATCHSIZE}"
+            }
+        }
+        stage('Archive model') {
+            steps {
+                archiveArtifacts artifacts: "model.zip", onlyIfSuccessful: true
             }
         }
     }
 
-    environment {
-        NOTIFICATION_ADDRESS = 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
-    }
-
     post {
         success {
-            emailext body: 'SUCCESS', subject: "${env.JOB_NAME}", to: "${env.NOTIFICATION_ADDRESS}"
+            emailext body: "SUCCESS", subject: "s444498-training", to: "e19191c5.uam.onmicrosoft.com@emea.teams.ms"
         }
 
         failure {
-            emailext body: 'FAILURE', subject: "${env.JOB_NAME}", to: "${env.NOTIFICATION_ADDRESS}"
+            emailext body: "FAILURE", subject: "s444498-training", to: "e19191c5.uam.onmicrosoft.com@emea.teams.ms"
        }
 
         unstable {
-            emailext body: 'UNSTABLE', subject: "${env.JOB_NAME}", to: "${env.NOTIFICATION_ADDRESS}"
+            emailext body: 'UNSTABLE', subject: "s444498-training", to: "e19191c5.uam.onmicrosoft.com@emea.teams.ms"
        }
 
         changed {
-            emailext body: 'CHANGED', subject: "${env.JOB_NAME}", to: "${env.NOTIFICATION_ADDRESS}"
+            emailext body: 'CHANGED', subject: "s444498-training", to: "e19191c5.uam.onmicrosoft.com@emea.teams.ms"
        }
     }
 }
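The new 'Archive model' stage expects a model.zip in the workspace, but nothing in this diff creates it. A hedged sketch of the step neutral_network.py presumably performs at the end of training (the use of torch.save, the model.pt filename, and the helper function are assumptions, not shown in this commit):

    import zipfile
    import torch
    from torch import nn

    def archive_model(model: nn.Module, path: str = "model.zip") -> None:
        # Persist the weights, then wrap them in the model.zip
        # that the 'Archive model' stage expects to find.
        torch.save(model.state_dict(), "model.pt")
        with zipfile.ZipFile(path, "w") as archive:
            archive.write("model.pt")

    # Example with a stand-in network:
    archive_model(nn.Linear(4, 2))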
26 init.py
@@ -7,41 +7,36 @@ import matplotlib
 from pathlib import Path
 
 # Data initialization
+file_exists = exists('./df_atp.csv')
+if not file_exists:
+    subprocess.run(["kaggle", "datasets", "download", "-d", "hakeem/atp-and-wta-tennis-data"])
+    subprocess.run(["unzip", "-o", "atp-and-wta-tennis-data.zip"])
 atp_data = pd.read_csv('df_atp.csv')
 print(atp_data)
 
 # Average number of games won in the first set by match winners
 print(atp_data[["Winner", "W1"]].mean())
 
 # Minimum number of games won in the first set by match winners
 print(atp_data[["Winner", "W1"]].min())
 
 # Maximum number of games won in the first set by match winners
 print(atp_data[["Winner", "W1"]].max())
 
 # Standard deviation of games won in the first set by match winners
 print(atp_data[["Winner", "W1"]].std())
 
 # Median of games won in the first set by match winners
 print(atp_data[["Winner", "W1"]].median())
 
 # Rename the unnamed column
 atp_data.rename(columns={'Unnamed: 0':'ID'}, inplace=True)
 
 # How often each player was the winner
 print(atp_data.groupby("Winner")["ID"].nunique())
 
 # Round normalization -1: Final, -2: Semifinal, -3: Quarterfinal, -4: Round robin
 # 1: first round, 2: second round, 3: third round, 4: fourth round
 atp_data.loc[atp_data["Round"] == 'The Final', "Round"] = -1
 atp_data.loc[atp_data["Round"] == 'Semifinals', "Round"] = -2
 atp_data.loc[atp_data["Round"] == 'Quarterfinals', "Round"] = -3
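The new guard calls bare exists() and subprocess.run(), so init.py must already import both above this hunk; the visible context shows only import matplotlib and from pathlib import Path. A minimal sketch of the presumed import form (from os.path import exists matches the bare call; the exact import block is outside the visible context):

    import subprocess
    from os.path import exists

    # Download and unpack the dataset only when it is not already present,
    # e.g. on repeated runs inside the same workspace.
    if not exists('./df_atp.csv'):
        subprocess.run(["kaggle", "datasets", "download", "-d", "hakeem/atp-and-wta-tennis-data"])
        subprocess.run(["unzip", "-o", "atp-and-wta-tennis-data.zip"])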
@@ -53,28 +48,19 @@ atp_data.loc[atp_data["Round"] == '4th Round', "Round"] = 4
 print(atp_data["Round"])
 
 # Cleaning: replace ######## in the date field with an empty string
 atp_data.loc[atp_data["Date"] == '########', "Date"] = ''
 print(atp_data["Date"])
 
 # Split into train, test, and validation subsets in 6:2:2 proportions
 atp_train, atp_test = train_test_split(atp_data, test_size=0.4, random_state=1)
 atp_dev, atp_test = train_test_split(atp_test, test_size=0.5, random_state=1)
 
 # Size of the full set and the subsets
 print("\nElements of total set: " + str(len(atp_data)))
 print("\nElements of test set: " + str(len(atp_test)))
 print("\nElements of dev set: " + str(len(atp_dev)))
 print("\nElements of train set: " + str(len(atp_train)))
 
 # Create files with the training and test data
-filepath1 = Path('data/atp_test.csv')
-filepath2 = Path('data/atp_train.csv')
-filepath1.parent.mkdir(parents=True, exist_ok=True)
-filepath2.parent.mkdir(parents=True, exist_ok=True)
-
-atp_test.to_csv(filepath1)
-atp_train.to_csv(filepath2)
+atp_test.to_csv('atp_test.csv', encoding="utf-8", index=False)
+atp_train.to_csv('atp_train.csv', encoding="utf-8", index=False)
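The 6:2:2 proportion comes from chaining the two splits: test_size=0.4 leaves 60% for training, and splitting the 40% remainder in half yields 20% dev and 20% test. A quick check of that arithmetic on dummy data (the single column is a placeholder):

    import pandas as pd
    from sklearn.model_selection import train_test_split

    df = pd.DataFrame({"x": range(100)})  # dummy data, 100 rows
    train, rest = train_test_split(df, test_size=0.4, random_state=1)
    dev, test = train_test_split(rest, test_size=0.5, random_state=1)
    print(len(train), len(dev), len(test))  # 60 20 20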
@@ -87,8 +87,8 @@ print(f"Using {device} device")
 args = setup_args()
 batch_size = args.batchSize
 
-plant_test = AtpDataset('data/atp_test.csv')
-plant_train = AtpDataset('data/atp_train.csv')
+plant_test = AtpDataset('atp_test.csv')
+plant_train = AtpDataset('atp_train.csv')
 
 train_dataloader = DataLoader(plant_train, batch_size=batch_size)
 test_dataloader = DataLoader(plant_test, batch_size=batch_size)
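AtpDataset itself is not touched by this commit; the change only drops the data/ prefix so the loaders read the CSVs that init.py now writes to the workspace root. For orientation, a minimal sketch of a CSV-backed Dataset (the feature and label selection here are placeholders; the real class is not shown in this diff):

    import pandas as pd
    import torch
    from torch.utils.data import Dataset

    class AtpDataset(Dataset):
        # Minimal CSV-backed dataset; the real feature/label handling
        # in the training script is not part of this commit.
        def __init__(self, csv_path: str):
            self.frame = pd.read_csv(csv_path)

        def __len__(self) -> int:
            return len(self.frame)

        def __getitem__(self, idx: int):
            row = self.frame.iloc[idx]
            features = torch.tensor([row["W1"]], dtype=torch.float32)  # placeholder feature
            label = torch.tensor(0.0)  # placeholder label
            return features, label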