docker-v1

This commit is contained in:
s434732 2021-04-10 15:08:18 +02:00
parent 43ee28a29e
commit 7f7f14fd9e
5 changed files with 114 additions and 5 deletions

17
Dockerfile Normal file
View File

@ -0,0 +1,17 @@
# Pinned base image instead of the moving `latest` tag, so rebuilds are
# reproducible and a new Ubuntu release cannot silently change the toolchain.
FROM ubuntu:22.04

# OS packages in a single layer: `apt-get update` and `install` stay together so
# a cached, stale package index is never reused, and the index is deleted in the
# same layer so it does not end up in the image.
# setuptools/wheel are listed explicitly because --no-install-recommends would
# otherwise drop them, and pip needs them to build source-only packages.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        nano \
        python3 \
        python3-pip \
        python3-setuptools \
        python3-wheel \
    && rm -rf /var/lib/apt/lists/*

# Python dependencies, all installed system-wide. The original mixed `--user`
# (installs into root's home directory) with system-wide installs, which makes
# part of the packages invisible when the container runs as a non-root user.
RUN pip3 install --no-cache-dir \
        kaggle \
        matplotlib \
        pandas \
        scikit-learn \
        wget

WORKDIR /app
# Copy our scripts into /app in the container.
COPY ./skrypt_download.py ./skrypt_stat.py ./

9
Jenkinsfile vendored
View File

@ -26,14 +26,13 @@ node {
checkout([$class: 'GitSCM', branches: [[name: '*/master']], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[url: 'https://git.wmi.amu.edu.pl/s434732/ium_434732']]])
sh "chmod 777 ./skrypt_zad2.sh"
sh "./skrypt_zad2.sh"
sh 'python3 ./skrypt_download.py'
archiveArtifacts "results.csv_cut.dev"
archiveArtifacts "results.csv_cut.test"
archiveArtifacts "results.csv_cut.train"
archiveArtifacts "valid"
archiveArtifacts "test"
archiveArtifacts "train"
}
}

31
skrypt.py Normal file
View File

@ -0,0 +1,31 @@
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import kaggle
# Authenticate with the Kaggle API, then download and unzip the dataset into
# the current directory.
kaggle.api.authenticate()
kaggle.api.dataset_download_files('martj42/international-football-results-from-1872-to-2017', path='.', unzip=True)
results = pd.read_csv('results.csv')

# Drop rows containing NaN. DataFrame.dropna() returns a new frame instead of
# modifying in place, so the result has to be assigned back.
results = results.dropna()

# Normalization: lower-case the text columns.
for column in ['home_team', 'away_team', 'tournament', 'city', 'country']:
    results[column] = results[column].str.lower()

# Split the dataset 6:2:2 - 60% train, and the remaining 40% is halved into
# validation and test.
train, test = train_test_split(results, test_size=1 - 0.6)
valid, test = train_test_split(test, test_size=0.5)

# NOTE: DataFrame.size is the number of cells (rows * columns), not the row count.
print("All data: ", results.size)
print("Train size: ", train.size)
print("Test size: ", test.size)
print("Validate size: ", valid.size)
print(results.describe(include='all'))

# Sanity check: the subsets together should add up to the whole dataset
# (compare with the "All data" value printed above).
print(train.size + test.size + valid.size)

31
skrypt_download.py Normal file
View File

@ -0,0 +1,31 @@
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import kaggle
# Authenticate with the Kaggle API, then download and unzip the dataset into
# the current directory.
kaggle.api.authenticate()
kaggle.api.dataset_download_files('martj42/international-football-results-from-1872-to-2017', path='.', unzip=True)
results = pd.read_csv('results.csv')

# Drop rows containing NaN. DataFrame.dropna() returns a new frame instead of
# modifying in place, so the result has to be assigned back.
results = results.dropna()

# Normalization: lower-case the text columns.
for column in ['home_team', 'away_team', 'tournament', 'city', 'country']:
    results[column] = results[column].str.lower()

# Split the dataset 6:2:2 - 60% train, and the remaining 40% is halved into
# validation and test.
train, test = train_test_split(results, test_size=1 - 0.6)
valid, test = train_test_split(test, test_size=0.5)

# NOTE: DataFrame.size is the number of cells (rows * columns), not the row count.
print("All data: ", results.size)
print("Train size: ", train.size)
print("Test size: ", test.size)
print("Validate size: ", valid.size)
print(results.describe(include='all'))

# Sanity check: the subsets together should add up to the whole dataset
# (compare with the "All data" value printed above).
print(train.size + test.size + valid.size)

31
skrypt_stat.py Normal file
View File

@ -0,0 +1,31 @@
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import kaggle
# Authenticate with the Kaggle API, then download and unzip the dataset into
# the current directory.
kaggle.api.authenticate()
kaggle.api.dataset_download_files('martj42/international-football-results-from-1872-to-2017', path='.', unzip=True)
results = pd.read_csv('results.csv')

# Drop rows containing NaN. DataFrame.dropna() returns a new frame instead of
# modifying in place, so the result has to be assigned back.
results = results.dropna()

# Normalization: lower-case the text columns.
for column in ['home_team', 'away_team', 'tournament', 'city', 'country']:
    results[column] = results[column].str.lower()

# Split the dataset 6:2:2 - 60% train, and the remaining 40% is halved into
# validation and test.
train, test = train_test_split(results, test_size=1 - 0.6)
valid, test = train_test_split(test, test_size=0.5)

# NOTE: DataFrame.size is the number of cells (rows * columns), not the row count.
print("All data: ", results.size)
print("Train size: ", train.size)
print("Test size: ", test.size)
print("Validate size: ", valid.size)
print(results.describe(include='all'))

# Sanity check: the subsets together should add up to the whole dataset
# (compare with the "All data" value printed above).
print(train.size + test.size + valid.size)