docker-v1
This commit is contained in:
parent
43ee28a29e
commit
7f7f14fd9e
17
Dockerfile
Normal file
17
Dockerfile
Normal file
@ -0,0 +1,17 @@
|
||||
# Image providing Python 3 plus the data-science packages used by the
# project's scripts. The base tag is pinned (instead of `latest`) so rebuilds
# are reproducible and so the system-wide `pip3 install` below keeps working:
# newer Ubuntu releases mark the system Python as externally managed.
FROM ubuntu:22.04

# Install all OS packages in a single layer: `apt-get update` and
# `apt-get install` must run together so a cached package index is never
# reused, and the index is removed in the same layer to keep the image small.
# setuptools/wheel are listed explicitly because --no-install-recommends would
# otherwise drop them and pip needs them to build source-only packages.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        nano \
        python3 \
        python3-pip \
        python3-setuptools \
        python3-wheel \
    && rm -rf /var/lib/apt/lists/*

# Install the Python dependencies system-wide in one layer (the original mixed
# `--user` and system-wide installs). --no-cache-dir avoids baking pip's
# download cache into the image.
RUN pip3 install --no-cache-dir \
        kaggle \
        matplotlib \
        pandas \
        scikit-learn \
        wget

WORKDIR /app

# Copy our scripts into the /app directory in the container.
COPY ./skrypt_download.py ./
COPY ./skrypt_stat.py ./
|
||||
|
||||
|
||||
|
9
Jenkinsfile
vendored
9
Jenkinsfile
vendored
@ -26,14 +26,13 @@ node {
|
||||
checkout([$class: 'GitSCM', branches: [[name: '*/master']], doGenerateSubmoduleConfigurations: false, extensions: [], submoduleCfg: [], userRemoteConfigs: [[url: 'https://git.wmi.amu.edu.pl/s434732/ium_434732']]])
|
||||
|
||||
|
||||
sh "chmod 777 ./skrypt_zad2.sh"
|
||||
sh "./skrypt_zad2.sh"
|
||||
sh 'python3 ./skrypt_download.py'
|
||||
|
||||
|
||||
|
||||
archiveArtifacts "results.csv_cut.dev"
|
||||
archiveArtifacts "results.csv_cut.test"
|
||||
archiveArtifacts "results.csv_cut.train"
|
||||
archiveArtifacts "valid"
|
||||
archiveArtifacts "test"
|
||||
archiveArtifacts "train"
|
||||
|
||||
}
|
||||
}
|
||||
|
31
skrypt.py
Normal file
31
skrypt.py
Normal file
@ -0,0 +1,31 @@
|
||||
import pandas as pd
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn import preprocessing
|
||||
import kaggle
|
||||
|
||||
# Authenticate with the Kaggle API before downloading anything.
kaggle.api.authenticate()

# Download the football-results dataset into the current directory and unzip it.
kaggle.api.dataset_download_files('martj42/international-football-results-from-1872-to-2017', path='.', unzip=True)

results = pd.read_csv('results.csv')

# Drop rows containing NaN. dropna() returns a new DataFrame, so the result
# must be assigned back; the bare call left the data unchanged.
results = results.dropna()

# Normalisation: lower-case the textual columns.
for column in ['home_team', 'away_team', 'tournament', 'city', 'country']:
    results[column] = results[column].str.lower()

# Split the dataset 60/20/20 (train/valid/test): first hold out 40% of the
# rows, then split that hold-out in half.
train_fraction = 0.6
train, test = train_test_split(results, test_size=1 - train_fraction)
valid, test = train_test_split(test, test_size=0.5)

# Note: DataFrame.size is the number of cells (rows x columns), not rows.
print("All data: ", results.size)
print("Train size: ", train.size)
print("Test size: ", test.size)
print("Validate size: ", valid.size)
print(results.describe(include='all'))

# Sanity check: the subsets together should add up to the whole dataset.
print(train.size + test.size + valid.size)
|
31
skrypt_download.py
Normal file
31
skrypt_download.py
Normal file
@ -0,0 +1,31 @@
|
||||
import pandas as pd
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn import preprocessing
|
||||
import kaggle
|
||||
|
||||
# Authenticate with the Kaggle API before downloading anything.
kaggle.api.authenticate()

# Download the football-results dataset into the current directory and unzip it.
kaggle.api.dataset_download_files('martj42/international-football-results-from-1872-to-2017', path='.', unzip=True)

results = pd.read_csv('results.csv')

# Drop rows containing NaN. dropna() returns a new DataFrame, so the result
# must be assigned back; the bare call left the data unchanged.
results = results.dropna()

# Normalisation: lower-case the textual columns.
for column in ['home_team', 'away_team', 'tournament', 'city', 'country']:
    results[column] = results[column].str.lower()

# Split the dataset 60/20/20 (train/valid/test): first hold out 40% of the
# rows, then split that hold-out in half.
train_fraction = 0.6
train, test = train_test_split(results, test_size=1 - train_fraction)
valid, test = train_test_split(test, test_size=0.5)

# Note: DataFrame.size is the number of cells (rows x columns), not rows.
print("All data: ", results.size)
print("Train size: ", train.size)
print("Test size: ", test.size)
print("Validate size: ", valid.size)
print(results.describe(include='all'))

# Sanity check: the subsets together should add up to the whole dataset.
print(train.size + test.size + valid.size)
|
31
skrypt_stat.py
Normal file
31
skrypt_stat.py
Normal file
@ -0,0 +1,31 @@
|
||||
import pandas as pd
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn import preprocessing
|
||||
import kaggle
|
||||
|
||||
# Authenticate with the Kaggle API before downloading anything.
kaggle.api.authenticate()

# Download the football-results dataset into the current directory and unzip it.
kaggle.api.dataset_download_files('martj42/international-football-results-from-1872-to-2017', path='.', unzip=True)

results = pd.read_csv('results.csv')

# Drop rows containing NaN. dropna() returns a new DataFrame, so the result
# must be assigned back; the bare call left the data unchanged.
results = results.dropna()

# Normalisation: lower-case the textual columns.
for column in ['home_team', 'away_team', 'tournament', 'city', 'country']:
    results[column] = results[column].str.lower()

# Split the dataset 60/20/20 (train/valid/test): first hold out 40% of the
# rows, then split that hold-out in half.
train_fraction = 0.6
train, test = train_test_split(results, test_size=1 - train_fraction)
valid, test = train_test_split(test, test_size=0.5)

# Note: DataFrame.size is the number of cells (rows x columns), not rows.
print("All data: ", results.size)
print("Train size: ", train.size)
print("Test size: ", test.size)
print("Validate size: ", valid.size)
print(results.describe(include='all'))

# Sanity check: the subsets together should add up to the whole dataset.
print(train.size + test.size + valid.size)
|
Loading…
Reference in New Issue
Block a user