Zadanie 2 część 2

This commit is contained in:
Anna Nowak 2021-04-11 23:06:50 +02:00
parent 9866366769
commit 0a38aa7d86
6 changed files with 26 additions and 28 deletions

View File

@ -7,3 +7,4 @@ COPY ["requirements.txt", "./"]
RUN pip3 install -r requirements.txt
COPY ["Zadanie 1.py", "."]
COPY ["stats.py", "."]

2
Jenkinsfile vendored
View File

@ -16,7 +16,7 @@ pipeline {
)
}
stages {
stage('docker: Download artifacts')
stage('download and process data')
{
steps {
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",

View File

@ -1,5 +1,7 @@
pipeline {
agent any
agent {
docker { image 'adnovac/ium_s434760:latest' }
}
parameters{
buildSelector(
defaultSelector: lastSuccessful(),
@ -8,23 +10,12 @@ pipeline {
)
}
stages {
stage('checkout: Check out from version control') {
steps {
git 'https://git.wmi.amu.edu.pl/s434760/ium_434760.git'
}
}
stage('copy artifacts')
{
steps
{
copyArtifacts(fingerprintArtifacts: true, projectName: 's434760-create-dataset', selector: buildParameter('WHICH_BUILD'))
}
}
stage('sh: Shell Script')
{
steps {
sh "chmod 777 ./script_stat.sh"
sh './script_stat.sh'
sh 'python3.8 stats.py'
}
}
stage('archive artifacts') {

View File

@ -1,9 +0,0 @@
# Download the FIFA 19 dataset from Kaggle and unpack it, overwriting any
# files left over from a previous run.
kaggle datasets download -d karangadiya/fifa19
unzip -o fifa19.zip
# One tenth of the CUTOFF row budget; every split below is sized in units of x.
x=$(($CUTOFF/10))
# Keep the CSV header row in its own file.
head -n 1 data.csv > header.csv
# Drop the header, keep the last CUTOFF data rows and shuffle them.
tail -n +2 data.csv | tail -n "$CUTOFF" | shuf > data_shuf.csv
# Disjoint splits of the shuffled rows:
#   test       = rows 1 .. 2x
#   validation = rows 2x+1 .. 4x
#   train      = last 6x rows
# The previous version cut validation out of the first 2x rows (with a
# hard-coded 3330), so it was a subset of test.csv.
head -n $(($x*2)) data_shuf.csv > test.csv
head -n $(($x*4)) data_shuf.csv | tail -n $(($x*2)) > validation.csv
tail -n $(($x*6)) data_shuf.csv > train.csv

View File

@ -1,4 +0,0 @@
# Append the `wc -l` line count of each dataset file to stat.txt, one entry
# per file, in the order: shuffled data, train, test, validation.
for csv_file in data_shuf.csv train.csv test.csv validation.csv; do
    wc -l "$csv_file" >> stat.txt
done

19
stats.py Normal file
View File

@ -0,0 +1,19 @@
# Report how many lines each dataset split contains.
#
# Reads data.csv, dev.csv, test.csv and train.csv from the current working
# directory and writes one "<file> - <count> lines" entry per file to stat.txt.

# Files to measure, in the order they appear in the report.
DATA_FILES = ("data.csv", "dev.csv", "test.csv", "train.csv")
# Report file; it is overwritten on every run.
OUTPUT_FILE = "stat.txt"


def count_lines(path):
    """Return the number of lines in the text file at ``path``.

    The file is streamed line by line instead of being loaded with
    ``readlines()``, so memory use does not grow with file size. Decoding is
    pinned to UTF-8 with undecodable bytes replaced, so the count does not
    depend on the locale of the machine running the script.

    Raises:
        OSError: if the file cannot be opened (e.g. it does not exist).
    """
    with open(path, "r", encoding="utf-8", errors="replace") as f:
        return sum(1 for _ in f)


def main():
    """Count the lines of every file in DATA_FILES and write OUTPUT_FILE."""
    # Build the whole report first so that a missing input file fails before
    # the output file is touched, as in the original script.
    stats = [f"{name} - {count_lines(name)} lines\n" for name in DATA_FILES]
    with open(OUTPUT_FILE, "w") as f:
        f.writelines(stats)


if __name__ == "__main__":
    main()