diff --git a/Dockerfile b/Dockerfile index bf742ff..edede8b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -6,4 +6,5 @@ WORKDIR /app COPY ["requirements.txt", "./"] RUN pip3 install -r requirements.txt -COPY ["Zadanie 1.py", "."] \ No newline at end of file +COPY ["Zadanie 1.py", "."] +COPY ["stats.py", "."] \ No newline at end of file diff --git a/Jenkinsfile b/Jenkinsfile index 1684900..37c870b 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -16,7 +16,7 @@ pipeline { ) } stages { - stage('docker: Download artifacts') + stage('download and process data') { steps { withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", diff --git a/Jenkinsfile-stat b/Jenkinsfile-stat index f0bf12d..78cb4ea 100644 --- a/Jenkinsfile-stat +++ b/Jenkinsfile-stat @@ -1,5 +1,7 @@ pipeline { - agent any + agent { + docker { image 'adnovac/ium_s434760:latest' } + } parameters{ buildSelector( defaultSelector: lastSuccessful(), @@ -8,23 +10,12 @@ pipeline { ) } stages { - stage('checkout: Check out from version control') { - steps { - git 'https://git.wmi.amu.edu.pl/s434760/ium_434760.git' - } - } stage('copy artifacts') { steps { copyArtifacts(fingerprintArtifacts: true, projectName: 's434760-create-dataset', selector: buildParameter('WHICH_BUILD')) - } - } - stage('sh: Shell Script') - { - steps { - sh "chmod 777 ./script_stat.sh" - sh './script_stat.sh' + sh 'python3.8 stats.py' } } stage('archive artifacts') { diff --git a/script.sh b/script.sh deleted file mode 100644 index be7bf04..0000000 --- a/script.sh +++ /dev/null @@ -1,9 +0,0 @@ -kaggle datasets download -d karangadiya/fifa19 -unzip -o fifa19.zip - -x=$(($CUTOFF/10)) -head -n 1 data.csv > header.csv -tail -n +2 data.csv| tail -n $CUTOFF | shuf > data_shuf.csv -head -n $(($x*2)) data_shuf.csv > test.csv -head -n $(($x*2)) data_shuf.csv | tail -n 3330 > validation.csv -tail -n $(($x*6)) data_shuf.csv > train.csv \ No newline at end of file diff --git a/script_stat.sh b/script_stat.sh deleted file mode 100644 index 5b910d6..0000000 --- 
"""Report the line count of every dataset CSV file into ``stat.txt``.

Run as a script from the directory that holds the CSV files. One row of the
form ``"<file> - <n> lines"`` is written per file, in the order given by
``DATASET_FILES``.
"""
import os

# Files to measure, in the order their rows appear in the report.
DATASET_FILES = ("data.csv", "dev.csv", "test.csv", "train.csv")

# Name of the report file; it is overwritten on every run.
STATS_FILE = "stat.txt"


def count_lines(path):
    """Return the number of lines in the text file at *path*.

    The file is iterated line by line instead of being read into a list, so
    the whole file is never held in memory at once. A final line without a
    trailing newline still counts as a line; an empty file yields ``0``.

    Raises:
        OSError: if the file cannot be opened (e.g. ``FileNotFoundError``).
    """
    with open(path, "r") as handle:
        return sum(1 for _ in handle)


def build_stats(directory=".", filenames=DATASET_FILES):
    """Return one ``"<file> - <n> lines\\n"`` report row per file name.

    Args:
        directory: directory the files are looked up in.
        filenames: file names to measure; row order follows this order.

    Raises:
        OSError: if any of the files cannot be opened.
    """
    return [
        f"{name} - {count_lines(os.path.join(directory, name))} lines\n"
        for name in filenames
    ]


def main(directory="."):
    """Count the lines of every dataset file and write the report.

    All files are counted before the report is opened, so a missing input
    file raises without touching an existing ``stat.txt``.

    Args:
        directory: directory holding the CSV files; the report is written
            there as well.
    """
    stats = build_stats(directory)
    # Plain "w" is enough: the report is only written, never read back.
    with open(os.path.join(directory, STATS_FILE), "w") as report:
        report.writelines(stats)


if __name__ == "__main__":
    main()