This commit is contained in:
Adam Wojdyla 2022-03-27 14:03:36 +02:00
parent c1c0e10e50
commit 80326968b1
4 changed files with 45 additions and 58 deletions

12
Jenkinsfile vendored
View File

@ -12,6 +12,12 @@ pipeline {
description: 'Kaggle token',
name: 'KAGGLE_KEY'
)
string(
defaultValue: '1',
description: 'Cutoff',
name: 'CUTOFF',
trim: false
)
}
stages {
stage('Checkout') {
@ -26,11 +32,11 @@ pipeline {
stage('Script') {
steps {
script {
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}", "CUTOFF=${params.CUTOFF}"]) {
sh 'echo KAGGLE_USERNAME: $KAGGLE_USERNAME'
sh 'kaggle datasets list'
sh './download.sh'
sh './download.sh $CUTOFF > ./script_logs.txt'
archiveArtifacts artifacts: 'car_prices.csv.dev, car_prices.csv.test, car_prices.csv.train', followSymlinks: false
}
}
}

View File

@ -1,53 +0,0 @@
// Declarative pipeline: checks out the repository, verifies Kaggle API access,
// and runs download.sh to fetch and split the dataset.
//
// Parameters:
//   KAGGLE_USERNAME - Kaggle account name used by the kaggle CLI.
//   KAGGLE_KEY      - Kaggle API token (password parameter).
//   CUTOFF          - value handed to download.sh as its first argument.
pipeline {
    agent any
    parameters {
        string(
            defaultValue: 'heatedboss2',
            description: 'Kaggle username',
            name: 'KAGGLE_USERNAME',
            trim: false
        )
        password(
            defaultValue: '',
            description: 'Kaggle token taken from kaggle.json file, as described in https://github.com/Kaggle/kaggle-api#api-credentials',
            name: 'KAGGLE_KEY'
        )
        string(
            defaultValue: '1',
            description: 'Cutoff lines',
            name: 'CUTOFF'
        )
    }
    // Exported to every `sh` step in the pipeline, so no per-step `export`
    // or additional withEnv wrapper is required.
    environment {
        KAGGLE_USERNAME = "${params.KAGGLE_USERNAME}"
        KAGGLE_KEY = "${params.KAGGLE_KEY}"
        CUTOFF = "${params.CUTOFF}"
    }
    stages {
        stage('Checkout') {
            steps {
                checkout([$class: 'GitSCM', branches: [
                    [name: '*/master']
                ], extensions: [], userRemoteConfigs: [
                    [credentialsId: '8b8d54ee-f03c-4980-90b1-959faa97082b', url: 'https://git.wmi.amu.edu.pl/s444507/ium_444507.git']
                ]])
            }
        }
        stage('Script') {
            steps {
                // Single-quoted Groovy strings: the variables are expanded by
                // the shell from the environment, not interpolated by Groovy.
                // The former `sh 'export X=${params.X}'` steps were removed:
                // the shell rejected `${params.X}` as a bad substitution, and
                // an export made in one `sh` step never reaches the next one.
                sh 'echo KAGGLE_USERNAME: $KAGGLE_USERNAME'
                sh 'kaggle datasets list'
                // Pass the previously unused CUTOFF parameter to the script.
                sh './download.sh "$CUTOFF"'
            }
        }
    }
}

View File

@ -1,2 +1,37 @@
#!/bin/bash
# Download the "car-prices-poland" Kaggle dataset, keep a random sample of its
# data rows, and split that sample into test / dev / train files.
#
# Usage: ./download.sh CUTOFF
#   CUTOFF  integer handed to `head -n`; limits how many shuffled rows are kept.
#
# Outputs (written to the current directory):
#   car_prices.csv.test   rows 1 .. len/6 of the sample
#   car_prices.csv.dev    rows len/6+1 .. 2*(len/6) of the sample
#   car_prices.csv.train  all remaining rows
#
# Requires the `kaggle` CLI, `unzip` and `shuf` on PATH.
# NOTE(review): kaggle credentials are presumably supplied through the
# environment by the caller - confirm.

cutoff=$1
if ! [[ $cutoff =~ ^-?[0-9]+$ ]]; then
    echo "Usage: $0 CUTOFF (integer number of rows to keep)" >&2
    exit 1
fi

src=Car_Prices_Poland_Kaggle.csv
shuffled=Car_Prices_Poland_Kaggle_shuf.csv

echo 'Downloading Dataset'
# Download once; stop here instead of splitting a missing or stale file.
if ! kaggle datasets download -d aleksandrglotov/car-prices-poland; then
    echo 'Dataset download failed' >&2
    exit 1
fi
echo 'Dataset downloaded'

echo 'Unzippig Dataset'
unzip -o car-prices-poland.zip
# Check for the extracted file rather than unzip's exit status, which is
# non-zero for mere warnings as well.
if [ ! -f "$src" ]; then
    echo "Expected file $src not found after unzip" >&2
    exit 1
fi
echo 'Dataset unzipped'

len=$(wc -l < "$src")
echo 'Initial dataset count:' $len
echo 'CUTOFF VALUE: ' "$cutoff"

echo 'Skip first header row and shuffle'
# `tail -n +2` drops the CSV header line before shuffling; the header is not
# carried over into any of the output files.
tail -n +2 "$src" | shuf | head -n "$cutoff" > "$shuffled"
echo 'Shuffled'

len=$(wc -l < "$shuffled")
echo 'Dataset count after cutoff:' $len

# len1: size of the test and dev splits (one sixth of the sample each).
# len2: 1-based line number at which the train split starts.
len1=$((len / 6))
len2=$((len1 * 2 + 1))
echo 'len: '$len
echo 'len1: '$len1
echo 'len2: '$len2

echo 'Divide and save to files'
head -n "$len1" "$shuffled" > car_prices.csv.test
# Take the first 2*len1 rows and keep the last len1 of them, so dev holds the
# second sixth instead of repeating the rows already written to test.
head -n "$((len1 * 2))" "$shuffled" | tail -n "$len1" > car_prices.csv.dev
tail -n "+$len2" "$shuffled" > car_prices.csv.train
rm "./$shuffled"

echo 'Divided datasets count'
wc -l car_prices.csv.*

View File

@ -1 +0,0 @@
echo 'Downloading Dataset'