cutoff
This commit is contained in:
parent
c1c0e10e50
commit
80326968b1
12
Jenkinsfile
vendored
12
Jenkinsfile
vendored
@ -12,6 +12,12 @@ pipeline {
|
||||
description: 'Kaggle token',
|
||||
name: 'KAGGLE_KEY'
|
||||
)
|
||||
string(
|
||||
defaultValue: '1',
|
||||
description: 'Cutoff',
|
||||
name: 'CUTOFF',
|
||||
trim: false
|
||||
)
|
||||
}
|
||||
stages {
|
||||
stage('Checkout') {
|
||||
@ -26,11 +32,11 @@ pipeline {
|
||||
stage('Script') {
|
||||
steps {
|
||||
script {
|
||||
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
|
||||
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}", "CUTOFF=${params.CUTOFF}"]) {
|
||||
sh 'echo KAGGLE_USERNAME: $KAGGLE_USERNAME'
|
||||
sh 'kaggle datasets list'
|
||||
sh './download.sh'
|
||||
|
||||
sh './download.sh $CUTOFF > ./script_logs.txt'
|
||||
archiveArtifacts artifacts: 'car_prices.csv.dev, car_prices.csv.test, car_prices.csv.train', followSymlinks: false
|
||||
}
|
||||
}
|
||||
}
|
||||
|
53
Jenkinsfile2
53
Jenkinsfile2
@ -1,53 +0,0 @@
|
||||
// Declarative Jenkins pipeline: checks out the project repository, verifies
// that the Kaggle CLI can authenticate, and runs download.sh to fetch and
// split the dataset.
pipeline {
    agent any

    // Build parameters supplied by the user when the job is triggered.
    parameters {
        string(
            defaultValue: 'heatedboss2',
            description: 'Kaggle username',
            name: 'KAGGLE_USERNAME',
            trim: false
        )
        password(
            defaultValue: '',
            description: 'Kaggle token taken from kaggle.json file, as described in https://github.com/Kaggle/kaggle-api#api-credentials',
            name: 'KAGGLE_KEY'
        )
        string(
            defaultValue: '1',
            description: 'Cutoff lines',
            name: 'CUTOFF'
        )
    }

    // Exported to every step of every stage, so no withEnv wrapper or
    // per-step `export` is needed (an export inside one `sh` step would not
    // survive into the next step anyway, since each step is its own shell).
    environment {
        KAGGLE_USERNAME = "${params.KAGGLE_USERNAME}"
        KAGGLE_KEY = "${params.KAGGLE_KEY}"
        CUTOFF = "${params.CUTOFF}"
    }

    stages {
        stage('Checkout') {
            steps {
                checkout([$class: 'GitSCM', branches: [
                    [name: '*/master']
                ], extensions: [], userRemoteConfigs: [
                    [credentialsId: '8b8d54ee-f03c-4980-90b1-959faa97082b', url: 'https://git.wmi.amu.edu.pl/s444507/ium_444507.git']
                ]])
            }
        }

        stage('Script') {
            steps {
                // Single-quoted Groovy strings are passed to the shell
                // verbatim, so only shell variables ($NAME) may appear in
                // them; a Groovy expression such as ${params.X} would reach
                // the shell unexpanded and fail as an invalid substitution.
                sh 'echo KAGGLE_USERNAME: $KAGGLE_USERNAME'
                // Fails the build early if the Kaggle credentials are rejected.
                sh 'kaggle datasets list'
                // Forward the CUTOFF build parameter to the script.
                sh './download.sh "$CUTOFF"'
            }
        }
    }
}
|
35
download.sh
35
download.sh
@ -1,2 +1,37 @@
|
||||
#!/bin/bash
#
# Download the car-prices-poland dataset from Kaggle, shuffle its rows,
# optionally truncate it, and split it into test / dev / train files.
#
# Usage: ./download.sh [CUTOFF]
#   CUTOFF  number of shuffled data rows to keep (non-negative integer).
#           When omitted, every data row is kept.
#
# Files written to the current directory:
#   car_prices.csv.test   first  1/6 of the kept rows
#   car_prices.csv.dev    second 1/6 of the kept rows
#   car_prices.csv.train  all remaining rows
#
# The kaggle CLI must be installed and able to authenticate.

# Stop at the first failing command (e.g. a failed download or unzip) and
# treat references to unset variables as errors.
set -eu

cutoff="${1:-}"
if [ -n "$cutoff" ] && ! [[ "$cutoff" =~ ^[0-9]+$ ]]; then
    echo "Usage: $0 [CUTOFF]  (CUTOFF must be a non-negative integer)" >&2
    exit 1
fi

source_csv=./Car_Prices_Poland_Kaggle.csv
shuffled_csv=./Car_Prices_Poland_Kaggle_shuf.csv

echo 'Downloading Dataset'
kaggle datasets download -d aleksandrglotov/car-prices-poland
echo 'Dataset downloaded'

echo 'Unzippig Dataset'
unzip -o car-prices-poland.zip
echo 'Dataset unzipped'

len=$(wc -l < "$source_csv")
echo 'Initial dataset count:' $len

echo 'CUTOFF VALUE: ' $cutoff

echo 'Skip first header row and shuffle'
# `tail -n +2` prints from the second line onward, i.e. drops the CSV header.
# `shuf -n N` emits at most N shuffled lines, which replaces `shuf | head -n N`
# without leaving shuf writing into a closed pipe.
if [ -n "$cutoff" ]; then
    tail -n +2 "$source_csv" | shuf -n "$cutoff" > "$shuffled_csv"
else
    tail -n +2 "$source_csv" | shuf > "$shuffled_csv"
fi
echo 'Shuffled'

len=$(wc -l < "$shuffled_csv")
echo 'Dataset count after cutoff:' $len

# len1: size of the test split and of the dev split (1/6 of the rows each).
# len2: 1-based line number where the train split starts.
len1=$((len / 6))
len2=$((len1 * 2 + 1))
echo 'len: '$len
echo 'len1: '$len1
echo 'len2: '$len2

echo 'Divide and save to files'
# test  = lines 1 .. len1
head -n "$len1" "$shuffled_csv" > car_prices.csv.test
# dev   = lines len1+1 .. 2*len1: take the first 2*len1 lines, keep the last
#         len1 of them, so the dev split does not repeat the test rows.
head -n "$((len1 * 2))" "$shuffled_csv" | tail -n "$len1" > car_prices.csv.dev
# train = lines 2*len1+1 .. end
tail -n +"$len2" "$shuffled_csv" > car_prices.csv.train
rm "$shuffled_csv"

echo 'Divided datasets count'
wc -l car_prices.csv.*
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user