cutoff
This commit is contained in:
parent
c1c0e10e50
commit
80326968b1
12
Jenkinsfile
vendored
12
Jenkinsfile
vendored
@ -12,6 +12,12 @@ pipeline {
|
|||||||
description: 'Kaggle token',
|
description: 'Kaggle token',
|
||||||
name: 'KAGGLE_KEY'
|
name: 'KAGGLE_KEY'
|
||||||
)
|
)
|
||||||
|
string(
|
||||||
|
defaultValue: '1',
|
||||||
|
description: 'Cutoff',
|
||||||
|
name: 'CUTOFF',
|
||||||
|
trim: false
|
||||||
|
)
|
||||||
}
|
}
|
||||||
stages {
|
stages {
|
||||||
stage('Checkout') {
|
stage('Checkout') {
|
||||||
@ -26,11 +32,11 @@ pipeline {
|
|||||||
stage('Script') {
|
stage('Script') {
|
||||||
steps {
|
steps {
|
||||||
script {
|
script {
|
||||||
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
|
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}", "CUTOFF=${params.CUTOFF}"]) {
|
||||||
sh 'echo KAGGLE_USERNAME: $KAGGLE_USERNAME'
|
sh 'echo KAGGLE_USERNAME: $KAGGLE_USERNAME'
|
||||||
sh 'kaggle datasets list'
|
sh 'kaggle datasets list'
|
||||||
sh './download.sh'
|
sh './download.sh $CUTOFF > ./script_logs.txt'
|
||||||
|
archiveArtifacts artifacts: 'car_prices.csv.dev, car_prices.csv.test, car_prices.csv.train', followSymlinks: false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
53
Jenkinsfile2
53
Jenkinsfile2
@ -1,53 +0,0 @@
|
|||||||
pipeline {
|
|
||||||
agent any
|
|
||||||
|
|
||||||
parameters {
|
|
||||||
string(
|
|
||||||
defaultValue: 'heatedboss2',
|
|
||||||
description: 'Kaggle username',
|
|
||||||
name: 'KAGGLE_USERNAME',
|
|
||||||
trim: false
|
|
||||||
)
|
|
||||||
password(
|
|
||||||
defaultValue: '',
|
|
||||||
description: 'Kaggle token taken from kaggle.json file, as described in https://github.com/Kaggle/kaggle-api#api-credentials',
|
|
||||||
name: 'KAGGLE_KEY'
|
|
||||||
)
|
|
||||||
string(
|
|
||||||
defaultValue: '1',
|
|
||||||
description: 'Cutoff lines',
|
|
||||||
name: 'CUTOFF'
|
|
||||||
)
|
|
||||||
}
|
|
||||||
environment {
|
|
||||||
KAGGLE_USERNAME="$params.KAGGLE_USERNAME"
|
|
||||||
KAGGLE_KEY="$params.KAGGLE_KEY"
|
|
||||||
}
|
|
||||||
|
|
||||||
stages {
|
|
||||||
stage('Checkout') {
|
|
||||||
steps {
|
|
||||||
checkout([$class: 'GitSCM', branches: [
|
|
||||||
[name: '*/master']
|
|
||||||
], extensions: [], userRemoteConfigs: [
|
|
||||||
[credentialsId: '8b8d54ee-f03c-4980-90b1-959faa97082b', url: 'https://git.wmi.amu.edu.pl/s444507/ium_444507.git']
|
|
||||||
]])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
stage('Script'){
|
|
||||||
steps {
|
|
||||||
script {
|
|
||||||
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
|
|
||||||
"KAGGLE_KEY=${params.KAGGLE_KEY}" ]) {
|
|
||||||
sh 'export KAGGLE_USERNAME=${params.KAGGLE_USERNAME}'
|
|
||||||
sh 'export KAGGLE_KEY=${params.KAGGLE_KEY}'
|
|
||||||
sh 'echo KAGGLE_USERNAME: $KAGGLE_USERNAME'
|
|
||||||
sh 'kaggle datasets list'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
sh './download.sh'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
37
download.sh
37
download.sh
@ -1,2 +1,37 @@
|
|||||||
|
#!/bin/bash
# Download the Kaggle "car-prices-poland" dataset, keep a random sample of
# CUTOFF data rows and split that sample into test / dev / train files.
#
# Usage:   ./download.sh CUTOFF
#   CUTOFF   value handed to `head -n` to limit the shuffled rows that are kept
#
# Outputs (written to the current directory):
#   car_prices.csv.test    first  1/6 of the sample
#   car_prices.csv.dev     second 1/6 of the sample
#   car_prices.csv.train   everything that remains
#
# Progress messages go to stdout.

# Stop at the first failing command so a broken download or unzip does not
# silently produce empty split files with a zero exit status.
set -e

# Fail fast with a usage message when the cutoff argument is missing.
cutoff="${1:?usage: $0 CUTOFF}"

dataset=./Car_Prices_Poland_Kaggle.csv
sample=./Car_Prices_Poland_Kaggle_shuf.csv

echo 'Downloading Dataset'
kaggle datasets download -d aleksandrglotov/car-prices-poland
echo 'Dataset downloaded'

echo 'Unzipping Dataset'
unzip -o car-prices-poland.zip
echo 'Dataset unzipped'

len=$(wc -l < "$dataset")
echo 'Initial dataset count:' "$len"

echo 'CUTOFF VALUE: ' "$cutoff"

echo 'Skip first header row and shuffle'
# The example in the materials (head -n -1) does not work, so the header row
# is dropped with `tail -n +2` instead.
tail -n +2 "$dataset" | shuf | head -n "$cutoff" > "$sample"
echo 'Shuffled'

len=$(wc -l < "$sample")
echo 'Dataset count after cutoff:' "$len"

# len1: size of the test split and of the dev split (one sixth each).
# len2: 1-based line number at which the train split starts.
len1=$((len / 6))
len2=$((len1 * 2 + 1))
echo 'len: '"$len"
echo 'len1: '"$len1"
echo 'len2: '"$len2"

echo 'Divide and save to files'
# test  = lines 1 .. len1
head -n "$len1" "$sample" > car_prices.csv.test
# dev   = lines len1+1 .. 2*len1 (take the first 2*len1 lines, keep the last len1)
head -n "$((len1 * 2))" "$sample" | tail -n "$len1" > car_prices.csv.dev
# train = lines 2*len1+1 .. end
tail -n "+$len2" "$sample" > car_prices.csv.train
rm "$sample"

echo 'Divided datasets count'
wc -l car_prices.csv.*
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user