download dataset
This commit is contained in:
parent
424e4b2478
commit
060de23459
28
Jenkinsfile
vendored
28
Jenkinsfile
vendored
@ -1,10 +1,32 @@
|
|||||||
pipeline {
|
pipeline {
|
||||||
agent any
|
agent any
|
||||||
|
|
||||||
|
// Build parameters requested from the user when the job is started.
parameters {
|
||||||
|
// CUTOFF is handed to script1.sh as its first argument by the
// 'Pobierz i przeprocesuj zbiór' stage.
string(name: 'CUTOFF', defaultValue: '100', description: 'Ilość wierszy do odcięcia')
|
||||||
|
// KAGGLE_USERNAME and KAGGLE_KEY are re-exported to the shell step through
// withEnv in the same stage; KAGGLE_KEY is declared as a password parameter.
string(name: 'KAGGLE_USERNAME', defaultValue: '', description: 'Kaggle username')
|
||||||
|
password(name: 'KAGGLE_KEY', defaultValue: '', description: 'Kaggle API key')
|
||||||
|
}
|
||||||
|
|
||||||
stages {
|
stages {
|
||||||
stage('Stage 1') {
|
stage('Clone repo') {
|
||||||
steps {
|
steps {
|
||||||
echo 'elo 420 v2'
|
git url: "https://git.wmi.amu.edu.pl/s464937/ium_464937"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
stage('Pobierz i przeprocesuj zbiór') {
|
||||||
|
steps {
|
||||||
|
// Hand every build parameter to the shell through the environment instead of
// splicing it into the command text. With a double-quoted Groovy string the
// value of CUTOFF would become part of the shell command itself, so a value
// such as `1; rm -rf .` would be executed. The single-quoted `sh` string below
// is passed to the shell verbatim and the shell expands "$CUTOFF" as one
// quoted argument.
withEnv([
    "KAGGLE_USERNAME=${env.KAGGLE_USERNAME}",
    "KAGGLE_KEY=${env.KAGGLE_KEY}",
    "CUTOFF=${params.CUTOFF}"
]) {
    sh 'bash ./script1.sh "$CUTOFF"'
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
stage('Archive Results') {
|
||||||
|
steps {
|
||||||
|
archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
18
script1.sh
Normal file
18
script1.sh
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
#!/bin/bash
# Download the powerlifting dataset from Kaggle, keep the first CUTOFF lines of
# the CSV, shuffle the data rows and split them into data/train.csv,
# data/dev.csv and data/test.csv.
#
# Usage: script1.sh CUTOFF
#   CUTOFF     number of leading lines of the CSV to keep (header line included)
#
# Optional environment:
#   TRAIN_PCT  percentage of data rows written to train.csv (default 80)
#   DEV_PCT    percentage of data rows written to dev.csv   (default 10)
#   The remaining rows go to test.csv.
#
# Kaggle credentials are expected in the environment; the Jenkins job exports
# KAGGLE_USERNAME and KAGGLE_KEY before calling this script.

# Stop at the first failing command or unset variable so a failed download or
# unzip does not silently produce empty split files.
set -eu

if [ "$#" -lt 1 ]; then
    echo "Usage: $0 CUTOFF" >&2
    exit 2
fi

# Reject anything that is not a plain non-negative integer before it reaches head.
case "$1" in
    ''|*[!0-9]*)
        echo "CUTOFF must be a non-negative integer, got: '$1'" >&2
        exit 2
        ;;
esac
CUTOFF="$1"

# The previous 90/10 split left only the rounding remainder (0-1 rows) for the
# test set; 80/10/10 gives every split a real share of the data.
TRAIN_PCT="${TRAIN_PCT:-80}"
DEV_PCT="${DEV_PCT:-10}"

pip install kaggle
kaggle datasets download -d open-powerlifting/powerlifting-database
unzip -o powerlifting-database.zip

DATASET_FILE="openpowerlifting.csv"
CUTOFF_FILE="cutoff_$DATASET_FILE"

if [ ! -f "$DATASET_FILE" ]; then
    echo "Expected file '$DATASET_FILE' not found after unzip" >&2
    exit 1
fi

echo "Obcięte wiersze: ${CUTOFF}"
head -n "$CUTOFF" "$DATASET_FILE" > "$CUTOFF_FILE"

echo "Podział i wymieszanie"
# Row counts are based on data rows only (header excluded).
total_lines=$(tail -n +2 "$CUTOFF_FILE" | wc -l)
train_lines=$((total_lines * TRAIN_PCT / 100))
dev_lines=$((total_lines * DEV_PCT / 100))
test_lines=$((total_lines - train_lines - dev_lines))

# Shuffle only the data rows. Shuffling the whole file moved the header to a
# random position inside one of the splits and made shuffled.csv one line
# longer than total_lines, so one data row was dropped.
tail -n +2 "$CUTOFF_FILE" | shuf -o shuffled.csv

mkdir -p data

# write_split NAME START COUNT
# Write the CSV header followed by COUNT shuffled rows, starting at 1-based row
# START of shuffled.csv, into data/NAME.
write_split() {
    {
        head -n 1 "$CUTOFF_FILE"
        tail -n "+$2" shuffled.csv | head -n "$3"
    } > "data/$1"
}

write_split train.csv 1 "$train_lines"
write_split dev.csv "$((train_lines + 1))" "$dev_lines"
write_split test.csv "$((train_lines + dev_lines + 1))" "$test_lines"
|
Loading…
Reference in New Issue
Block a user