From 3daf373d65565f75809bfc52cbc8407a5d9d7cce Mon Sep 17 00:00:00 2001 From: Robert Bendun Date: Wed, 29 Mar 2023 01:18:17 +0200 Subject: [PATCH] train & test sets --- Jenkinsfile | 2 +- prepare-ztm-data.sh | 14 +++++++++----- 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 22b385e..70a2d05 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -2,7 +2,7 @@ node { stage('Build') { checkout([$class: 'GitSCM', branches: [[name: 'ztm']], extensions: [], userRemoteConfigs: [[url: 'https://git.wmi.amu.edu.pl/s452639/ium_452639']]]) sh './prepare-ztm-data.sh' - archiveArtifacts artifacts: 'stop_times.csv', followSymlinks: false + archiveArtifacts artifacts: 'train.csv,test.csv', followSymlinks: false } } diff --git a/prepare-ztm-data.sh b/prepare-ztm-data.sh index ee00d58..4b10638 100755 --- a/prepare-ztm-data.sh +++ b/prepare-ztm-data.sh @@ -5,7 +5,7 @@ set -xe -o pipefail # Disable to allow to work in bare jenkins # make normalize csv2tsv/csv2tsv -keep=(stops.txt trips.txt stop_times.txt) +keep=stop_times.txt mkdir -p data && cd data xargs -- wget --no-verbose --no-clobber <../ztm-data.txt @@ -21,10 +21,14 @@ done cd .. -for k in "${keep[@]}"; do - csv="${k%.txt}.csv" - cat $(find data -name "$k") > "$csv" -done +k=$keep +csv="${k%.txt}.csv" +cat $(find data -name "$k") | shuf > "$csv" +train_size=$(( $(wc -l "$csv" | cut -f1 -d' ') * 8 / 10 )) +echo $train_size + +head -n $train_size $csv >train.csv +tail -n +$train_size $csv >test.csv # Disable to allow to work in bare jenkins # for k in "${keep[@]}"; do