train & test sets

This commit is contained in:
Robert Bendun 2023-03-29 01:18:17 +02:00
parent 7661ce87b7
commit 3daf373d65
2 changed files with 10 additions and 6 deletions

2
Jenkinsfile vendored
View File

@ -2,7 +2,7 @@ node {
stage('Build') { stage('Build') {
checkout([$class: 'GitSCM', branches: [[name: 'ztm']], extensions: [], userRemoteConfigs: [[url: 'https://git.wmi.amu.edu.pl/s452639/ium_452639']]]) checkout([$class: 'GitSCM', branches: [[name: 'ztm']], extensions: [], userRemoteConfigs: [[url: 'https://git.wmi.amu.edu.pl/s452639/ium_452639']]])
sh './prepare-ztm-data.sh' sh './prepare-ztm-data.sh'
archiveArtifacts artifacts: 'stop_times.csv', followSymlinks: false archiveArtifacts artifacts: 'train.csv,test.csv', followSymlinks: false
} }
} }

View File

@ -5,7 +5,7 @@ set -xe -o pipefail
# Disable to allow to work in bare jenkins # Disable to allow to work in bare jenkins
# make normalize csv2tsv/csv2tsv # make normalize csv2tsv/csv2tsv
keep=(stops.txt trips.txt stop_times.txt) keep=stop_times.txt
mkdir -p data && cd data mkdir -p data && cd data
xargs -- wget --no-verbose --no-clobber <../ztm-data.txt xargs -- wget --no-verbose --no-clobber <../ztm-data.txt
@ -21,10 +21,14 @@ done
cd .. cd ..
for k in "${keep[@]}"; do k=$keep
csv="${k%.txt}.csv" csv="${k%.txt}.csv"
cat $(find data -name "$k") > "$csv" cat $(find data -name "$k") | shuf > "$csv"
done train_size=$(( $(wc -l "$csv" | cut -f1 -d' ') * 8 / 10 ))
# Log the computed training-set size (80% of the shuffled row count).
echo "$train_size"
# The first train_size rows of the shuffled file form the training set.
head -n "$train_size" "$csv" >train.csv
# `tail -n +K` starts output AT line K, so begin one past the last training
# row; starting at train_size would put that row in both train.csv and
# test.csv and leak a training sample into the test set.
tail -n "+$((train_size + 1))" "$csv" >test.csv
# Disable to allow to work in bare jenkins # Disable to allow to work in bare jenkins
# for k in "${keep[@]}"; do # for k in "${keep[@]}"; do