This commit is contained in:
Mikołaj Pokrywka 2022-03-27 21:55:59 +02:00
parent e266ae9e53
commit 2b62b40568
2 changed files with 4 additions and 3 deletions

2
Jenkinsfile vendored
View File

@@ -13,7 +13,7 @@ pipeline {
name: 'KAGGLE_KEY'
)
string (
-defaultValue: '10000',
+defaultValue: '17000',
description: 'cut data',
name: 'CUTOFF',
trim: false

View File

@@ -7,8 +7,9 @@ head -n 1 fake_job_postings.csv > column_titles.csv
tail -n +2 fake_job_postings.csv > data_not_shuf.csv
echo "Create sets"
shuf data_not_shuf.csv > data_not_cutted.csv
-head -n $1 data_not_cutted.csv > data.csv
+head -n $CUTOFF data_not_cutted.csv > data.csv
sed -n '1,2500p' data.csv > data_test.csv
sed -n '2501,5000p' data.csv > data_dev.csv
tail -n +5001 data.csv > data_train.csv
-rm data.csv real-or-fake-fake-jobposting-prediction.zip fake_job_postings.csv column_titles.csv data_not_shuf.csv data_not_cutted.csv
+rm data.csv real-or-fake-fake-jobposting-prediction.zip fake_job_postings.csv column_titles.csv data_not_shuf.csv data_not_cutted.csv
+echo "BASH SCRIPT SUCCES"