Patch summary: raise the default of the CUTOFF job parameter from 10000 to
17000, and make process_data.sh take the row limit from the CUTOFF variable
instead of its first positional argument ($1).
NOTE(review): the leading whitespace of the Jenkinsfile context lines was
reconstructed from a whitespace-collapsed copy of this patch -- confirm it
against the real file, or apply with `git apply --ignore-whitespace`.

diff --git a/Jenkinsfile b/Jenkinsfile
index 7806f7b..c7d092f 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -13,7 +13,7 @@ pipeline {
             name: 'KAGGLE_KEY'
         )
         string (
-            defaultValue: '10000',
+            defaultValue: '17000',
             description: 'cut data',
             name: 'CUTOFF',
             trim: false
diff --git a/process_data.sh b/process_data.sh
index 67c3dee..080fdae 100755
--- a/process_data.sh
+++ b/process_data.sh
@@ -7,8 +7,11 @@ head -n 1 fake_job_postings.csv > column_titles.csv
 tail -n +2 fake_job_postings.csv > data_not_shuf.csv
 echo "Create sets"
 shuf data_not_shuf.csv > data_not_cutted.csv
-head -n $1 data_not_cutted.csv > data.csv
+# Abort when CUTOFF is unset/empty instead of building empty sets and still reporting success.
+# Left unquoted on purpose: the Jenkins parameter is declared with trim: false, so word splitting drops stray whitespace.
+head -n ${CUTOFF:?CUTOFF must be set to the number of rows to keep} data_not_cutted.csv > data.csv
 sed -n '1,2500p' data.csv > data_test.csv
 sed -n '2501,5000p' data.csv > data_dev.csv
 tail -n +5001 data.csv > data_train.csv
-rm data.csv real-or-fake-fake-jobposting-prediction.zip fake_job_postings.csv column_titles.csv data_not_shuf.csv data_not_cutted.csv
\ No newline at end of file
+rm data.csv real-or-fake-fake-jobposting-prediction.zip fake_job_postings.csv column_titles.csv data_not_shuf.csv data_not_cutted.csv
+echo "BASH SCRIPT SUCCES"
\ No newline at end of file