script fix

This commit is contained in:
s434765 2021-03-27 21:39:10 +01:00
parent 4da4bb212a
commit 00e3446c93
2 changed files with 7 additions and 9 deletions

9
Jenkinsfile vendored
View File

@@ -9,8 +9,11 @@ node {
trim: false), trim: false),
password(defaultValue: '', password(defaultValue: '',
description: 'Kaggle token', description: 'Kaggle token',
name: 'KAGGLE_KEY') name: 'KAGGLE_KEY'),
string(defaultV string(defaultValue: '5000',
description: 'Data cutoff',
name: 'CUTOFF',
trim: false),
]) ])
] ]
) )
@@ -22,7 +25,7 @@ node {
sh ''' sh '''
#!/usr/bin/env bash #!/usr/bin/env bash
chmod 777 get_data_simple.sh chmod 777 get_data_simple.sh
./get_data_simple.sh ./get_data_simple.sh ${params.CUTOFF}"
''' '''
archiveArtifacts "data_dev" archiveArtifacts "data_dev"
archiveArtifacts "data_shuf" archiveArtifacts "data_shuf"

View File

@@ -5,15 +5,10 @@ if kaggle datasets download -d sgonkaggle/youtube-trend-with-subscriber && unzip
COUNT=$(wc -l "USvideos_modified.csv") COUNT=$(wc -l "USvideos_modified.csv")
echo "${COUNT}" echo "${COUNT}"
head -n -1 "USvideos_modified.csv" | shuf > "data_shuf" head -n -1 "USvideos_modified.csv" | shuf > "data_shuf"
head -n 544 "data_shuf" > "data_test" head -n "$1" "data_shuf" > "data_train"
head -n 1088 "data_shuf" | tail -n 544 > "data_dev"
head -n +1089 "data_shuf" > "data_train"
echo "Shuffled dataset" echo "Shuffled dataset"
wc -l "data_shuf" wc -l "data_shuf"
echo "Test dataset" echo "Test dataset"
wc -l "data_test"
echo "Dev dataset"
wc -l "data_dev"
echo "Train dataset" echo "Train dataset"
wc -l "data_train" wc -l "data_train"
python main.py USvideos_modified.csv python main.py USvideos_modified.csv