changed Jenkinsfile
This commit is contained in:
parent
e0e26bfe1d
commit
430f10ce26
3
Jenkinsfile
vendored
3
Jenkinsfile
vendored
@ -4,6 +4,7 @@ pipeline {
|
|||||||
parameters {
|
parameters {
|
||||||
string(name: 'KAGGLE_USERNAME', defaultValue: 'gulczas', description: 'Kaggle username')
|
string(name: 'KAGGLE_USERNAME', defaultValue: 'gulczas', description: 'Kaggle username')
|
||||||
password(name: 'KAGGLE_KEY', defaultValue: '', description: 'Kaggle API key')
|
password(name: 'KAGGLE_KEY', defaultValue: '', description: 'Kaggle API key')
|
||||||
|
string(name: 'CUTOFF', defaultValue: '10', description: 'Number of rows to cut')
|
||||||
}
|
}
|
||||||
|
|
||||||
stages {
|
stages {
|
||||||
@ -19,7 +20,7 @@ pipeline {
|
|||||||
"KAGGLE_USERNAME=${env.KAGGLE_USERNAME}",
|
"KAGGLE_USERNAME=${env.KAGGLE_USERNAME}",
|
||||||
"KAGGLE_KEY=${env.KAGGLE_KEY}"])
|
"KAGGLE_KEY=${env.KAGGLE_KEY}"])
|
||||||
{
|
{
|
||||||
sh 'bash ./download_dataset.sh'
|
// Double-quoted GString so Groovy substitutes params.CUTOFF before the shell runs;
// in single quotes the shell receives the literal ${params.CUTOFF} and fails with "bad substitution".
sh "bash ./download_dataset.sh ${params.CUTOFF}"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -4,36 +4,23 @@ pip install kaggle --upgrade
|
|||||||
|
|
||||||
kaggle datasets download -d gulczas/spotify-dataset
|
kaggle datasets download -d gulczas/spotify-dataset
|
||||||
|
|
||||||
unzip spotify-dataset.zip
|
unzip -o spotify-dataset.zip
|
||||||
|
|
||||||
|
echo "------------------ Shufle ------------------"
|
||||||
shuf Spotify_Dataset.csv -o shuffled_spotify.csv
|
shuf Spotify_Dataset.csv -o shuffled_spotify.csv
|
||||||
|
|
||||||
head -n 100 shuffled_spotify.csv > subset1.csv
|
echo "------------------ Cut off top: ${1} rows ------------------"
|
||||||
tail -n 100 shuffled_spotify.csv > subset2.csv
|
# Keep only the first N shuffled rows; N is the first script argument and
# falls back to 10 (the Jenkins CUTOFF default) when the script is run without one.
head -n "${1:-10}" shuffled_spotify.csv > cutoff_spotify.csv
|
||||||
|
|
||||||
cut -d ',' -f 1,2,3 shuffled_spotify.csv > trimmed_spotify.csv
|
echo "------------------ Split ------------------"
|
||||||
|
total_lines=$(wc -l < cutoff_spotify.csv)
|
||||||
|
num_test=$((total_lines / 10))
|
||||||
|
num_train=$((total_lines - (num_test * 2)))
|
||||||
|
num_validation=$num_test
|
||||||
|
|
||||||
cut -d ',' -f 1,2,4,5,6 shuffled_spotify.csv > processed_spotify.csv
|
head -n $num_train cutoff_spotify.csv > train.csv
|
||||||
|
# Validation = first half of the last 2*num_test rows (the rows right after the train split).
tail -n $((num_test * 2)) cutoff_spotify.csv | head -n $num_validation > validation.csv
|
||||||
echo "Shuffled dataset:" > results.txt
|
# Test = the final num_test rows; slicing the same num_test-row tail twice left test.csv empty.
tail -n $num_test cutoff_spotify.csv > test.csv
|
||||||
head shuffled_spotify.csv >> results.txt
|
|
||||||
echo "" >> results.txt
|
|
||||||
|
|
||||||
echo "Subset 1:" >> results.txt
|
|
||||||
head subset1.csv >> results.txt
|
|
||||||
echo "" >> results.txt
|
|
||||||
|
|
||||||
echo "Subset 2:" >> results.txt
|
|
||||||
head subset2.csv >> results.txt
|
|
||||||
echo "" >> results.txt
|
|
||||||
|
|
||||||
echo "Trimmed dataset:" >> results.txt
|
|
||||||
head trimmed_spotify.csv >> results.txt
|
|
||||||
echo "" >> results.txt
|
|
||||||
|
|
||||||
echo "Processed dataset:" >> results.txt
|
|
||||||
head processed_spotify.csv >> results.txt
|
|
||||||
echo "" >> results.txt
|
|
||||||
|
|
||||||
mkdir -p artifacts
|
mkdir -p artifacts
|
||||||
mv shuffled_spotify.csv subset1.csv subset2.csv trimmed_spotify.csv processed_spotify.csv results.txt artifacts/
|
mv Spotify_Dataset.csv cutoff_spotify.csv train.csv validation.csv test.csv artifacts/
|
Loading…
Reference in New Issue
Block a user