updated data download
This commit is contained in:
parent
b7b992cb8a
commit
279dbc885a
3
Jenkinsfile
vendored
3
Jenkinsfile
vendored
@ -4,7 +4,6 @@ pipeline {
|
||||
parameters {
|
||||
string(name: 'KAGGLE_USERNAME', defaultValue: 'gulczas', description: 'Kaggle username')
|
||||
password(name: 'KAGGLE_KEY', defaultValue: '', description: 'Kaggle API key')
|
||||
string(name: 'CUTOFF', defaultValue: '90', description: 'Number of rows to cut')
|
||||
}
|
||||
|
||||
stages {
|
||||
@ -27,7 +26,7 @@ pipeline {
|
||||
"KAGGLE_USERNAME=${env.KAGGLE_USERNAME}",
|
||||
"KAGGLE_KEY=${env.KAGGLE_KEY}"])
|
||||
{
|
||||
sh "bash ./download_dataset.sh ${params.CUTOFF}"
|
||||
sh "bash ./download_dataset.sh"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2,25 +2,11 @@
|
||||
|
||||
pip install kaggle --upgrade
|
||||
|
||||
kaggle datasets download -d gulczas/spotify-dataset
|
||||
kaggle datasets download -d gulczas/spotify-dataset --unzip
|
||||
|
||||
unzip -o spotify-dataset.zip
|
||||
kaggle datasets download -d joebeachcapital/30000-spotify-songs --unzip
|
||||
|
||||
echo "------------------ Shufle ------------------"
|
||||
shuf Spotify_Dataset.csv -o shuffled_spotify.csv
|
||||
|
||||
echo "------------------ Cut off to top $1 rows ------------------"
|
||||
head -n $1 shuffled_spotify.csv > cutoff_spotify.csv
|
||||
|
||||
echo "------------------ Split ------------------"
|
||||
total_lines=$(wc -l < cutoff_spotify.csv)
|
||||
num_test=$((total_lines / 10))
|
||||
num_train=$((total_lines - (num_test * 2)))
|
||||
num_validation=$num_test
|
||||
|
||||
head -n $num_train cutoff_spotify.csv > train.csv
|
||||
tail -n $((num_test+num_validation)) cutoff_spotify.csv | head -n $num_test > test.csv
|
||||
tail -n $num_validation cutoff_spotify.csv > validation.csv
|
||||
echo "test test test"
|
||||
|
||||
mkdir -p artifacts
|
||||
mv Spotify_Dataset.csv cutoff_spotify.csv train.csv validation.csv test.csv artifacts/
|
||||
mv Spotify_Dataset.csv spotify_songs.csv artifacts/
|
Loading…
Reference in New Issue
Block a user