ium_464953/download_dataset.sh

43 lines
1.1 KiB
Bash
Raw Normal View History

2024-03-23 20:11:47 +01:00
#!/bin/bash
#
# Download the Kaggle dataset brunoalercon123/top-200-spotify-songs-dataset
# and unpack it into ./dataset.
#
# Usage: download_dataset.sh [KAGGLE_USERNAME [KAGGLE_KEY]]
#
# Arguments:
#   $1 - Kaggle username (optional)
#   $2 - Kaggle API key  (optional)
# When an argument is omitted, whatever credentials the kaggle client finds
# on its own (environment or its config file) are used instead.
set -u

KAGGLE_DATASET='brunoalercon123/top-200-spotify-songs-dataset'
DATASET_DIR='./dataset'

# Print a message to stderr and abort the script.
die() {
  printf 'download_dataset: %s\n' "$*" >&2
  exit 1
}

# Hand the credentials to the kaggle client through the environment.
# `kaggle datasets download` has no username/key flags, and `-p` is its
# download-path option, so credentials must not be put on its command line.
# They are deliberately never echoed: this output usually lands in CI logs.
if [[ -n "${1:-}" ]]; then
  export KAGGLE_USERNAME="$1"
fi
if [[ -n "${2:-}" ]]; then
  export KAGGLE_KEY="$2"
fi

pip install --upgrade kaggle \
  || die "could not install/upgrade the kaggle client"

kaggle datasets download -d "$KAGGLE_DATASET" -p "$DATASET_DIR" \
  || die "download of ${KAGGLE_DATASET} failed (check the Kaggle credentials)"

# -o: overwrite files from a previous run without prompting.
unzip -o "${DATASET_DIR}/top-200-spotify-songs-dataset.zip" -d "$DATASET_DIR" \
  || die "could not unpack ${DATASET_DIR}/top-200-spotify-songs-dataset.zip"
2024-03-23 20:11:47 +01:00
#######################################
# Shuffle the dataset CSV, derive two row subsets and two column-trimmed
# copies, write a short preview report, and leave all of it in one
# output directory.
# Arguments:
#   $1 - input CSV (optional). Default: Spotify-200-Songs.csv in the
#        current directory, else ./dataset/Spotify-200-Songs.csv.
#   $2 - output directory (optional, default: artifacts).
# Outputs (inside the output directory):
#   shuffled_spotify.csv   all input lines in random order
#   subset1.csv            first 100 lines of the shuffled file
#   subset2.csv            last 100 lines of the shuffled file
#   trimmed_spotify.csv    comma-separated fields 1-3
#   processed_spotify.csv  comma-separated fields 1,2,4,5,6
#   results.txt            first 10 lines of each file above, labelled
# Returns:
#   0 on success, 1 if the input is missing or any step fails.
#######################################
build_artifacts() {
  local src="${1:-}"
  local out_dir="${2:-artifacts}"
  local default_name='Spotify-200-Songs.csv'
  local shuffled
  local i

  if [[ -z "$src" ]]; then
    src="$default_name"
    # Fall back to ./dataset (where the download step unpacks the archive)
    # when the CSV is not in the current directory.
    if [[ ! -f "$src" && -f "./dataset/${default_name}" ]]; then
      src="./dataset/${default_name}"
    fi
  fi
  if [[ ! -f "$src" || ! -r "$src" ]]; then
    printf 'build_artifacts: input CSV not found or unreadable: %s\n' \
      "$src" >&2
    return 1
  fi

  mkdir -p -- "$out_dir" || return 1
  shuffled="${out_dir}/shuffled_spotify.csv"

  # NOTE(review): shuf treats every line alike, so if the CSV starts with a
  # header row it ends up at a random position - confirm this is intended.
  shuf -o "$shuffled" -- "$src" || return 1

  # For inputs shorter than 200 lines these two subsets overlap.
  head -n 100 -- "$shuffled" > "${out_dir}/subset1.csv" || return 1
  tail -n 100 -- "$shuffled" > "${out_dir}/subset2.csv" || return 1

  # cut splits on every comma; quoted fields containing commas are not
  # handled specially.
  cut -d ',' -f 1,2,3 -- "$shuffled" \
    > "${out_dir}/trimmed_spotify.csv" || return 1
  cut -d ',' -f 1,2,4,5,6 -- "$shuffled" \
    > "${out_dir}/processed_spotify.csv" || return 1

  # Report layout: (label, file) pairs, each followed by a blank line.
  local -a sections=(
    'Shuffled dataset:'  'shuffled_spotify.csv'
    'Subset 1:'          'subset1.csv'
    'Subset 2:'          'subset2.csv'
    'Trimmed dataset:'   'trimmed_spotify.csv'
    'Processed dataset:' 'processed_spotify.csv'
  )
  for ((i = 0; i < ${#sections[@]}; i += 2)); do
    printf '%s\n' "${sections[i]}"
    head -- "${out_dir}/${sections[i + 1]}" || return 1
    printf '\n'
  done > "${out_dir}/results.txt"
}

# Called without arguments on purpose: the script's own positional
# parameters are not file paths. As the final command, its status becomes
# the script's exit status.
build_artifacts