2024-03-23 20:11:47 +01:00
|
|
|
#!/bin/bash
#
# Installs the Kaggle CLI, downloads and unpacks a dataset archive, then
# derives shuffled, subset and column-trimmed CSV files plus a results.txt
# preview, and finally moves the generated files into ./artifacts.

# Stop at the first failing command, on use of an unset variable, and on a
# failure in any pipeline stage, so that a failed install, download or unzip
# does not fall through to the processing steps and yield empty outputs.
set -euo pipefail

# Install the Kaggle CLI, upgrading it if an older version is present.
pip install --upgrade kaggle
|
|
|
|
|
2024-03-24 21:04:06 +01:00
|
|
|
# Download the dataset archive from Kaggle.
# NOTE(review): this slug points at a beer-reviews dataset, while the steps
# that follow unzip top-200-spotify-songs-dataset.zip and process
# Spotify-200-Songs.csv. As written, this download does not appear to produce
# the archive the rest of the script consumes — confirm the intended dataset.
kaggle datasets download \
  -d thedevastator/1-5-million-beer-reviews-from-beer-advocate
|
2024-03-23 20:11:47 +01:00
|
|
|
|
2024-03-23 21:09:35 +01:00
|
|
|
# Unpack the dataset archive into the current directory.
dataset_archive="top-200-spotify-songs-dataset.zip"

# Fail early with an explicit message when the archive is missing, instead of
# letting unzip and every later step fail on absent input.
if [[ ! -f "$dataset_archive" ]]; then
  printf 'error: %s not found in %s; download it before running this step\n' \
    "$dataset_archive" "$PWD" >&2
  exit 1
fi

# -o overwrites files left by an earlier run without prompting; without it a
# re-run stops at unzip's interactive "replace?" question.
unzip -o "$dataset_archive"
|
2024-03-23 20:11:47 +01:00
|
|
|
|
|
|
|
# Write a random permutation of the lines of Spotify-200-Songs.csv to
# shuffled_spotify.csv.
# NOTE(review): every line is shuffled, so if the CSV starts with a header
# row that row will land at a random position — confirm whether that matters.
shuf -o shuffled_spotify.csv Spotify-200-Songs.csv
|
|
|
|
|
|
|
|
# Split off two samples from the shuffled file: its first and its last
# SUBSET_SIZE lines (100 unless SUBSET_SIZE is set in the environment).
# If the file has fewer than 2 * SUBSET_SIZE lines the two samples overlap.
subset_size="${SUBSET_SIZE:-100}"
head -n "$subset_size" shuffled_spotify.csv > subset1.csv
tail -n "$subset_size" shuffled_spotify.csv > subset2.csv
|
|
|
|
|
|
|
|
# Derive two column projections of the shuffled file by splitting each line
# on commas:
#   trimmed_spotify.csv   - fields 1 to 3
#   processed_spotify.csv - fields 1, 2, 4, 5 and 6 (field 3 dropped)
# NOTE(review): cut splits on every comma and does not understand CSV
# quoting; if any field can contain a quoted comma the columns will shift —
# confirm against the data.
cut -d ',' -f 1-3 shuffled_spotify.csv > trimmed_spotify.csv
cut -d ',' -f 1,2,4-6 shuffled_spotify.csv > processed_spotify.csv
|
|
|
|
|
|
|
|
#######################################
# Print one preview section: a title line, the first 10 lines of a file
# (head's default), then an empty line.
# Arguments: $1 - section title
#            $2 - path of the file to preview
# Outputs:   writes the section to stdout
#######################################
print_preview() {
  local title=$1
  local file=$2
  printf '%s\n' "$title"
  head -- "$file"
  printf '\n'
}

# Build results.txt from scratch with a preview of every generated CSV.
# The single redirection replaces any previous results.txt.
{
  print_preview "Shuffled dataset:" shuffled_spotify.csv
  print_preview "Subset 1:" subset1.csv
  print_preview "Subset 2:" subset2.csv
  print_preview "Trimmed dataset:" trimmed_spotify.csv
  print_preview "Processed dataset:" processed_spotify.csv
} > results.txt
|
|
|
|
|
|
|
|
# Gather every generated file into ./artifacts, creating the directory if it
# does not exist yet (-p makes mkdir succeed when it already does).
mkdir -p artifacts
mv -- \
  shuffled_spotify.csv \
  subset1.csv \
  subset2.csv \
  trimmed_spotify.csv \
  processed_spotify.csv \
  results.txt \
  artifacts/
|