Zaktualizuj 'download.sh'

This commit is contained in:
Szymon Parafiński 2022-03-27 22:30:47 +02:00
parent 1c2a9d77ac
commit 4545012227

View File

@@ -1,18 +1,22 @@
#######################################
# Shuffle the records of dataset.csv (header row dropped), keep the first
# CUTOFF shuffled rows and split them into test / dev / train files:
# the first tenth goes to test, the second tenth to dev, the rest to train.
# Globals:   CUTOFF (read) - number of shuffled rows to keep
# Arguments: none
# Outputs:   writes dataset_test.csv, dataset_dev.csv and dataset_train.csv
#            to the current directory; prints their line counts to stdout
# Returns:   0 on success, non-zero if CUTOFF is unset, dataset.csv is
#            unreadable, or any step fails
#######################################
dataset_operation() {
  local total tenth dev_end train_start

  if [[ -z "${CUTOFF:-}" ]]; then
    printf 'dataset_operation: CUTOFF must be set\n' >&2
    return 1
  fi
  if [[ ! -r dataset.csv ]]; then
    printf 'dataset_operation: cannot read dataset.csv\n' >&2
    return 1
  fi

  # Skip the CSV header (line 1) so it cannot end up inside a split.
  tail -n +2 dataset.csv | shuf > dataset.csv.s || return 1
  head -n "$CUTOFF" dataset.csv.s > dataset.csv.shuf || return 1

  # Count the rows of the file that was actually written above.
  total=$(wc -l < dataset.csv.shuf) || return 1
  tenth=$(( total / 10 ))
  dev_end=$(( tenth * 2 ))
  train_start=$(( dev_end + 1 ))

  head -n "$tenth" dataset.csv.shuf > dataset_test.csv || return 1
  # Rows tenth+1 .. 2*tenth: take the first 2*tenth, keep the last tenth.
  head -n "$dev_end" dataset.csv.shuf | tail -n "$tenth" > dataset_dev.csv || return 1
  tail -n "+${train_start}" dataset.csv.shuf > dataset_train.csv || return 1

  # Remove both intermediate files, then report the size of each split.
  rm -f -- dataset.csv.s dataset.csv.shuf
  wc -l dataset_test.csv dataset_dev.csv dataset_train.csv
}
# Download the IMDB top-1000 dataset archive from Kaggle, unpack it into the
# current directory and rename the downloaded archive to dataset.zip.
# Each step aborts the script on failure so later steps never run on
# missing or partial files.
echo 'Start'
kaggle datasets download -d harshitshankhdhar/imdb-dataset-of-top-1000-movies-and-tv-shows || exit 1
echo 'Dataset downloaded'
unzip imdb-dataset-of-top-1000-movies-and-tv-shows.zip || exit 1
echo 'Dataset unziped'
mv -- imdb-dataset-of-top-1000-movies-and-tv-shows.zip dataset.zip || exit 1
echo 'Dataset renamed'
# NOTE(review): the former last line,
#   archiveArtifacts artifacts: 'dataset_dev.csv, dataset_test.csv, dataset_train.csv', followSymlinks: false
# is a Jenkins Pipeline step, not a shell command; bash fails on it with
# "command not found". Move it into the Jenkinsfile stage that runs this script.
# TODO(review): dataset_operation is defined above but not called in the
# visible script, and nothing visible creates dataset.csv — confirm where the
# split files listed in that step are meant to be produced.