Zaktualizuj 'download.sh'
This commit is contained in:
parent
1c2a9d77ac
commit
4545012227
22
download.sh
22
download.sh
@@ -1,18 +1,22 @@
|
|||||||
#######################################
# Shuffle dataset.csv and split it into test / dev / train subsets.
#
# The first line of dataset.csv is skipped, the remaining rows are shuffled
# and truncated to CUTOFF rows. With N rows kept and T = N / 10 (integer
# division): the first T rows form the test set, the next T rows the dev set,
# and all rows after the first 2*T form the train set.
#
# Globals:   CUTOFF (read) - number of shuffled rows to keep
# Inputs:    ./dataset.csv
# Outputs:   ./dataset_test.csv, ./dataset_dev.csv, ./dataset_train.csv
#            (the intermediate ./dataset.csv.s is left in place);
#            line counts of the three subsets are printed to stdout
# Returns:   0 on success, non-zero if CUTOFF is unset/empty, dataset.csv is
#            missing, or the shuffle/truncate step fails
#######################################
dataset_operation() {
  local total tenth dev_end train_start

  if [ -z "${CUTOFF:-}" ]; then
    echo 'dataset_operation: CUTOFF must be set' >&2
    return 1
  fi
  if [ ! -f dataset.csv ]; then
    echo 'dataset_operation: dataset.csv not found' >&2
    return 1
  fi

  tail -n +2 dataset.csv | shuf > dataset.csv.s || return 1
  head -n "$CUTOFF" dataset.csv.s > ./dataset.csv.shuf || return 1

  # Count the rows of the file that was actually written above
  # (dataset.csv.shuf), so the split sizes reflect the real data.
  total=$(wc -l < ./dataset.csv.shuf)
  tenth=$((total / 10))
  dev_end=$((tenth * 2))
  train_start=$((dev_end + 1))

  head -n "$tenth" dataset.csv.shuf > dataset_test.csv
  # No trailing dot in the name: it has to match the dataset_dev.csv artifact.
  head -n "$dev_end" dataset.csv.shuf | tail -n "$tenth" > dataset_dev.csv
  tail -n +"$train_start" dataset.csv.shuf > dataset_train.csv

  rm -- dataset.csv.shuf
  # Report the sizes of the files this function produced.
  wc -l dataset_test.csv dataset_dev.csv dataset_train.csv
}
|
||||||
|
|
||||||
|
# Download the dataset archive with the kaggle CLI, unpack it in the current
# directory and rename the archive to dataset.zip. Each step aborts the script
# with a non-zero status on failure, so a later "success" message is never
# printed for a step that did not actually succeed.
echo 'Start'
kaggle datasets download -d harshitshankhdhar/imdb-dataset-of-top-1000-movies-and-tv-shows \
  || { echo 'Dataset download failed' >&2; exit 1; }
echo 'Dataset downloaded'
unzip imdb-dataset-of-top-1000-movies-and-tv-shows.zip \
  || { echo 'Dataset unzip failed' >&2; exit 1; }
echo 'Dataset unziped'
mv imdb-dataset-of-top-1000-movies-and-tv-shows.zip dataset.zip \
  || { echo 'Dataset rename failed' >&2; exit 1; }
echo 'Dataset renamed'
# NOTE(review): the former last line,
#   archiveArtifacts artifacts: 'dataset_dev.csv, dataset_test.csv, dataset_train.csv', followSymlinks: false
# is a Jenkins Pipeline step, not a shell command; run from this script it
# fails with "command not found". Put it in the Jenkinsfile stage that calls
# this script instead.
# NOTE(review): nothing visible here creates dataset.csv or calls
# dataset_operation, so the archived split files are not produced by this
# script as shown - confirm against the rest of the pipeline.
|
Loading…
Reference in New Issue
Block a user