2022-03-26 11:59:53 +01:00
|
|
|
# Emit a fixed marker line on stdout.
printf '%s\n' 'test'
|
2022-03-26 14:08:23 +01:00
|
|
|
|
|
|
|
# delete old files
# Clears the input and outputs of a previous run before regenerating them.
# -f: a file that does not exist yet (e.g. on the very first run) is not an
#     error, so this step stays quiet and exits 0.
# No -r: the later steps read and write these names as plain files, so a
#     recursive delete is never needed.
# --: end of options, everything after it is treated as a file name.
rm -f -- df_atp.csv atp_train.csv atp_test.csv atp_dev.csv
|
|
|
|
|
|
|
|
# download dataset
# Fetches the dataset archive with the kaggle CLI, then unpacks it into the
# current directory.
kaggle_dataset='hakeem/atp-and-wta-tennis-data'
# The archive name passed to unzip is the slug's last path component + ".zip".
dataset_zip="${kaggle_dataset##*/}.zip"

kaggle datasets download -d "$kaggle_dataset"
# -o: overwrite files that already exist instead of prompting.
unzip -o "$dataset_zip"
|
|
|
|
|
|
|
|
# split dataset to train, dev and test subsets
#
# Reads df_atp.csv from the current directory and writes:
#   atp_test.csv  - the last 2000 lines of df_atp.csv
#   atp_dev.csv   - the first 2000 of the leading 8000 lines
#   atp_train.csv - the last 6000 of the leading 8000 lines
# The leading 8000 lines are staged in a mktemp file that is removed again,
# so no scratch file is left behind in the working directory.
# Returns: 0 on success; non-zero if df_atp.csv is unreadable (nothing is
#          written in that case) or if any head/tail step fails.
# NOTE(review): if df_atp.csv starts with a CSV header row, only atp_dev.csv
# receives it - confirm that this is what the consumers expect.
# NOTE(review): the test subset overlaps the other two when df_atp.csv has
# fewer than 10000 lines.
split_atp_dataset() {
  local source_csv=df_atp.csv
  local leading status=0

  if [ ! -r "$source_csv" ]; then
    printf 'split_atp_dataset: cannot read %s\n' "$source_csv" >&2
    return 1
  fi

  leading=$(mktemp) || return 1

  {
    head -n 8000 "$source_csv" > "$leading" &&
      tail -n 2000 "$source_csv" > atp_test.csv &&
      head -n 2000 "$leading" > atp_dev.csv &&
      tail -n 6000 "$leading" > atp_train.csv
  } || status=$?

  # Always drop the staging file, whether or not the split succeeded.
  rm -f -- "$leading"
  return "$status"
}

split_atp_dataset
|