diff --git a/download_dataset.sh b/download_dataset.sh index 52ffd72..63c3936 100644 --- a/download_dataset.sh +++ b/download_dataset.sh @@ -2,10 +2,11 @@ kaggle datasets download -d sohier/crime-in-baltimore unzip crime-in-baltimore.zip +echo 'Split' wc -l BPD_Part_1_Victim_Based_Crime_Data.csv head -n 8000 BPD_Part_1_Victim_Based_Crime_Data.csv > baltimore_train.csv tail -n 2000 BPD_Part_1_Victim_Based_Crime_Data.csv > baltimore_test.csv head -n 2000 baltimore_train.csv > baltimore_dev.csv - +echo 'End' wc -l baltimore_train.csv \ No newline at end of file