#!/bin/bash
#
# download_data.sh
#
# Download the Kaggle "AnalyzeBoston/crimes-in-boston" dataset and split
# crime.csv into random, non-overlapping test / dev / train files.
#
# Requirements: the `kaggle` CLI (with API credentials configured), `unzip`,
# and `shuf` must be on PATH. All files are written to the current directory.
#
# Outputs:
#   crime.shuf   data rows of crime.csv in random order
#   crime.test   shuffled rows 1 .. SPLIT_SIZE
#   crime.dev    shuffled rows SPLIT_SIZE+1 .. 2*SPLIT_SIZE
#   crime.train  shuffled rows 2*SPLIT_SIZE+1 .. end

# Abort on the first failing step so that a failed download or extraction
# cannot leave behind empty or stale split files that look valid.
set -euo pipefail

# Number of rows in each of the test and dev splits; the rest goes to train.
readonly SPLIT_SIZE=30000

kaggle datasets download -d AnalyzeBoston/crimes-in-boston

# -o: overwrite existing files without prompting, so reruns stay
# non-interactive.
unzip -o crimes-in-boston.zip

# Drop line 1 before shuffling. NOTE(review): this assumes crime.csv starts
# with a column-header row (as the published dataset does); shuffling it
# together with the data would plant the header at a random position inside
# one of the splits.
tail -n +2 crime.csv | shuf > crime.shuf

head -n "${SPLIT_SIZE}" crime.shuf > crime.test

# An explicit line range (rather than `head | tail`) keeps dev disjoint from
# test even when the file holds fewer than 2*SPLIT_SIZE rows.
sed -n "$((SPLIT_SIZE + 1)),$((2 * SPLIT_SIZE))p" crime.shuf > crime.dev

tail -n "+$((2 * SPLIT_SIZE + 1))" crime.shuf > crime.train