From 98939fc5518ea009f4e0f33a87c2a73d642e4943 Mon Sep 17 00:00:00 2001
From: s444380
Date: Sun, 27 Mar 2022 11:29:22 +0200
Subject: [PATCH] Added script downloading and splitting data

---
 download_data.sh | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)
 create mode 100755 download_data.sh

diff --git a/download_data.sh b/download_data.sh
new file mode 100755
index 0000000..c636d37
--- /dev/null
+++ b/download_data.sh
@@ -0,0 +1,29 @@
+#!/bin/bash
+#
+# Download the "Crimes in Boston" dataset from Kaggle and split it into
+# test, dev and train subsets.
+#
+# Requires the `kaggle` CLI (with API credentials configured), `unzip` and
+# `shuf` on PATH.
+#
+# Files written to the current directory:
+#   crime.shuf   all data rows of crime.csv in random order (header removed)
+#   crime.test   first 30000 shuffled rows
+#   crime.dev    next 30000 shuffled rows
+#   crime.train  all remaining shuffled rows
+
+# Abort on the first failing command, including inside pipelines, so a failed
+# download or extraction never produces empty or partial splits.
+set -euo pipefail
+
+kaggle datasets download -d AnalyzeBoston/crimes-in-boston
+# -o: overwrite files left by a previous run instead of prompting.
+unzip -o crimes-in-boston.zip
+
+# Drop the CSV header before shuffling; otherwise it ends up at a random
+# position inside one of the splits and is read as a data row.
+tail -n +2 crime.csv | shuf > crime.shuf
+
+head -n 30000 crime.shuf > crime.test
+head -n 60000 crime.shuf | tail -n 30000 > crime.dev
+tail -n +60001 crime.shuf > crime.train