Add dockerfile and python script
This commit is contained in:
parent
1fe09ed25d
commit
180431b160
21
Dockerfile
Normal file
21
Dockerfile
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
# Image that bundles the Kaggle CLI, the Python data stack and the project
# scripts (download_dataset.sh, stats.sh, data.py) under /app.
FROM ubuntu:latest

# Credentials picked up by the Kaggle CLI from the environment.
# NOTE(review): KAGGLE_KEY is only a placeholder here. Supply the real key at
# run time (e.g. `docker run -e KAGGLE_KEY=...`) instead of committing it,
# because ENV values are stored in the image and visible in its history.
ENV KAGGLE_USERNAME="jaszwajcar"
ENV KAGGLE_KEY="key"

# `apt-get update` and `apt-get install` share one RUN so a cached update layer
# can never be combined with a newer install list (stale package index).
# apt-get is used instead of apt because apt's CLI is not stable for scripts.
# The package lists are removed afterwards to keep the layer small.
RUN apt-get update \
    && apt-get install -y figlet unzip python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Python dependencies installed in a single layer, without keeping pip's cache.
# NOTE(review): `unzip` below is the PyPI package, not the apt utility installed
# above; it is kept to preserve the installed set - confirm it is needed.
# NOTE(review): on base images whose system Python is externally managed
# (PEP 668) a system-wide pip3 install is refused; pin the base image or use a
# virtual environment if this step fails.
RUN pip3 install --no-cache-dir kaggle pandas unzip scikit-learn seaborn

WORKDIR /app

# Project scripts, copied into /app (the WORKDIR set above).
COPY ./download_dataset.sh ./
COPY ./stats.sh ./
COPY ./data.py ./
|
||||||
|
|
24
data.py
Normal file
24
data.py
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
"""Clean the Baltimore crime records and split them into train/dev/test CSVs.

Reads ``BPD_Part_1_Victim_Based_Crime_Data.csv`` from the current directory
and writes ``baltimore_train.csv``, ``baltimore_dev.csv`` and
``baltimore_test.csv`` next to it.
"""
import pandas as pd

SOURCE_CSV = 'BPD_Part_1_Victim_Based_Crime_Data.csv'

# Text columns that are normalised to lower case.
TEXT_COLUMNS = (
    'Location',
    'Description',
    'Weapon',
    'Premise',
    'District',
    'CrimeCode',
    'Neighborhood',
    'Inside/Outside',
)


def clean(frame):
    """Return a cleaned copy of the raw crime records.

    Missing ``Weapon`` values become the string ``"None"``, rows that still
    contain a missing value are dropped, ``Post`` is scaled by its largest
    absolute value, and every column in ``TEXT_COLUMNS`` is lower-cased.
    The input frame is not modified.
    """
    cleaned = frame.copy()
    # Assign the result back: calling fillna(..., inplace=True) on the column
    # selection is a chained in-place update, which pandas 2.x warns about and
    # which does not reach the frame under Copy-on-Write. Without this the
    # dropna below would discard every row that has no weapon recorded.
    cleaned["Weapon"] = cleaned["Weapon"].fillna("None")
    cleaned.dropna(inplace=True)

    cleaned['Post'] = cleaned['Post'] / cleaned['Post'].abs().max()
    for column in TEXT_COLUMNS:
        cleaned[column] = cleaned[column].str.lower()
    return cleaned


def split(frame):
    """Split *frame* into ``(train, dev, test)`` with a fixed random seed.

    10% of the rows go to test; 25% of the remainder goes to dev.
    """
    # Imported here so that clean() stays importable without scikit-learn.
    from sklearn.model_selection import train_test_split

    train, test = train_test_split(frame, test_size=0.1, random_state=1)
    train, dev = train_test_split(train, test_size=0.25, random_state=1)
    return train, dev, test


def main():
    """Run the whole pipeline: read, clean, split and write the three CSVs."""
    baltimore = clean(pd.read_csv(SOURCE_CSV))
    baltimore_train, baltimore_dev, baltimore_test = split(baltimore)

    baltimore_test.to_csv("baltimore_test.csv", encoding="utf-8", index=False)
    baltimore_dev.to_csv("baltimore_dev.csv", encoding="utf-8", index=False)
    baltimore_train.to_csv("baltimore_train.csv", encoding="utf-8", index=False)


if __name__ == "__main__":
    main()
|
@ -1,6 +1,4 @@
|
|||||||
kaggle datasets download -d sohier/crime-in-baltimore
|
#!/bin/bash
|
||||||
unzip crime-in-baltimore.zip
|
|
||||||
|
|
||||||
head -n 8000 BPD_Part_1_Victim_Based_Crime_Data.csv > baltimore_train.csv
|
kaggle datasets download -d sohier/crime-in-baltimore
|
||||||
tail -n 2000 BPD_Part_1_Victim_Based_Crime_Data.csv > baltimore_test.csv
|
unzip crime-in-baltimore.zip
|
||||||
head -n 2000 baltimore_train.csv > baltimore_dev.csv
|
|
Loading…
Reference in New Issue
Block a user