Add dockerfile and python script
This commit is contained in:
parent
1fe09ed25d
commit
180431b160
21
Dockerfile
Normal file
21
Dockerfile
Normal file
@ -0,0 +1,21 @@
|
||||
# Image used to download and preprocess the Kaggle "crime-in-baltimore" dataset.
# NOTE(review): `latest` is a moving tag; consider pinning a release so that
# rebuilds stay reproducible (newer Ubuntu releases may also restrict
# system-wide `pip3 install`) - confirm the intended release before pinning.
FROM ubuntu:latest

# Credentials picked up by the `kaggle` command-line client.
# NOTE(review): "key" looks like a placeholder. Prefer supplying the real value
# at run time (`docker run -e KAGGLE_KEY=...`) instead of baking it into an
# image layer, where anyone with access to the image can read it.
ENV KAGGLE_USERNAME="jaszwajcar"
ENV KAGGLE_KEY="key"

# Refresh the package index and install in the SAME layer: a cached
# `update` layer can otherwise be reused with a stale index and make later
# installs fail. `apt-get` is used because its CLI is stable for scripts.
RUN apt-get update \
    && apt-get install -y figlet unzip python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Python dependencies of the dataset scripts, installed in a single layer.
RUN pip3 install kaggle pandas unzip scikit-learn seaborn

WORKDIR /app

# Helper scripts are copied into /app (the WORKDIR set above).
COPY ./download_dataset.sh ./stats.sh ./data.py ./
|
||||
|
24
data.py
Normal file
24
data.py
Normal file
@ -0,0 +1,24 @@
|
||||
import pandas as pd
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
# Text columns whose values are normalised to lower case.
TEXT_COLUMNS = (
    "Location",
    "Description",
    "Weapon",
    "Premise",
    "District",
    "CrimeCode",
    "Neighborhood",
    "Inside/Outside",
)


def preprocess(frame):
    """Return a cleaned copy of the crime data frame.

    Missing ``Weapon`` values are replaced with ``"None"``, rows that still
    contain missing values are dropped, ``Post`` is divided by its largest
    absolute value, and every column in ``TEXT_COLUMNS`` is lower-cased.
    The input frame is left untouched.
    """
    cleaned = frame.copy()
    # Assign the filled column back instead of calling
    # ``fillna(..., inplace=True)`` on the column selection: under pandas
    # Copy-on-Write that in-place call does not update the parent frame, so
    # the ``dropna`` below would discard every row without a weapon.
    cleaned["Weapon"] = cleaned["Weapon"].fillna("None")
    cleaned.dropna(inplace=True)

    cleaned["Post"] = cleaned["Post"] / cleaned["Post"].abs().max()
    for column in TEXT_COLUMNS:
        cleaned[column] = cleaned[column].str.lower()
    return cleaned


def split_train_dev_test(frame):
    """Split *frame* into ``(train, dev, test)`` subsets.

    10% of the rows are held out as the test set; 25% of the remaining rows
    become the dev set. Both splits use ``random_state=1`` so the result is
    reproducible.
    """
    train, test = train_test_split(frame, test_size=0.1, random_state=1)
    train, dev = train_test_split(train, test_size=0.25, random_state=1)
    return train, dev, test


def main():
    """Read the raw CSV, clean it and write the train/dev/test CSV files."""
    baltimore = preprocess(pd.read_csv('BPD_Part_1_Victim_Based_Crime_Data.csv'))
    baltimore_train, baltimore_dev, baltimore_test = split_train_dev_test(baltimore)

    baltimore_test.to_csv("baltimore_test.csv", encoding="utf-8", index=False)
    baltimore_dev.to_csv("baltimore_dev.csv", encoding="utf-8", index=False)
    baltimore_train.to_csv("baltimore_train.csv", encoding="utf-8", index=False)


if __name__ == "__main__":
    main()
|
@ -1,6 +1,4 @@
|
||||
kaggle datasets download -d sohier/crime-in-baltimore
|
||||
unzip crime-in-baltimore.zip
|
||||
#!/bin/bash
|
||||
|
||||
head -n 8000 BPD_Part_1_Victim_Based_Crime_Data.csv > baltimore_train.csv
|
||||
tail -n 2000 BPD_Part_1_Victim_Based_Crime_Data.csv > baltimore_test.csv
|
||||
head -n 2000 baltimore_train.csv > baltimore_dev.csv
|
||||
kaggle datasets download -d sohier/crime-in-baltimore
|
||||
unzip crime-in-baltimore.zip
|
Loading…
Reference in New Issue
Block a user