Install scikit-learn in the image and enable the Kaggle dataset download.

Notes (text before the first "diff --git" line is ignored by git apply/patch):
- The PyPI distribution is named "scikit-learn"; "sklearn" is a deprecated
  placeholder package whose installation fails. The import name stays "sklearn".
- check=True makes a failed "kaggle datasets download" stop the script at the
  real cause instead of at a later, misleading missing-CSV error.
- NOTE(review): blank context lines were reconstructed from the hunk line
  counts; confirm against the target files before applying.

diff --git a/Dockerfile b/Dockerfile
index 2d11201..49de659 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -3,7 +3,7 @@ FROM ubuntu:latest
 RUN apt-get update && \
     apt-get install -y python3-pip
 
-RUN pip3 install --user kaggle pandas
+RUN pip3 install --user kaggle pandas scikit-learn
 
 ENV PATH="/root/.local/bin:${PATH}"
 
diff --git a/data_processing.py b/data_processing.py
index 9f43108..adb9b42 100644
--- a/data_processing.py
+++ b/data_processing.py
@@ -4,7 +4,7 @@ from sklearn.model_selection import train_test_split
 import pandas as pd
 import subprocess
 
-# subprocess.run(["kaggle", "datasets", "download", "muhammadbinimran/housing-price-prediction-data", "--unzip"])
+subprocess.run(["kaggle", "datasets", "download", "muhammadbinimran/housing-price-prediction-data", "--unzip"], check=True)
 
 housing_price_dataset = pd.read_csv('housing_price_dataset.csv')
 hp_train_test, hp_dev = sklearn.model_selection.train_test_split(housing_price_dataset, test_size=0.1)