# Ubuntu-based image that provides the kaggle CLI, pandas and scikit-learn,
# with the project's download / clean-and-split / stats scripts placed in /app.

# Pinned release instead of the implicit `latest`: keeps builds reproducible and
# stays on a release where a system-wide `pip3 install` is still permitted
# (newer Ubuntu releases mark the system Python as externally managed).
FROM ubuntu:22.04

# Persist the user-level bin directory on PATH for every later layer and for the
# running container. A `RUN export PATH=...` only affects the shell of that one
# RUN step and is discarded as soon as the step finishes.
ENV PATH="${PATH}:/root/.local/bin"

# OS dependencies. update + install + list cleanup share one layer so the apt
# index is never stale and never shipped in the image.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        python3-pip \
        unzip \
    && rm -rf /var/lib/apt/lists/*

# Python dependencies; --no-cache-dir keeps pip's download cache out of the layer.
# NOTE(review): versions are unpinned — consider pinning them (or using a
# requirements file) for reproducible builds.
RUN pip3 install --no-cache-dir kaggle pandas scikit-learn

# Build arguments are declared after the dependency layers so that changing
# their values does not invalidate the apt/pip cache.
# NOTE(review): no instruction in this file consumes KAGGLE_USERNAME/KAGGLE_KEY.
# Build args are recorded in the image history; if the credentials are ever
# needed at build time, prefer `RUN --mount=type=secret` over `--build-arg`.
ARG KAGGLE_USERNAME
ARG KAGGLE_KEY
ARG CUTOFF

# Expose CUTOFF at runtime, falling back to 260000 when the build arg is unset
# or empty.
ENV CUTOFF=${CUTOFF:-260000}

WORKDIR /app

# Application scripts are copied last: they change most often, and editing them
# should not force a reinstall of the dependencies above.
COPY download_data.sh calc_stats.sh clean_and_split_data.py calc_stats.py ./