Patch summary (leading text before the first "diff --git" header is skipped by patch tools):
  * Dockerfile (new file): Ubuntu-based image that installs python3, pip, pandas, scikit-learn and kaggle,
    copies create-dataset.py and data/barcelona_weekends.csv into /app, and runs the script.
  * create-dataset.py: the sample size is now the constant 10 instead of the CUTOFF environment variable,
    the CSV is read from the working directory, and the head of the written train.csv is printed.
NOTE(review): line breaks were reconstructed from the hunk line counts; the indentation of the
"apt-get install" continuation line is assumed (4 spaces) - confirm against the original commit.

diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..eb89aad
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,13 @@
+FROM ubuntu:latest
+
+RUN apt-get update && \
+    apt-get install -y python3 python3-pip
+
+RUN pip3 install pandas scikit-learn kaggle
+
+WORKDIR /app
+
+COPY create-dataset.py /app
+COPY data/barcelona_weekends.csv /app
+
+CMD ["python3", "create-dataset.py"]
\ No newline at end of file
diff --git a/create-dataset.py b/create-dataset.py
index 34955f4..665aa2f 100644
--- a/create-dataset.py
+++ b/create-dataset.py
@@ -3,9 +3,9 @@ import os
 import numpy as np
 
 
-cutoff = int(os.environ['CUTOFF'])
+cutoff = 10
 
-data = pd.read_csv('./ium_z444510/barcelona_weekends.csv')
+data = pd.read_csv('./barcelona_weekends.csv')
 data = data.sample(cutoff)
 data = data.iloc[:, 1:]
 
@@ -15,3 +15,7 @@ train_set, dev_set, test_set = np.split(data.sample(frac=1, random_state=42),
 train_set.to_csv('train.csv', index=False)
 dev_set.to_csv('dev.csv', index=False)
 test_set.to_csv('test.csv', index=False)
+
+
+check = pd.read_csv('./train.csv')
+print(check.head())