diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..c140625
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,14 @@
+FROM ubuntu:latest
+
+COPY requirements.txt ./
+
+RUN apt-get update
+RUN apt-get install -y python3-pip
+RUN pip3 install --user -r ./requirements.txt
+
+WORKDIR /app
+
+COPY ./stats.py ./
+COPY /kaggle.json /root/.kaggle/
+
+CMD python3 stats.py
\ No newline at end of file
diff --git a/Zad1.py b/Zad1.py
new file mode 100644
index 0000000..d30860b
--- /dev/null
+++ b/Zad1.py
@@ -0,0 +1,37 @@
+import kaggle
+import numpy as np
+import pandas as pd
+from sklearn import preprocessing
+
+
+kaggle.api.authenticate()
+kaggle.api.dataset_download_files("gpreda/covid-world-vaccination-progress", path=".", unzip=True)
+
+df = pd.read_csv('country_vaccinations.csv')
+# split the data into train/validate/test sets (6:2:2) using numpy and pandas
+train, validate, test = np.split(df.sample(frac=1), [int(.6*len(df)), int(.8*len(df))])
+
+# Print the number of elements in each data frame
+print("Whole set size: ".ljust(20), df.size)
+print("Train set size: ".ljust(20), train.size)
+print("Validate set size: ".ljust(20), validate.size)
+print("Test set size: ".ljust(20), test.size)
+
+print(df.describe(include='all'))
+
+for col in df.columns:
+    column = df[col].value_counts()
+    column.plot(kind="bar", figsize=(30, 10))
+    print("\n", col)
+    print(column)
+
+# normalize the numeric values
+numeric_values = df.select_dtypes(include='float64').values  # numeric columns only
+min_max_scaler = preprocessing.MinMaxScaler()
+x_scaled = min_max_scaler.fit_transform(numeric_values)
+numeric_columns = df.select_dtypes(include='float64').columns
+df_normalized = pd.DataFrame(x_scaled, columns=numeric_columns)
+for col in df.columns:  # swap the raw numeric columns for their normalized counterparts in the original data frame
+    if col in numeric_columns: df[col] = df_normalized[col]
+
+df = df.dropna()  # drop rows that contain NaN fields
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..1feaa26
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+kaggle==1.5.12
+matplotlib==3.4.1
+numpy==1.20.2
+pandas==1.2.3
+sklearn==0.0
\ No newline at end of file
diff --git a/stats.py b/stats.py
new file mode 100644
index 0000000..f5c8d6c
--- /dev/null
+++ b/stats.py
@@ -0,0 +1,36 @@
+import zipfile
+import numpy as np
+import pandas as pd
+from sklearn import preprocessing
+
+with zipfile.ZipFile('covid-world-vaccination-progress.zip', 'r') as zip_ref:
+    zip_ref.extractall(".")
+
+df = pd.read_csv('country_vaccinations.csv')
+# split the data into train/validate/test sets (6:2:2) using numpy and pandas
+train, validate, test = np.split(df.sample(frac=1), [int(.6*len(df)), int(.8*len(df))])
+
+# Print the number of elements in each data frame
+print("Whole set size: ".ljust(20), df.size)
+print("Train set size: ".ljust(20), train.size)
+print("Validate set size: ".ljust(20), validate.size)
+print("Test set size: ".ljust(20), test.size)
+
+print(df.describe(include='all'))
+
+for col in df.columns:
+    column = df[col].value_counts()
+    column.plot(kind="bar", figsize=(30, 10))
+    print("\n", col)
+    print(column)
+
+# normalize the numeric values
+numeric_values = df.select_dtypes(include='float64').values  # numeric columns only
+min_max_scaler = preprocessing.MinMaxScaler()
+x_scaled = min_max_scaler.fit_transform(numeric_values)
+numeric_columns = df.select_dtypes(include='float64').columns
+df_normalized = pd.DataFrame(x_scaled, columns=numeric_columns)
+for col in df.columns:  # swap the raw numeric columns for their normalized counterparts in the original data frame
+    if col in numeric_columns: df[col] = df_normalized[col]
+
+df = df.dropna()  # drop rows that contain NaN fields
\ No newline at end of file