diff --git a/Dockerfile b/Dockerfile
index 69895c1..8def072 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,9 +1,5 @@
 FROM ubuntu:latest
 
-RUN apt update && apt install -y python3-pip unzip
+RUN apt update && apt install -y python3-pip
 
-RUN pip install kaggle pandas numpy scikit-learn
-
-WORKDIR /app
-
-COPY ./create-dataset.py ./
\ No newline at end of file
+RUN pip install pandas numpy scikit-learn
\ No newline at end of file
diff --git a/IUM_2.ipynb b/IUM_2.ipynb
index 2cedc12..6910212 100644
--- a/IUM_2.ipynb
+++ b/IUM_2.ipynb
@@ -4,14 +4,14 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## IUM 2"
+    "## IUM 2\n"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Installation of packages"
+    "### Installation of packages\n"
    ]
   },
   {
@@ -67,7 +67,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Importing libraries"
+    "### Importing libraries\n"
    ]
   },
   {
@@ -90,7 +90,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Downloading a dataset"
+    "### Downloading a dataset\n"
    ]
   },
   {
@@ -114,7 +114,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Uncompress a file"
+    "### Uncompress a file\n"
    ]
   },
   {
@@ -139,7 +139,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Load the data"
+    "### Load the data\n"
    ]
   },
   {
@@ -148,15 +148,15 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "df = pd.read_csv('creditcard.csv')\n",
-    "pd.set_option('display.max_columns', None)"
+    "df = pd.read_csv(\"creditcard.csv\")\n",
+    "pd.set_option(\"display.max_columns\", None)"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Check missing values"
+    "### Check missing values\n"
    ]
   },
   {
@@ -214,7 +214,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Size of the dataset"
+    "### Size of the dataset\n"
    ]
   },
   {
@@ -275,7 +275,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Normalising the data"
+    "### Normalising the data\n"
    ]
   },
   {
@@ -286,14 +286,14 @@
    "source": [
     "scaler = StandardScaler()\n",
     "\n",
-    "df['Amount'] = scaler.fit_transform(df['Amount'].values.reshape(-1, 1))"
+    "df[\"Amount\"] = scaler.fit_transform(df[\"Amount\"].values.reshape(-1, 1))"
    ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Summary statistics"
+    "### Summary statistics\n"
    ]
   },
   {
@@ -717,7 +717,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "### Distribution of legitimate and fraudulent transactions"
+    "### Distribution of legitimate and fraudulent transactions\n"
    ]
   },
   {
@@ -740,7 +740,7 @@
     }
    ],
    "source": [
-    "df['Class'].value_counts()"
+    "df[\"Class\"].value_counts()"
    ]
   },
   {
@@ -748,7 +748,8 @@
    "metadata": {},
    "source": [
     "### Undersampling the data\n",
-    "We will employ undersampling as one class significantly dominates the other."
+ "\n", + "We will employ undersampling as one class significantly dominates the other.\n" ] }, { @@ -774,15 +775,15 @@ "# Undersample dataset\n", "undersample_data = df.iloc[undersample_indice, :]\n", "\n", - "X_undersample = undersample_data.iloc[:, undersample_data.columns != 'Class']\n", - "y_undersample = undersample_data.iloc[:, undersample_data.columns == 'Class']" + "X_undersample = undersample_data.iloc[:, undersample_data.columns != \"Class\"]\n", + "y_undersample = undersample_data.iloc[:, undersample_data.columns == \"Class\"]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Size of undersampled dataset" + "### Size of undersampled dataset\n" ] }, { @@ -843,7 +844,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Summary statistics of the undersampled dataset" + "### Summary statistics of the undersampled dataset\n" ] }, { @@ -1257,7 +1258,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Distribution of legitimate and fraudulent transactions in an undersampled dataset" + "### Distribution of legitimate and fraudulent transactions in an undersampled dataset\n" ] }, { @@ -1280,14 +1281,14 @@ } ], "source": [ - "undersample_data['Class'].value_counts()" + "undersample_data[\"Class\"].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Splitting whole data into training and test datasets" + "### Splitting whole data into training and test datasets\n" ] }, { @@ -1296,8 +1297,8 @@ "metadata": {}, "outputs": [], "source": [ - "X = df.iloc[:, df.columns != 'Class']\n", - "y = df.iloc[:, df.columns == 'Class']\n", + "X = df.iloc[:, df.columns != \"Class\"]\n", + "y = df.iloc[:, df.columns == \"Class\"]\n", "\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=0)" ] @@ -1306,7 +1307,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "### Statistical measures of the training dataset of whole data" + "### Statistical measures of the training dataset of whole data\n" ] }, { @@ -1810,14 +1811,14 @@ } ], "source": [ - "pd.concat([X_train, y_train], axis=1)['Class'].value_counts()" + "pd.concat([X_train, y_train], axis=1)[\"Class\"].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Statistical measures of the test dataset of whole data" + "### Statistical measures of the test dataset of whole data\n" ] }, { @@ -2311,14 +2312,14 @@ } ], "source": [ - "pd.concat([X_test, y_test], axis=1)['Class'].value_counts()" + "pd.concat([X_test, y_test], axis=1)[\"Class\"].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Splitting undersampled data into training and test datasets" + "### Splitting undersampled data into training and test datasets\n" ] }, { @@ -2327,14 +2328,16 @@ "metadata": {}, "outputs": [], "source": [ - "X_train_undersample, X_test_undersample, y_train_undersample, y_test_undersample = train_test_split(X_undersample, y_undersample, test_size = 0.3, random_state = 0)" + "X_train_undersample, X_test_undersample, y_train_undersample, y_test_undersample = (\n", + " train_test_split(X_undersample, y_undersample, test_size=0.3, random_state=0)\n", + ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Statistical measures of the training dataset of undersampled data" + "### Statistical measures of the training dataset of undersampled data\n" ] }, { @@ -2818,14 +2821,14 @@ } ], "source": [ - "pd.concat([X_train_undersample, y_train_undersample], axis=1)['Class'].value_counts()" + 
"pd.concat([X_train_undersample, y_train_undersample], axis=1)[\"Class\"].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "### Statistical measures of the test dataset of undersampled data" + "### Statistical measures of the test dataset of undersampled data\n" ] }, { @@ -3309,7 +3312,7 @@ } ], "source": [ - "pd.concat([X_test_undersample, y_test_undersample], axis=1)['Class'].value_counts()" + "pd.concat([X_test_undersample, y_test_undersample], axis=1)[\"Class\"].value_counts()" ] } ], diff --git a/Jenkinsfile b/Jenkinsfile index 57f76e2..7d93729 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -25,6 +25,7 @@ pipeline { stage('Download dataset') { steps { withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) { + sh 'pip install kaggle' sh 'kaggle datasets download -d mlg-ulb/creditcardfraud' sh 'unzip -o creditcardfraud.zip' sh 'rm creditcardfraud.zip' diff --git a/create-dataset.py b/create-dataset.py index 5ad482c..eb10e44 100644 --- a/create-dataset.py +++ b/create-dataset.py @@ -88,11 +88,8 @@ def save_whole_data(df, X_train, X_test, y_train, y_test): def main(): - # download_kaggle_dataset() os.makedirs("data", exist_ok=True) - os.system - df = load_data("creditcard.csv") df = normalize_data(df) diff --git a/dataset-stats.py b/dataset-stats.py new file mode 100644 index 0000000..41cfe57 --- /dev/null +++ b/dataset-stats.py @@ -0,0 +1,119 @@ +import os +import pandas as pd + + +def write_to_file(file_name): + df = pd.read_csv("data/creditcard.csv") + pd.set_option("display.max_columns", None) + + undersample_data = pd.read_csv("data/undersample_data.csv") + X_test_undersample = pd.read_csv("data/X_test_undersample.csv") + y_test_undersample = pd.read_csv("data/y_test_undersample.csv") + X_train_undersample = pd.read_csv("data/X_train_undersample.csv") + y_train_undersample = pd.read_csv("data/y_train_undersample.csv") + + X_test = pd.read_csv("data/X_test.csv") + y_test = pd.read_csv("data/y_test.csv") + X_train = pd.read_csv("data/X_train.csv") + y_train = pd.read_csv("data/y_train.csv") + + with open("stats_data/" + file_name, "w") as f: + f.write("Check missing values\n") + f.write(str(df.isnull().sum())) + + f.write("\n\n") + + f.write("Size of the dataset\n") + f.write(str(df.info())) + + f.write("\n\n") + + f.write("Summary statistics\n") + f.write(str(df.describe())) + + f.write("\n\n") + + f.write("Distribution of legitimate and fraudulent transactions\n") + f.write(str(df["Class"].value_counts())) + + f.write("\n\n") + + f.write("Size of undersampled dataset\n") + f.write(str(undersample_data.info())) + + f.write("\n\n") + + f.write("Summary statistics of the undersampled dataset\n") + f.write(str(undersample_data.describe())) + + f.write("\n\n") + + f.write( + "Distribution of legitimate and fraudulent transactions in an undersampled dataset\n" + ) + f.write(str(undersample_data["Class"].value_counts())) + + f.write("\n\n") + + f.write("Statistical measures of the training dataset of whole data\n") + f.write(str(pd.concat([X_train, y_train], axis=1).info())) + f.write("\n") + f.write(str(pd.concat([X_train, y_train], axis=1).describe())) + f.write("\n") + f.write(str(pd.concat([X_train, y_train], axis=1)["Class"].value_counts())) + + f.write("\n\n") + + f.write("Statistical measures of the test dataset of whole data\n") + f.write(str(pd.concat([X_test, y_test], axis=1).info())) + f.write("\n") + f.write(str(pd.concat([X_test, y_test], axis=1).describe())) + f.write("\n") + f.write(str(pd.concat([X_test, y_test], 
axis=1)["Class"].value_counts())) + + f.write("\n\n") + + f.write("Statistical measures of the training dataset of undersampled data\n") + f.write( + str(pd.concat([X_train_undersample, y_train_undersample], axis=1).info()) + ) + f.write("\n") + f.write( + str( + pd.concat([X_train_undersample, y_train_undersample], axis=1).describe() + ) + ) + f.write("\n") + f.write( + str( + pd.concat([X_train_undersample, y_train_undersample], axis=1)[ + "Class" + ].value_counts() + ) + ) + + f.write("\n\n") + + f.write("Statistical measures of the test dataset of undersampled data\n") + f.write(str(pd.concat([X_test_undersample, y_test_undersample], axis=1).info())) + f.write("\n") + f.write( + str(pd.concat([X_test_undersample, y_test_undersample], axis=1).describe()) + ) + f.write("\n") + f.write( + str( + pd.concat([X_test_undersample, y_test_undersample], axis=1)[ + "Class" + ].value_counts() + ) + ) + + +def main(): + os.makedirs("stats_data", exist_ok=True) + write_to_file("stats.txt") + + +if __name__ == "__main__": + main() diff --git a/stats.sh b/dataset-stats.sh similarity index 100% rename from stats.sh rename to dataset-stats.sh diff --git a/stats/Dockerfile b/stats/Dockerfile new file mode 100644 index 0000000..3dc72dd --- /dev/null +++ b/stats/Dockerfile @@ -0,0 +1,5 @@ +FROM ubuntu:latest + +RUN apt update && apt install -y python3-pip + +RUN pip install pandas diff --git a/stats/Jenkinsfile b/stats/Jenkinsfile index 6610ecd..5559a9d 100644 --- a/stats/Jenkinsfile +++ b/stats/Jenkinsfile @@ -1,5 +1,7 @@ pipeline { - agent any + agent docker { + image 'mateusz887/ium:latest' + } parameters { buildSelector( @@ -17,13 +19,13 @@ pipeline { } stage('Copy Artifacts') { steps { - copyArtifacts filter: 'data/*', projectName: 'z-s464913-create-dataset', selector: buildParameter('BUILD_SELECTOR') + copyArtifacts filter: 'data/*', projectName: 'z-s464913-create-dataset-1', selector: buildParameter('BUILD_SELECTOR') } } stage('Generate Report') { steps { - sh 'chmod +x stats.sh' - sh './stats.sh' + sh 'chmod +x dataset_stats.py' + sh './dataset_stats.py' } } stage('Archive Artifacts') {