diff --git a/Dockerfile b/Dockerfile
index cb900a6..1381810 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,4 +1,5 @@
-FROM ubuntu:latest
+FROM ubuntu:22.04
+RUN apt-get update && apt-get install -y locales && locale-gen en_US.UTF-8
 
 # COPY ./kaggle.json /root/.kaggle/kaggle.json
 
@@ -9,19 +10,29 @@ RUN apt-get install -y python3
 RUN apt-get install -y unzip
 RUN apt-get install -y python3-pip
 
+# Force UTF-8 for Python's standard streams and for the process locale.
+ENV PYTHONIOENCODING=utf-8
+RUN apt-get install -y locales locales-all
+ENV LC_ALL=en_US.UTF-8
+ENV LANG=en_US.UTF-8
+ENV LANGUAGE=en_US.UTF-8
+
 RUN python3 -m pip --version
 RUN python3 -m pip install kaggle
 RUN python3 -m pip install pandas
-
 RUN python3 -m pip freeze
 
-COPY ./download.sh ./
-COPY ./script.py ./
+# NOTE(review): /root/.local/bin is where `pip install --user` places scripts; confirm it is needed here.
+ENV PATH="/root/.local/bin:${PATH}"
+COPY . .
 
-ARG KAGGLE_USERNAME=testKAGGLE_USERNAME
-ARG KAGGLE_KEY=test1KAGGLE_KEY
+# Kaggle credentials arrive via --build-arg. NOTE: build-arg values are recorded in
+# the image history; prefer a BuildKit secret mount if this image is ever shared.
+ARG KAGGLE_USERNAME
+ARG KAGGLE_KEY
 
-RUN chmod u+x ./script.py
+RUN chmod a+x ./stats-docker.sh
+RUN chmod a+x ./script-stats.py
 
 # RUN ./download.sh 117928
-# RUN python3 ./script.py
\ No newline at end of file
+RUN python3 ./script-download.py
\ No newline at end of file
diff --git a/Jenkinsfile-docker b/Jenkinsfile-docker
index 1bcfe50..b8a4beb 100644
--- a/Jenkinsfile-docker
+++ b/Jenkinsfile-docker
@@ -20,19 +20,19 @@ pipeline {
     }
     agent { 
         dockerfile{
-            additionalBuildArgs  '--build-arg KAGGLE_USERNAME="$KAGGLE_USERNAME" --build-arg KAGGLE_KEY="$KAGGLE_KEY" --build-arg --no-cache=true'
+            additionalBuildArgs  '--build-arg KAGGLE_USERNAME="$KAGGLE_USERNAME" --build-arg KAGGLE_KEY="$KAGGLE_KEY" -t s444507_create_dataset_image' // forwards the Kaggle credentials as build args and tags the built image
         } 
     }
     stages {
         stage('Prepare dataset') {
             steps {
-                sh 'python3 ./script.py'
+                sh './stats-docker.sh' // prints `wc -l` counts for the dataset CSVs. NOTE(review): the CSVs are created at image build time; confirm they are visible from this step's working directory
             }
         }
     }
     post {
         success {
-            archiveArtifacts artifacts: 'Car_Prices_Poland_Kaggle_*', followSymlinks: false
+            archiveArtifacts artifacts: 'Car_Prices_Poland_Kaggle*', followSymlinks: false // glob without the trailing underscore also matches the un-suffixed Car_Prices_Poland_Kaggle.csv
         }
     }
 }
\ No newline at end of file
diff --git a/Jenkinsfile-stats b/Jenkinsfile-stats
new file mode 100644
index 0000000..e699d59
--- /dev/null
+++ b/Jenkinsfile-stats
@@ -0,0 +1,20 @@
+// Statistics pipeline: runs in the s444507_create_dataset_image container (the tag
+// set via -t in Jenkinsfile-docker) and prints statistics for copied artifacts.
+pipeline {
+    agent {
+        docker { image 's444507_create_dataset_image:latest' }
+    }
+    stages {
+        stage('Get artifacts') {
+            steps {
+                // Copy the artifacts of the last successful 's444507-create-dataset' build into the workspace.
+                copyArtifacts fingerprintArtifacts: true, projectName: 's444507-create-dataset', selector: lastSuccessful()
+            }
+        }
+        stage('Show stats') {
+            steps {
+                sh "python3 ./script-stats.py"
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/script.py b/script-download.py
old mode 100644
new mode 100755
similarity index 66%
rename from script.py
rename to script-download.py
index 7d2a0fc..8f0145d
--- a/script.py
+++ b/script-download.py
@@ -5,26 +5,20 @@ import os
 import numpy as np
 
 
-def install_dependencies():
-    """Install kaggle and pandas."""
-    subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--upgrade', 'pip'])
-    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'kaggle'])
-    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'pandas'])
-    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'seaborn'])
-    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'scikit-learn'])
-
 
 def unzip_package():
     """Unzip dataset"""
     print('Unzipping dataset...')
-    os.system('unzip -o car-prices-poland.zip')
+    os.system('unzip -o ./car-prices-poland.zip')  # -o: overwrite existing files without prompting
     print('Dataset unzipped')
-
+    print('Removing .zip file...')
+    os.remove('./car-prices-poland.zip')  # stdlib call instead of shelling out to rm; raises if the archive is missing
+    print('Zip file removed')
 
 def download_dataset():
     """Download kaggle dataset."""
     print('Downloading dataset...')
-    os.system('kaggle datasets download -d anikannal/solar-power-generation-data')
+    os.system('kaggle datasets download -d aleksandrglotov/car-prices-poland')  # presumably yields car-prices-poland.zip, which unzip_package() extracts - confirm
 
     print('Dir after downloading')
     os.system('ls -la')
@@ -54,17 +48,6 @@ def divide_dataset(dataset):
     print('Dataset devided')
 
 
-def get_statistics(dataset):
-    """Mean, min, max, median etc."""
-
-    print(f'--------------- Normalized dataset length ---------------')
-    print(len(dataset))
-
-    print(f'---------------Describe dataset---------------')
-    pd.set_option('display.max_columns', None)
-    print(dataset.describe(include='all'))
-
-
 def normalize_dataset(dataset):
     """Drop unnecessary columns and set numeric values to [0,1] range"""
 
@@ -78,16 +61,17 @@ def normalize_dataset(dataset):
     # normalize numbers to [0, 1]
     for column in dataset.columns:
         if isinstance(dataset.iloc[1][column], np.int64) or isinstance(dataset.iloc[1][column], np.float64):
-            dataset[column] = (dataset[column] - dataset[column].min()) / (
-                    dataset[column].max() - dataset[column].min())
+            col_min = dataset[column].min()
+            # A constant column has zero range; divide by 1 so it maps to 0 instead of NaN.
+            col_range = (dataset[column].max() - col_min) or 1
+            dataset[column] = (dataset[column] - col_min) / col_range
     return dataset
 
 
-# print(os.system('python3 -m pip freeze'))
+# Script entry: download and unpack the archive, load the CSV, normalize it, then divide it.
 download_dataset()
 unzip_package()
 cars = pd.read_csv('./Car_Prices_Poland_Kaggle.csv')
 df = pd.DataFrame(cars)
 df = normalize_dataset(df)
 divide_dataset(df)
-get_statistics(df)
diff --git a/script-stats.py b/script-stats.py
new file mode 100755
index 0000000..258504e
--- /dev/null
+++ b/script-stats.py
@@ -0,0 +1,16 @@
+"""Print describe() summaries for the full dataset CSV and its dev/test/train CSVs."""
+import pandas as pd
+
+# (heading, CSV path) pairs, reported in this order.
+REPORTS = (
+    ('--Full dataset stats--', './Car_Prices_Poland_Kaggle.csv'),
+    ('Dev dataset stats', './Car_Prices_Poland_Kaggle_dev.csv'),
+    ('# statystyki dla zbioru test', './Car_Prices_Poland_Kaggle_test.csv'),
+    ('# statystyki dla zbioru train', './Car_Prices_Poland_Kaggle_train.csv'),
+)
+
+for heading, csv_path in REPORTS:
+    print(heading)
+    frame = pd.read_csv(csv_path, encoding="utf-8")
+    # include='all' summarises non-numeric columns as well as numeric ones.
+    print(frame.describe(include='all'))
diff --git a/stats-docker.sh b/stats-docker.sh
new file mode 100755
index 0000000..ceb586e
--- /dev/null
+++ b/stats-docker.sh
@@ -0,0 +1,15 @@
+#!/bin/sh
+# Print the line count of the full Car Prices Poland CSV and of its train/test/dev
+# CSVs. `wc -l` counts every line, so a header row, if present, is included.
+
+echo 'Total elements in Car Prices Poland dataset:'
+wc -l ./Car_Prices_Poland_Kaggle.csv
+
+echo 'Total elements in train dataset:'
+wc -l ./Car_Prices_Poland_Kaggle_train.csv
+
+echo 'Total elements in test dataset:'
+wc -l ./Car_Prices_Poland_Kaggle_test.csv
+
+echo 'Total elements in dev dataset:'
+wc -l ./Car_Prices_Poland_Kaggle_dev.csv