diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..13566b8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+<component name="InspectionProjectProfileManager">
+  <settings>
+    <option name="USE_PROJECT_PROFILE" value="false" />
+    <version value="1.0" />
+  </settings>
+</component>
\ No newline at end of file
diff --git a/.idea/ium_464979.iml b/.idea/ium_464979.iml
new file mode 100644
index 0000000..d0876a7
--- /dev/null
+++ b/.idea/ium_464979.iml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="PYTHON_MODULE" version="4">
+  <component name="NewModuleRootManager">
+    <content url="file://$MODULE_DIR$" />
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+  </component>
+</module>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..dc9ea49
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,4 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10" project-jdk-type="Python SDK" />
+</project>
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..4f48266
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/.idea/ium_464979.iml" filepath="$PROJECT_DIR$/.idea/ium_464979.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..35eb1dd
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/Jenkinsfile b/Jenkinsfile
index 5cafbfe..973cf0d 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1,29 +1,33 @@
 pipeline {
-   agent any
-   //Definijuemy parametry, kt�re b�dzie mo�na poda� podczas wywo�ywania zadania
-   parameters {
-     string (
-         defaultValue: 'Hello World!',
-         description: 'Tekst, kt�rym chcesz przywita� �wiat',
-         name: 'INPUT_TEXT',
-         trim: false
-        )
-   }
-   stages {
-      stage('Hello') {
-         steps {
-            //Wypisz warto�� parametru w konsoli (To nie jest polecenie bash, tylko groovy!)
-            echo "INPUT_TEXT: promoscan"
-            //Wywo�aj w konsoli komend� "figlet", kt�ra generuje ASCI-art
-            sh "figlet \"promoscan\" | tee output.txt"
-         }
-      }
-      stage('Goodbye!') {
-         steps {
-            echo 'Goodbye!'
-            //Zarchiwizuj wynik
-            archiveArtifacts 'output.txt'
-         }
-      }
-   }
-}
\ No newline at end of file
+    agent any
+
+    // Build parameters; the declarative directive takes the definitions directly.
+    parameters {
+        string(name: 'KAGGLE_DATASET_ID', defaultValue: '', description: 'Kaggle dataset')
+        string(name: 'REPO_URL', defaultValue: '', description: 'Git Url')
+        string(name: 'CUTOFF', defaultValue: '10000', description: 'Liczba wierszy do obcięcia ze zbioru danych')
+    }
+
+    stages {
+        stage('Clone Repository') {
+            steps {
+                git url: "${params.REPO_URL}"
+            }
+        }
+
+        stage('Download, Process, and Split Dataset') {
+            steps {
+                // KAGGLE_USERNAME / KAGGLE_KEY reach the shell through the inherited
+                // job environment. The build parameters are exported as variables and
+                // the command is single-quoted, so Groovy never splices user input
+                // into the shell source; the script gets them both as arguments and
+                // as DATASET_ID / CUTOFF in its environment.
+                withEnv([
+                    "DATASET_ID=${params.KAGGLE_DATASET_ID}",
+                    "CUTOFF=${params.CUTOFF}"
+                ]) {
+                    sh 'bash ./kuggle_download.sh "$DATASET_ID" "$CUTOFF"'
+                }
+            }
+        }
+    }
+}
diff --git a/kuggle_download.sh b/kuggle_download.sh
new file mode 100644
index 0000000..20e4416
--- /dev/null
+++ b/kuggle_download.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+#
+# Download a Kaggle dataset, shuffle its CSV, split it into train/dev/test
+# and pack the resulting files into artifacts.tar.gz.
+#
+# Usage: kuggle_download.sh [DATASET_ID] [CUTOFF]
+#   DATASET_ID    Kaggle dataset slug "owner/name"; falls back to $DATASET_ID.
+#   CUTOFF        optional row count for train_cutoff.csv; falls back to $CUTOFF.
+#   DATASET_FILE  environment only: CSV inside the archive to process;
+#                 defaults to the first *.csv listed in the archive.
+
+set -euo pipefail
+
+DATASET_ID="${1:-${DATASET_ID:-}}"
+CUTOFF="${2:-${CUTOFF:-}}"
+DATASET_FILE="${DATASET_FILE:-}"
+TRAIN_ROWS=80000
+DEV_ROWS=10000
+
+if [ -z "$DATASET_ID" ]; then
+  echo "usage: $0 DATASET_ID [CUTOFF]" >&2
+  exit 1
+fi
+
+# Install the Kaggle CLI only when it is not already on PATH.
+command -v kaggle >/dev/null 2>&1 || pip install kaggle
+
+kaggle datasets download -d "$DATASET_ID"
+
+# The CLI saves the archive as <name>.zip, <name> being the slug part after the owner.
+archive="${DATASET_ID##*/}.zip"
+unzip -o "$archive"
+
+if [ -z "$DATASET_FILE" ]; then
+  DATASET_FILE="$(unzip -Z1 "$archive" | grep -i -m 1 '\.csv$' || true)"
+fi
+if [ -z "$DATASET_FILE" ] || [ ! -f "$DATASET_FILE" ]; then
+  echo "error: CSV file '${DATASET_FILE:-<none>}' not found after extracting $archive" >&2
+  exit 1
+fi
+
+# NOTE(review): if the CSV has a header row it is shuffled in with the data,
+# exactly as before - confirm whether it should be kept apart.
+shuf "$DATASET_FILE" > shuffled_dataset.csv
+
+# Consecutive, non-overlapping slices: train first, then dev, the remainder is test.
+head -n "$TRAIN_ROWS" shuffled_dataset.csv > train.csv
+sed -n "$((TRAIN_ROWS + 1)),$((TRAIN_ROWS + DEV_ROWS))p" shuffled_dataset.csv > dev.csv
+tail -n "+$((TRAIN_ROWS + DEV_ROWS + 1))" shuffled_dataset.csv > test.csv
+
+head -n 1000 train.csv > train_head.csv
+tail -n 1000 train.csv > train_tail.csv
+
+artifacts=(train.csv dev.csv test.csv train_head.csv train_tail.csv)
+if [ -n "$CUTOFF" ]; then
+  head -n "$CUTOFF" train.csv > train_cutoff.csv
+  artifacts+=(train_cutoff.csv)
+fi
+
+tar -czf artifacts.tar.gz "${artifacts[@]}"
+
+rm -f "$archive" "$DATASET_FILE" shuffled_dataset.csv
+
+echo "artifacts.tar.gz"
+