diff --git a/.idea/.gitignore b/.idea/.gitignore
new file mode 100644
index 0000000..13566b8
--- /dev/null
+++ b/.idea/.gitignore
@@ -0,0 +1,8 @@
+# Default ignored files
+/shelf/
+/workspace.xml
+# Editor-based HTTP Client requests
+/httpRequests/
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml
new file mode 100644
index 0000000..105ce2d
--- /dev/null
+++ b/.idea/inspectionProfiles/profiles_settings.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/ium_464979.iml b/.idea/ium_464979.iml
new file mode 100644
index 0000000..d0876a7
--- /dev/null
+++ b/.idea/ium_464979.iml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
new file mode 100644
index 0000000..dc9ea49
--- /dev/null
+++ b/.idea/misc.xml
@@ -0,0 +1,4 @@
+
+
+
+
\ No newline at end of file
diff --git a/.idea/modules.xml b/.idea/modules.xml
new file mode 100644
index 0000000..4f48266
--- /dev/null
+++ b/.idea/modules.xml
@@ -0,0 +1,8 @@
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000..35eb1dd
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+
+
+
+
+
+
\ No newline at end of file
diff --git a/Jenkinsfile b/Jenkinsfile
index 5cafbfe..973cf0d 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1,29 +1,33 @@
pipeline {
- agent any
- //Definijuemy parametry, kt�re b�dzie mo�na poda� podczas wywo�ywania zadania
- parameters {
- string (
- defaultValue: 'Hello World!',
- description: 'Tekst, kt�rym chcesz przywita� �wiat',
- name: 'INPUT_TEXT',
- trim: false
- )
- }
- stages {
- stage('Hello') {
- steps {
- //Wypisz warto�� parametru w konsoli (To nie jest polecenie bash, tylko groovy!)
- echo "INPUT_TEXT: promoscan"
- //Wywo�aj w konsoli komend� "figlet", kt�ra generuje ASCI-art
- sh "figlet \"promoscan\" | tee output.txt"
- }
- }
- stage('Goodbye!') {
- steps {
- echo 'Goodbye!'
- //Zarchiwizuj wynik
- archiveArtifacts 'output.txt'
- }
- }
- }
-}
\ No newline at end of file
+    agent any
+
+    // Declarative pipelines list parameters directly in this block; the
+    // properties([parameters([...])]) wrapper is scripted-pipeline syntax.
+    parameters {
+        string(name: 'KAGGLE_DATASET_ID', defaultValue: '', description: 'Kaggle dataset')
+        string(name: 'REPO_URL', defaultValue: '', description: 'Git Url')
+        string(name: 'CUTOFF', defaultValue: '10000', description: 'Liczba wierszy do obcięcia ze zbioru danych')
+    }
+
+    stages {
+        stage('Clone Repository') {
+            steps {
+                git url: "${params.REPO_URL}"
+            }
+        }
+
+        stage('Download, Process, and Split Dataset') {
+            steps {
+                // Hand the parameters over as environment variables and let the shell
+                // expand them, so their values are never spliced into the command text.
+                // KAGGLE_USERNAME and KAGGLE_KEY are inherited from the build environment.
+                withEnv([
+                    "DATASET_ID=${params.KAGGLE_DATASET_ID}",
+                    "CUTOFF=${params.CUTOFF}"
+                ]) {
+                    sh 'bash ./kuggle_download.sh "$DATASET_ID" "$CUTOFF"'
+                }
+            }
+        }
+    }
+}
diff --git a/kuggle_download.sh b/kuggle_download.sh
new file mode 100644
index 0000000..20e4416
--- /dev/null
+++ b/kuggle_download.sh
@@ -0,0 +1,100 @@
+#!/bin/bash
+#
+# Download a Kaggle dataset, shuffle its rows and split them into
+# train/dev/test CSV files, then bundle the results into artifacts.tar.gz.
+#
+# Usage: kuggle_download.sh [DATASET_ID] [CUTOFF]
+#
+# Positional arguments take precedence over same-named environment variables:
+#   DATASET_ID    Kaggle dataset in "owner/name" form (required).
+#   CUTOFF        Optional; when set, the first CUTOFF lines of train.csv are
+#                 also written to train_cutoff.csv.
+# Environment only:
+#   DATASET_FILE  CSV to use from the extracted archive (default: the first
+#                 *.csv found, in sorted order).
+#   HEADER_ROWS   Leading lines kept out of the shuffle and copied to the top
+#                 of every split (default 1; use 0 for headerless data).
+#   TRAIN_ROWS    Data rows in train.csv (default 80000).
+#   DEV_ROWS      Data rows in dev.csv (default 10000).
+# All remaining rows go to test.csv.
+
+set -euo pipefail
+
+DATASET_ID="${1:-${DATASET_ID:-}}"
+CUTOFF="${2:-${CUTOFF:-}}"
+HEADER_ROWS="${HEADER_ROWS:-1}"
+TRAIN_ROWS="${TRAIN_ROWS:-80000}"
+DEV_ROWS="${DEV_ROWS:-10000}"
+
+# die MESSAGE: print MESSAGE to stderr and abort.
+die() {
+    echo "$0: $*" >&2
+    exit 1
+}
+
+# require_count NAME VALUE: abort unless VALUE is a non-negative integer.
+require_count() {
+    case "$2" in
+        ''|*[!0-9]*) die "$1 must be a non-negative integer, got '$2'" ;;
+    esac
+}
+
+[ -n "$DATASET_ID" ] || die "usage: $0 DATASET_ID [CUTOFF]"
+require_count HEADER_ROWS "$HEADER_ROWS"
+require_count TRAIN_ROWS "$TRAIN_ROWS"
+require_count DEV_ROWS "$DEV_ROWS"
+if [ -n "$CUTOFF" ]; then
+    require_count CUTOFF "$CUTOFF"
+fi
+
+pip install kaggle
+
+# Work in a scratch directory so the archive and intermediates are removed on
+# every exit path, including failures.
+workdir="$(mktemp -d)"
+trap 'rm -rf "$workdir"' EXIT
+
+kaggle datasets download -d "$DATASET_ID" -p "$workdir" --unzip
+
+if [ -n "${DATASET_FILE:-}" ]; then
+    csv="$workdir/$DATASET_FILE"
+else
+    # sed (unlike head) consumes the whole stream, so sort is never killed by
+    # SIGPIPE, which pipefail would turn into a script failure.
+    csv="$(find "$workdir" -type f -name '*.csv' | sort | sed -n '1p')"
+fi
+[ -n "$csv" ] && [ -f "$csv" ] || die "no CSV file found for dataset $DATASET_ID"
+
+header="$workdir/header.part"
+rows="$workdir/shuffled.part"
+head -n "$HEADER_ROWS" "$csv" > "$header"
+tail -n "+$((HEADER_ROWS + 1))" "$csv" | shuf > "$rows"
+
+# write_split START COUNT OUT: write the header plus COUNT shuffled rows
+# beginning at 1-based row START to OUT; a negative COUNT means "to the end".
+write_split() {
+    {
+        cat "$header"
+        awk -v start="$1" -v count="$2" \
+            'NR >= start && (count < 0 || NR < start + count)' "$rows"
+    } > "$3"
+}
+
+# split(1) treats its last argument as a filename *prefix* (train.csvaa, ...),
+# so the three partitions are cut out by row range instead.
+write_split 1 "$TRAIN_ROWS" train.csv
+write_split "$((TRAIN_ROWS + 1))" "$DEV_ROWS" dev.csv
+write_split "$((TRAIN_ROWS + DEV_ROWS + 1))" -1 test.csv
+
+head -n 1000 train.csv > train_head.csv
+tail -n 1000 train.csv > train_tail.csv
+
+artifacts=(train.csv dev.csv test.csv train_head.csv train_tail.csv)
+if [ -n "$CUTOFF" ]; then
+    head -n "$CUTOFF" train.csv > train_cutoff.csv
+    artifacts+=(train_cutoff.csv)
+fi
+
+tar -czf artifacts.tar.gz "${artifacts[@]}"
+
+echo "artifacts.tar.gz"