Mateusz Piwowarski 2024-03-24 11:31:18 +01:00
parent 52d2aa8a79
commit 37cdcc397f
2 changed files with 54 additions and 30 deletions

Jenkinsfile

@@ -1,33 +1,39 @@
pipeline {
    agent any
    parameters {
        string(
            defaultValue: 'vskyper',
            description: 'Kaggle username',
            name: 'KAGGLE_USERNAME',
            trim: false
        )
        password(
            defaultValue: '',
            description: 'Kaggle token taken from kaggle.json file, as described in https://github.com/Kaggle/kaggle-api#api-credentials',
            name: 'KAGGLE_KEY'
        )
    }
    stages {
        stage('Clone Repository') {
            steps {
                git branch: 'main', url: 'https://git.wmi.amu.edu.pl/s464913/ium_464913.git'
            }
        }
        stage('Download dataset') {
            steps {
                script {
                    withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
                        sh 'chmod +x download_dataset.sh'
                        sh './download_dataset.sh'
                    }
                }
            }
        }
        stage('Archive artifacts') {
            steps {
                archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true
            }
        }
    }
}
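
The two parameters are surfaced to the build as the KAGGLE_USERNAME and KAGGLE_KEY environment variables, which the Kaggle CLI accepts in place of a ~/.kaggle/kaggle.json file (see the kaggle-api link in the parameter description). A minimal sketch for reproducing the 'Download dataset' stage outside Jenkins, assuming the kaggle CLI is available and the placeholder below is replaced with the "key" field of your kaggle.json:

# Run the download step locally with the same credentials the pipeline injects.
export KAGGLE_USERNAME=vskyper
export KAGGLE_KEY=<key field from kaggle.json>   # placeholder, not a real token
chmod +x download_dataset.sh
./download_dataset.sh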

download_dataset.sh

@@ -1,7 +1,25 @@
#!/bin/bash
# Install the Kaggle API
pip install kaggle
# Download the dataset from Kaggle
kaggle datasets download -d mlg-ulb/creditcardfraud
# Unzip the dataset
unzip -o creditcardfraud.zip
# Remove the zip file
rm creditcardfraud.zip
# Shuffle the dataset
shuf creditcard.csv > creditcard_shuf.csv
# Remove the original dataset
rm creditcard.csv
# Split the dataset into training and testing
head -n 10000 creditcard_shuf.csv > creditcard_train.csv
tail -n +10001 creditcard_shuf.csv > creditcard_test.csv
# Create a directory for the data
mkdir -p data
# Move the datasets to the data directory
mv creditcard_shuf.csv creditcard_train.csv creditcard_test.csv data/
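
One caveat worth noting: creditcard.csv starts with a header row, so a plain shuf mixes that header in with the data rows before the head/tail split. A hedged sketch of a header-preserving variant of the shuffle and split steps (the helper files header.csv and rows_shuf.csv are illustrative names, not part of the committed script; the mkdir/mv steps would stay the same), keeping the same 10000-line train file:

# Sketch: shuffle only the data rows, then give each split its own header.
head -n 1 creditcard.csv > header.csv                    # header row only
tail -n +2 creditcard.csv | shuf > rows_shuf.csv         # shuffled data rows
cat header.csv rows_shuf.csv > creditcard_shuf.csv
{ cat header.csv; head -n 9999 rows_shuf.csv; } > creditcard_train.csv
{ cat header.csv; tail -n +10000 rows_shuf.csv; } > creditcard_test.csv
rm creditcard.csv header.csv rows_shuf.csv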