From 0cda5fe099bf1d300bff4c698fe840522a808880 Mon Sep 17 00:00:00 2001
From: s464953
Date: Sat, 23 Mar 2024 20:11:47 +0100
Subject: [PATCH] Added download script

---
 Jenkinsfile         | 53 ++++++++++++++++++++-----------------------
 download_dataset.sh | 69 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 95 insertions(+), 27 deletions(-)
 create mode 100644 download_dataset.sh

diff --git a/Jenkinsfile b/Jenkinsfile
index ab8c194..1692276 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1,27 +1,26 @@
-node {
-    stage('Preparation') {
-        properties([
-            parameters([
-                string(
-                    defaultValue: 'tomaszzitkiewicz',
-                    description: 'Kaggle username',
-                    name: 'KAGGLE_USERNAME',
-                    trim: false
-                ),
-                password(
-                    defaultValue: '',
-                    description: 'Kaggle token taken from kaggle.json file, as described in https://github.com/Kaggle/kaggle-api#api-credentials',
-                    name: 'KAGGLE_KEY'
-                )
-            ])
-        ])
-    }
-    stage('Build') {
-        // Run the maven build
-        withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
-                 "KAGGLE_KEY=${params.KAGGLE_KEY}" ]) {
-            sh 'echo KAGGLE_USERNAME: $KAGGLE_USERNAME'
-            sh 'kaggle datasets list'
-        }
-    }
-}
\ No newline at end of file
+pipeline {
+    agent any
+
+    stages {
+        stage('Clone Repository') {
+            steps {
+                git 'https://git.wmi.amu.edu.pl/s464953/ium_464953.git'
+            }
+        }
+        stage('Run Script') {
+            steps {
+                script {
+                    // The script reads KAGGLE_USERNAME / KAGGLE_KEY from the
+                    // environment; passing them as arguments would print the
+                    // key in the build log, because the sh step traces commands.
+                    sh 'bash download_dataset.sh'
+                }
+            }
+        }
+        stage('Archive Artifacts') {
+            steps {
+                archiveArtifacts artifacts: 'artifacts/*', onlyIfSuccessful: true
+            }
+        }
+    }
+}
diff --git a/download_dataset.sh b/download_dataset.sh
new file mode 100644
index 0000000..664f76c
--- /dev/null
+++ b/download_dataset.sh
@@ -0,0 +1,69 @@
+#!/bin/bash
+#
+# Downloads the "Top 200 Spotify Songs" dataset from Kaggle, shuffles it,
+# derives subsets and column projections, and moves the results to artifacts/.
+#
+# Usage: download_dataset.sh [KAGGLE_USERNAME [KAGGLE_KEY]]
+# Credentials default to the KAGGLE_USERNAME / KAGGLE_KEY environment
+# variables; prefer those, since arguments appear in shell traces and ps.
+
+# Abort on the first failure instead of processing missing or stale files.
+set -euo pipefail
+
+readonly DATASET='brunoalercon123/top-200-spotify-songs-dataset'
+readonly ARCHIVE='top-200-spotify-songs-dataset.zip'
+readonly SOURCE_CSV='Spotify-200-Songs.csv'
+
+# Prints a titled preview (first 10 lines) of a file, followed by a blank line.
+#   $1 - title line, $2 - file to preview
+preview() {
+  echo "$1"
+  head -- "$2"
+  echo ""
+}
+
+kaggle_username=${1:-${KAGGLE_USERNAME:-}}
+kaggle_key=${2:-${KAGGLE_KEY:-}}
+if [[ -z "$kaggle_username" || -z "$kaggle_key" ]]; then
+  echo "error: Kaggle username and key are required (arguments or environment)" >&2
+  exit 1
+fi
+
+# Set the environment variables read by the kaggle CLI.
+export KAGGLE_USERNAME="$kaggle_username"
+export KAGGLE_KEY="$kaggle_key"
+
+# Best effort: an already installed kaggle CLI is enough if the upgrade fails.
+pip install kaggle --upgrade \
+  || echo "warning: could not install/upgrade the kaggle package" >&2
+command -v kaggle >/dev/null \
+  || { echo "error: kaggle CLI not found on PATH" >&2; exit 1; }
+
+kaggle datasets download "$DATASET"
+
+# -o: overwrite without prompting so reruns in a reused workspace stay fresh.
+unzip -o "$ARCHIVE"
+
+# NOTE(review): shuf permutes every line, so a header row (if the CSV has one)
+# ends up at a random position -- confirm whether that is intended.
+shuf -o shuffled_spotify.csv -- "$SOURCE_CSV"
+
+head -n 100 shuffled_spotify.csv > subset1.csv
+tail -n 100 shuffled_spotify.csv > subset2.csv
+
+# NOTE(review): cut splits on every comma, including commas inside quoted
+# fields -- confirm the CSV has none in the selected columns.
+cut -d ',' -f 1,2,3 shuffled_spotify.csv > trimmed_spotify.csv
+cut -d ',' -f 1,2,4,5,6 shuffled_spotify.csv > processed_spotify.csv
+
+{
+  preview "Shuffled dataset:" shuffled_spotify.csv
+  preview "Subset 1:" subset1.csv
+  preview "Subset 2:" subset2.csv
+  preview "Trimmed dataset:" trimmed_spotify.csv
+  preview "Processed dataset:" processed_spotify.csv
+} > results.txt
+
+mkdir -p artifacts
+mv -- shuffled_spotify.csv subset1.csv subset2.csv trimmed_spotify.csv \
+  processed_spotify.csv results.txt artifacts/