Added download script

This commit is contained in:
s464953 2024-03-23 20:11:47 +01:00
parent d83fc31434
commit 0cda5fe099
2 changed files with 69 additions and 27 deletions

48
Jenkinsfile vendored
View File

@@ -1,27 +1,23 @@
// Scripted pipeline: declares the Kaggle credentials as build parameters and
// runs Kaggle CLI commands with them exported to the shell.
node {
stage('Preparation') {
// Register the build parameters on the job.
properties([
parameters([
// Kaggle account name; pre-filled with a default value.
string(
defaultValue: 'tomaszzitkiewicz',
description: 'Kaggle username',
name: 'KAGGLE_USERNAME',
trim: false
),
// Kaggle API token; declared as a password parameter with an empty default.
password(
defaultValue: '',
description: 'Kaggle token taken from kaggle.json file, as described in https://github.com/Kaggle/kaggle-api#api-credentials',
name: 'KAGGLE_KEY'
)
])
])
}
stage('Build') {
// Expose both build parameters as environment variables, then print the
// username and list Kaggle datasets from the shell.
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
"KAGGLE_KEY=${params.KAGGLE_KEY}" ]) {
// Single-quoted Groovy strings: the variables are expanded by the shell.
sh 'echo KAGGLE_USERNAME: $KAGGLE_USERNAME'
sh 'kaggle datasets list'
}
}
// Declarative pipeline: checks out the repository, runs download_dataset.sh
// with the Kaggle credentials, and archives everything under artifacts/.
pipeline {
    agent any
    // The 'Run Script' stage reads KAGGLE_USERNAME and KAGGLE_KEY from the
    // environment, so they are declared here as build parameters; Jenkins
    // exposes build parameters to `sh` steps as environment variables.
    parameters {
        string(
            name: 'KAGGLE_USERNAME',
            defaultValue: 'tomaszzitkiewicz',
            description: 'Kaggle username',
            // The value is passed as a quoted shell argument, so stray
            // surrounding whitespace would otherwise reach the Kaggle CLI.
            trim: true
        )
        password(
            name: 'KAGGLE_KEY',
            defaultValue: '',
            description: 'Kaggle token taken from kaggle.json file, as described in https://github.com/Kaggle/kaggle-api#api-credentials'
        )
    }
    stages {
        stage('Clone Repository') {
            steps {
                git 'https://git.wmi.amu.edu.pl/s464953/ium_464953.git'
            }
        }
        stage('Run Script') {
            steps {
                // Single-quoted Groovy string: the shell, not Groovy, expands
                // the variables, so the token is never interpolated into the
                // pipeline script. The double quotes keep each value as one
                // argument even if it contains spaces or glob characters.
                sh 'bash download_dataset.sh "$KAGGLE_USERNAME" "$KAGGLE_KEY"'
            }
        }
        stage('Archive Artifacts') {
            steps {
                archiveArtifacts artifacts: 'artifacts/*', onlyIfSuccessful: true
            }
        }
    }
}

46
download_dataset.sh Normal file
View File

@@ -0,0 +1,46 @@
#!/bin/bash
# Download the Kaggle dataset brunoalercon123/top-200-spotify-songs-dataset,
# derive shuffled / subset / column-trimmed CSV files from it, write a short
# preview of each to results.txt, and move all outputs into artifacts/.
#
# Usage: bash download_dataset.sh [KAGGLE_USERNAME] [KAGGLE_KEY]
#   Positional arguments take precedence; when one is omitted, the variable of
#   the same name from the environment is used instead.
#
# Exit status: non-zero as soon as any step fails.

# Stop on the first failing command, on use of an unset variable, and on a
# failure anywhere in a pipeline, so a failed download cannot produce
# empty "successful" artifacts.
set -euo pipefail

# Resolve credentials (argument first, environment as fallback).
kaggle_username="${1:-${KAGGLE_USERNAME:-}}"
kaggle_key="${2:-${KAGGLE_KEY:-}}"
if [[ -z "$kaggle_username" || -z "$kaggle_key" ]]; then
    echo "Usage: $0 KAGGLE_USERNAME KAGGLE_KEY (or set both as environment variables)" >&2
    exit 1
fi

# Set the environment variables read by the Kaggle CLI.
export KAGGLE_USERNAME="$kaggle_username"
export KAGGLE_KEY="$kaggle_key"

pip install kaggle --upgrade

kaggle datasets download brunoalercon123/top-200-spotify-songs-dataset
# -o: overwrite without prompting, so a reused workspace that still holds the
# previously extracted CSV does not block on an interactive question.
unzip -o top-200-spotify-songs-dataset.zip

# NOTE(review): if Spotify-200-Songs.csv starts with a header row, it is
# shuffled together with the data rows and ends up at a random position.
# Confirm against the dataset before relying on the outputs having a header.
shuf Spotify-200-Songs.csv -o shuffled_spotify.csv
head -n 100 shuffled_spotify.csv > subset1.csv
tail -n 100 shuffled_spotify.csv > subset2.csv

# NOTE(review): cut splits on every comma, including commas inside quoted
# fields. Verify that the selected columns never contain embedded commas.
cut -d ',' -f 1,2,3 shuffled_spotify.csv > trimmed_spotify.csv
cut -d ',' -f 1,2,4,5,6 shuffled_spotify.csv > processed_spotify.csv

# Print a title line, the first ten lines of a file, and a blank separator.
preview() {
    echo "$1"
    head "$2"
    echo ""
}

# One redirect for the whole report instead of appending line by line.
{
    preview "Shuffled dataset:" shuffled_spotify.csv
    preview "Subset 1:" subset1.csv
    preview "Subset 2:" subset2.csv
    preview "Trimmed dataset:" trimmed_spotify.csv
    preview "Processed dataset:" processed_spotify.csv
} > results.txt

mkdir -p artifacts
mv shuffled_spotify.csv subset1.csv subset2.csv trimmed_spotify.csv processed_spotify.csv results.txt artifacts/