Reviewer note: this patch was reflowed from a single-line paste, so the
indentation of context lines is reconstructed, and the post-image blob ids on
the `index` lines are carried over from the original patch (they no longer
match the revised content).

diff --git a/Jenkinsfile b/Jenkinsfile
index 20068da..a69b5b9 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -1,10 +1,31 @@
 pipeline {
     agent any
+
+    // Kaggle credentials entered per build; the Build stage passes them on.
+    parameters {
+        string(name: 'KAGGLE_USERNAME', defaultValue: '', description: 'Kaggle username')
+        password(name: 'KAGGLE_KEY', defaultValue:'', description: 'Kaggle Key')
+    }
     stages {
-        stage('Stage 1') {
+        stage('Git Checkout') {
             steps {
-                echo 'zadziaƂaj prosze!'
+                git "https://git.wmi.amu.edu.pl/s464914/ium_464914.git"
+            }
+        }
+        stage('Cleanup') {
+            steps {
+                // Remove any existing artifacts directory before it is rebuilt.
+                sh 'rm -rf artifacts'
+            }
+        }
+        stage('Build') {
+            steps {
+                // Hand the build parameters to the script as environment variables.
+                withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
+                         "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
+                    sh 'bash ./get_dataset.sh'
+                }
             }
         }
     }
 }
\ No newline at end of file
diff --git a/get_dataset.sh b/get_dataset.sh
new file mode 100644
index 0000000..ad3e7e4
--- /dev/null
+++ b/get_dataset.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+#
+# Download the nasa/meteorite-landings dataset with the kaggle CLI, shuffle its
+# data rows and split them into train / test / validation CSV files, then move
+# every CSV file into ./artifacts.
+#
+# Environment: KAGGLE_USERNAME and KAGGLE_KEY (set by the Jenkins Build stage).
+# Requires: pip, unzip, shuf, awk.
+
+# Stop at the first failing command, unset variable or failed pipeline stage so
+# a failed download or unzip cannot silently produce empty splits.
+set -euo pipefail
+
+### Settings ###
+
+readonly source_csv='meteorite-landings.csv'
+readonly shuffled_csv='shuffled-meteorite-landings.csv'
+readonly train_ratio=0.8     # share of all data rows written to the train split
+readonly test_val_ratio=0.5  # share of the remaining rows written to the test split
+
+### Helpers ###
+
+# Print $1 * $2 rounded half-up to an integer.
+scale_and_round() {
+  awk -v count="$1" -v ratio="$2" 'BEGIN { printf "%d\n", count * ratio + 0.5 }'
+}
+
+# Write the header line plus data rows $1..$2 (1-based, inclusive) of the
+# shuffled file to $3. One awk pass replaces "tail | head" so that no pipeline
+# stage can be killed by SIGPIPE, which pipefail would report as a failure.
+write_split() {
+  awk -v first="$1" -v last="$2" \
+    'NR == 1 || (NR > first && NR <= last + 1)' "$shuffled_csv" > "$3"
+}
+
+### Download ###
+
+pip install kaggle --upgrade
+kaggle datasets download -d nasa/meteorite-landings
+
+unzip -o meteorite-landings.zip
+
+### Shuffle ###
+
+# NOTE(review): assumes the first line of the CSV is a column header - confirm.
+# Only the rows after it are shuffled, so the header stays on line 1 instead of
+# landing at a random position inside one of the splits.
+{
+  head -n 1 "$source_csv"
+  tail -n +2 "$source_csv" | shuf
+} > "$shuffled_csv"
+
+### Split ###
+
+# Number of data rows (the header line is not counted).
+total_lines=$(( $(wc -l < "$shuffled_csv") - 1 ))
+if (( total_lines < 1 )); then
+  printf 'error: %s contains no data rows\n' "$source_csv" >&2
+  exit 1
+fi
+
+train_lines=$(scale_and_round "$total_lines" "$train_ratio")
+test_lines=$(scale_and_round "$(( total_lines - train_lines ))" "$test_val_ratio")
+
+# Train takes the first rows, test the next ones, validation whatever is left.
+write_split 1 "$train_lines" meteorite_train.csv
+write_split "$(( train_lines + 1 ))" "$(( train_lines + test_lines ))" meteorite_test.csv
+write_split "$(( train_lines + test_lines + 1 ))" "$total_lines" meteorite_validation.csv
+
+### Collect artifacts ###
+
+mkdir -p artifacts
+mv -- "$source_csv" "$shuffled_csv" \
+  meteorite_test.csv meteorite_train.csv meteorite_validation.csv artifacts/