Added new Jenkinsfile and Kaggle download script

This commit is contained in:
AWieczarek 2024-03-24 18:21:21 +01:00
parent e936d31fa5
commit e8f4387202

View File

@ -1,16 +1,16 @@
#!/bin/bash
# Downloads the Beer Advocate reviews dataset with the kaggle CLI, keeps the
# first N lines of the CSV, and derives the line counts used for the split.
#
# Usage: <script> ROW_COUNT
#   ROW_COUNT  value handed to `head -n`; the CSV header line counts as one
#              of the kept lines.
# Environment: KAGGLE_USERNAME, KAGGLE_KEY
#   NOTE(review): presumably read by the kaggle CLI for authentication — confirm.

# Without an argument `head -n` would swallow the file name as its count,
# so refuse to start rather than fail half-way through.
if [[ -z "${1:-}" ]]; then
  printf 'Usage: %s ROW_COUNT\n' "${0##*/}" >&2
  exit 2
fi

# Download and unpack
echo "$KAGGLE_USERNAME"
# Never print the API key itself: anything written here ends up in the build
# log. Report only whether a value is present.
if [[ -n "${KAGGLE_KEY:-}" ]]; then
  echo "KAGGLE_KEY is set"
else
  echo "KAGGLE_KEY is not set" >&2
fi
pip install kaggle
kaggle datasets download -d thedevastator/1-5-million-beer-reviews-from-beer-advocate
unzip -o 1-5-million-beer-reviews-from-beer-advocate.zip
DATASET_FILE="beer_reviews.csv"
# Every later step reads this file; stop here if download/unzip did not
# produce it instead of archiving empty splits.
if [[ ! -f "$DATASET_FILE" ]]; then
  printf 'error: %s not found after download/unzip\n' "$DATASET_FILE" >&2
  exit 1
fi
echo "------------------ Cut off top: ${1} rows ------------------"
head -n "$1" "$DATASET_FILE" > "cutoff_$DATASET_FILE"
# Shuffle and split
echo "------------------ Split and shufle ------------------"
# Count data rows only: `tail -n +2` skips the header line.
total_lines=$(tail -n +2 "cutoff_$DATASET_FILE" | wc -l)
# 80% of the data rows (integer division) go to the training split.
train_lines=$((total_lines * 80 / 100))
@ -24,9 +24,11 @@ tail -n $((dev_lines + test_lines)) shuffled.csv | head -n $dev_lines > dev.csv
# The last $test_lines lines of shuffled.csv become test.csv.
tail -n "$test_lines" shuffled.csv > test.csv
# Archive
echo "------------------ Archive ------------------"
# Bundle the three splits into one gzip-compressed tarball.
tar -czf artifacts.tar.gz train.csv dev.csv test.csv
# Cleanup
echo "------------------ Clean ------------------"
# Remove the intermediate files; `--` keeps a variable-derived name from
# ever being parsed as an option.
rm -- "cutoff_$DATASET_FILE" shuffled.csv
# Final output line is the archive's file name.
# NOTE(review): presumably consumed by the calling pipeline — confirm.
echo "artifacts.tar.gz"