#!/bin/bash
#
# create-dataset.sh - download the Kaggle vehicle sales dataset, shuffle its
# lines and split them into test / dev / train subsets.
#
# Usage: create-dataset.sh SUBSET_SIZE
#   SUBSET_SIZE  non-negative integer; the number of lines written to the test
#                subset and, separately, to the dev subset. Every remaining
#                line goes to the train subset.
#
# Files produced (all moved into ./data at the end):
#   car_prices.csv        file extracted from the downloaded archive
#   car_prices_shuf.csv   shuffled copy of car_prices.csv
#   car_prices_test.csv   first SUBSET_SIZE lines of the shuffled file
#   car_prices_dev.csv    next SUBSET_SIZE lines of the shuffled file
#   car_prices_train.csv  all lines after the first 2 * SUBSET_SIZE

# Abort on the first failing command, on unset variables and on failures
# inside pipelines, so a failed download never leads to splitting stale files.
set -euo pipefail

# Extend PATH so that the `kaggle` command can be found after installation.
# NOTE(review): presumably pip places its console scripts in this directory
# when the script runs as root - confirm for the target environment.
export PATH="${PATH}:/root/.local/bin"

# Print an error message to stderr and terminate with exit status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Entry point.
# Arguments: $1 - SUBSET_SIZE (see the header of this file)
# Outputs:   the sizes of the three subsets on stdout; diagnostics on stderr
# Returns:   non-zero if the argument is invalid or any step fails
main() {
  local requested=${1:-}

  # Validate before doing any network work: a missing or non-numeric value
  # would otherwise only surface as arithmetic / head / tail errors later on.
  [[ "$requested" =~ ^[0-9]+$ ]] \
    || die "usage: ${0##*/} SUBSET_SIZE (non-negative integer)"

  # Force base 10: without it a value such as "010" is octal inside $(( ))
  # but decimal for head/tail, which would make the subsets inconsistent.
  local subset_size=$(( 10#$requested ))
  local test_dev_rows=$(( subset_size * 2 ))

  pip install kaggle

  kaggle datasets download -d syedanwarafridi/vehicle-sales-data

  unzip -o vehicle-sales-data.zip

  # Shuffling
  # NOTE(review): every line is shuffled, so if car_prices.csv starts with a
  # header row it ends up at a random position in one subset - confirm whether
  # downstream consumers expect that.
  shuf car_prices.csv -o car_prices_shuf.csv

  # Splitting the data into subsets
  local total_rows
  total_rows=$(wc -l < car_prices_shuf.csv)
  if (( test_dev_rows >= total_rows )); then
    printf 'warning: 2 * SUBSET_SIZE (%d) >= number of lines (%d); some subsets will be short or empty\n' \
      "$test_dev_rows" "$total_rows" >&2
  fi

  head -n "$subset_size" car_prices_shuf.csv > car_prices_test.csv
  head -n "$test_dev_rows" car_prices_shuf.csv \
    | tail -n "+$(( subset_size + 1 ))" > car_prices_dev.csv
  tail -n "+$(( test_dev_rows + 1 ))" car_prices_shuf.csv > car_prices_train.csv

  local test_size dev_size train_size
  test_size=$(wc -l < car_prices_test.csv)
  dev_size=$(wc -l < car_prices_dev.csv)
  train_size=$(wc -l < car_prices_train.csv)
  echo "Rozmiar zbioru testowego: $test_size"
  echo "Rozmiar zbioru deweloperskiego: $dev_size"
  echo "Rozmiar zbioru treningowego: $train_size"

  # Saving the artifacts
  mkdir -p data
  mv -- car_prices.csv car_prices_shuf.csv car_prices_test.csv \
    car_prices_dev.csv car_prices_train.csv data/
}

main "$@"