added path to create-dataset.sh

This commit is contained in:
s464962 2024-03-30 16:19:03 +01:00
parent 5a9627d6ae
commit 4c82e6e301

View File

@@ -1,29 +1,31 @@
#!/bin/bash
#
# Downloads the vehicle-sales dataset from Kaggle, shuffles its rows and
# splits them into test / dev / train subsets, then moves every produced
# CSV file into ./data.
#
# Usage: create-dataset.sh <rows_per_subset>
#   $1 - number of rows written to the test subset; the dev subset gets the
#        same number of rows and the train subset gets everything that is left.
#
# Exit status: 0 on success, 2 on a missing/invalid argument, otherwise the
# status of the first command that failed.
set -euo pipefail

if [[ $# -lt 1 || ! "$1" =~ ^[0-9]+$ ]]; then
  printf 'Usage: %s <rows_per_subset>\n' "${0##*/}" >&2
  exit 2
fi

# Force base 10 so that an argument with leading zeros (e.g. "08") is not
# rejected as an invalid octal literal by shell arithmetic.
subset_rows=$(( 10#$1 ))

# Presumably the directory where pip places the `kaggle` entry point when
# running as root -- TODO confirm against the build environment.
export PATH="$PATH:/root/.local/bin"

pip install kaggle
kaggle datasets download -d syedanwarafridi/vehicle-sales-data
unzip -o vehicle-sales-data.zip

# Shuffle the rows.
# NOTE(review): if car_prices.csv starts with a header row, shuf moves it to
# a random position like any other line -- confirm whether it should be
# stripped before shuffling.
shuf car_prices.csv -o car_prices_shuf.csv

# Split the data into subsets.
total_rows=$(wc -l < car_prices_shuf.csv)
test_dev_rows=$(( subset_rows * 2 ))

# Non-fatal on purpose: the split still runs, but dev/train may be short or
# empty when the input has too few rows.
if (( total_rows <= test_dev_rows )); then
  printf 'warning: only %d rows available; %d are needed for test+dev, training set will be empty\n' \
    "$total_rows" "$test_dev_rows" >&2
fi

head -n "$subset_rows" car_prices_shuf.csv > car_prices_test.csv
# Rows (subset_rows + 1) .. (2 * subset_rows) form the dev subset.
# tail is the last pipeline stage, so an early-exiting head cannot trip pipefail.
head -n "$test_dev_rows" car_prices_shuf.csv \
  | tail -n +"$(( subset_rows + 1 ))" > car_prices_dev.csv
tail -n +"$(( test_dev_rows + 1 ))" car_prices_shuf.csv > car_prices_train.csv

test_size=$(wc -l < car_prices_test.csv)
dev_size=$(wc -l < car_prices_dev.csv)
train_size=$(wc -l < car_prices_train.csv)
echo "Rozmiar zbioru testowego: $test_size"
echo "Rozmiar zbioru deweloperskiego: $dev_size"
echo "Rozmiar zbioru treningowego: $train_size"

# Save the artifacts.
mkdir -p data
mv car_prices.csv car_prices_shuf.csv car_prices_test.csv car_prices_dev.csv car_prices_train.csv data/