#!/bin/bash kaggle datasets download -d uciml/forest-cover-type-dataset unzip -o forest-cover-type-dataset.zip ###Zmienne### train_ratio=0.8 test_val_ratio=0.5 ##Przetwrazanie pliku## shuf covtype.csv -o forest.csv ##Cut off $1 rows## head -n $1 forest.csv > forest.csv total_lines=$(wc -l < forest.csv) train_lines=$(echo $total_lines*$train_ratio| bc) train_lines=$(echo "($train_lines+0.5)/1" | bc ) test_lines=$(echo "($total_lines-$train_lines)*$test_val_ratio" | bc) test_lines=$(echo "($test_lines+0.5)/1" | bc ) validation_lines=$(echo $total_lines-$train_lines-$test_lines | bc) head -n "$train_lines" forest.csv > "forest_train.csv" tail -n $((test_lines+validation_lines)) forest.csv | head -n "$test_lines" > "forest_test.csv" tail -n "$validation_lines" forest.csv > "forest_validation.csv" mkdir -p artifacts mv covtype.csv forest.csv forest_test.csv forest_train.csv forest_validation.csv artifacts/