ium_470623/download.sh

11 lines
713 B
Bash
Raw Normal View History

# Download the Kaggle dataset "csafrit2/steel-industry-energy-consumption",
# shuffle its rows and split them into test / dev / train CSV files.
#
# Required environment:
#   CUTOFF - line count handed to `head -n`: how many leading lines of
#            Steel_industry_data.csv are used. The first of those lines is
#            dropped (presumably the CSV header - confirm against the data).
#
# Outputs (current directory):
#   steel_industry_data_test.csv   first  1/10 of the shuffled rows
#   steel_industry_data_dev.csv    next   1/10 of the shuffled rows
#   steel_industry_data_train.csv  all remaining shuffled rows
#
# Requires GNU coreutils (`cut --complement`, `shuf`) plus `kaggle` and `unzip`.

# Abort on the first failing command, on unset variables and on a failure in
# any pipeline stage, so a broken download never yields silently empty splits.
set -euo pipefail

# Without this guard an unset CUTOFF made `head` treat the file name as the
# line count, and the script went on to write three empty files.
: "${CUTOFF:?CUTOFF must be set to the number of lines to read from Steel_industry_data.csv}"

readonly archive='steel-industry-energy-consumption.zip'
readonly raw_csv='Steel_industry_data.csv'
readonly shuffled_csv='steel_industry_data_shuffled.csv'
readonly test_csv='steel_industry_data_test.csv'
readonly dev_csv='steel_industry_data_dev.csv'
readonly train_csv='steel_industry_data_train.csv'

# The shuffled file is only an intermediate; remove it on every exit path,
# including failures part-way through.
trap 'rm -f -- "$shuffled_csv"' EXIT

kaggle datasets download -d csafrit2/steel-industry-energy-consumption --force
unzip -o "$archive"

# Keep the first CUTOFF lines, skip line 1, drop the 8th comma-separated
# field and shuffle the rows.
# NOTE(review): `cut -d,` splits on every comma, so this assumes no quoted
# field contains a comma - confirm against the dataset.
head -n "$CUTOFF" "$raw_csv" \
  | tail -n +2 \
  | cut -d, -f8 --complement \
  | shuf > "$shuffled_csv"

number_of_lines=$(wc -l < "$shuffled_csv")
test_len=$((number_of_lines / 10))
dev_len=$((number_of_lines / 10))

# Test split: rows 1 .. test_len.
head -n "$test_len" "$shuffled_csv" > "$test_csv"

# Dev split: rows test_len+1 .. test_len+dev_len. Written as `head | tail`
# (not `tail | head`) so no producer is killed by SIGPIPE, which would trip
# `pipefail`. Both forms select the same rows because the file always has at
# least test_len + dev_len lines.
head -n "$((test_len + dev_len))" "$shuffled_csv" \
  | tail -n "$dev_len" > "$dev_csv"

# Train split: everything after the test and dev rows.
tail -n "+$((test_len + dev_len + 1))" "$shuffled_csv" > "$train_csv"