IUM_s464980/download_dataset.sh

17 lines
499 B
Bash
Raw Normal View History

#!/bin/bash
# Download the student-performance dataset from Kaggle, keep only its leading
# lines, and split the result into train.csv and test.csv.
#
# Usage: download_dataset.sh TRAIN_RATIO MAX_LINES
#   TRAIN_RATIO  fraction (0..1, e.g. 0.8) of the kept lines that go to train.csv
#   MAX_LINES    number of leading lines of the downloaded CSV to keep
#
# Outputs (current directory): data.csv, train.csv, test.csv
# Requires: the `kaggle` CLI (the download step calls it), awk, head, tail, wc.
#
# NOTE(review): the split is purely line-based, so if the CSV starts with a
# header row that row lands only in train.csv — confirm this is what the
# consumers of test.csv expect.
set -euo pipefail

# Print a message to stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

main() {
  (( $# >= 2 )) || die "Usage: ${0##*/} TRAIN_RATIO MAX_LINES"

  local train_ratio=$1
  local max_lines=$2
  local ratio_re='^([0-9]+\.?[0-9]*|\.[0-9]+)$'
  local data_size train_count test_count

  [[ "$max_lines" =~ ^[0-9]+$ ]] \
    || die "MAX_LINES must be a non-negative integer, got: $max_lines"
  [[ "$train_ratio" =~ $ratio_re ]] \
    || die "TRAIN_RATIO must be a number between 0 and 1, got: $train_ratio"
  awk -v r="$train_ratio" 'BEGIN { exit !(r >= 0 && r <= 1) }' \
    || die "TRAIN_RATIO must be a number between 0 and 1, got: $train_ratio"

  command -v kaggle >/dev/null || die "kaggle CLI not found in PATH"

  # download data from kaggle
  kaggle datasets download \
    -d nikhil7280/student-performance-multiple-linear-regression --unzip

  # change dataset name to data.csv
  mv Student_Performance.csv data.csv

  # cut off rows: keep only the first MAX_LINES lines.
  # Two separate statements (not `head ... && mv ...`) so that a failing
  # head aborts the script under `set -e`.
  head -n "$max_lines" data.csv > data.csv.tmp
  mv data.csv.tmp data.csv

  # get data size (number of lines actually kept)
  data_size=$(wc -l < data.csv)

  # Bash arithmetic is integer-only, so the fractional ratio is applied in
  # awk and rounded to the nearest whole line.
  train_count=$(awk -v n="$data_size" -v r="$train_ratio" \
    'BEGIN { printf "%d", n * r + 0.5 }')
  # The test part is whatever remains, so train + test always cover data.csv
  # exactly once, with no overlap and no dropped line.
  test_count=$(( data_size - train_count ))

  # split data to train and test and save it to csv files
  head -n "$train_count" data.csv > train.csv
  tail -n "$test_count" data.csv > test.csv
}

main "$@"