2024-03-26 18:29:52 +01:00
|
|
|
#!/bin/bash
|
|
|
|
|
|
|
|
# Fetch the student-performance dataset with the Kaggle CLI. The --unzip flag
# asks the CLI to extract the downloaded archive.
dataset_slug="nikhil7280/student-performance-multiple-linear-regression"
kaggle datasets download -d "$dataset_slug" --unzip
|
2024-03-26 18:29:52 +01:00
|
|
|
|
|
|
|
# Give the dataset CSV the fixed name (data.csv) that the later steps read.
downloaded_csv="Student_Performance.csv"
mv "$downloaded_csv" data.csv
|
|
|
|
# Keep only the first $2 lines of data.csv. The shortened copy is written to
# a side file first and replaces data.csv only when head succeeded.
row_limit=$2
truncated_copy="data.csv.tmp"
head -n "$row_limit" data.csv > "$truncated_copy" \
  && mv "$truncated_copy" data.csv
|
2024-03-26 18:29:52 +01:00
|
|
|
|
|
|
|
# Count the lines of data.csv; every line is counted, the first one included.
data_size="$(
  wc -l < data.csv
)"
|
|
|
|
|
|
|
|
#######################################
# Split data.csv into train.csv and test.csv without overlap.
# Arguments:
#   $1 - percentage (integer 0-100) of the lines of data.csv that go to
#        train.csv
#   $2 - total number of lines in data.csv
# Outputs:
#   train.csv - the first ($2 * $1 / 100) lines of data.csv, rounded down
#   test.csv  - line 1 of data.csv followed by every line after the ones
#               written to train.csv
# Returns:
#   0 on success, 1 if the percentage is not an integer from 0 to 100
#######################################
split_train_test() {
  local train_pct=$1
  local total_lines=$2
  local train_lines first_test_line

  if [[ ! "$train_pct" =~ ^[0-9]+$ ]] || (( 10#$train_pct > 100 )); then
    printf 'train percentage must be an integer from 0 to 100, got: %s\n' \
      "$train_pct" >&2
    return 1
  fi

  # Multiply before dividing: shell arithmetic is integer-only, so a bare
  # pct/100 evaluates to 0 for every pct below 100 (which used to make
  # test.csv a copy of the whole file). 10# forces base 10 so a value with a
  # leading zero such as 08 is not rejected as invalid octal.
  train_lines=$(( total_lines * 10#$train_pct / 100 ))
  head -n "$train_lines" data.csv > train.csv

  # test.csv is the remainder of the file. Line 1 is repeated at its top
  # because train.csv starts with that line as well.
  # NOTE(review): this assumes line 1 of data.csv is a column header - confirm.
  first_test_line=$(( train_lines + 1 ))
  if (( first_test_line < 2 )); then
    # train.csv is empty here; start at line 2 so line 1 is not emitted twice.
    first_test_line=2
  fi
  {
    head -n 1 data.csv
    tail -n "+${first_test_line}" data.csv
  } > test.csv
}

split_train_test "$1" "$data_size"
|