add script to download and process dataset
This commit is contained in:
parent
f529abac39
commit
d7110d7bf9
31
Jenkinsfile
vendored
31
Jenkinsfile
vendored
@ -1,14 +1,39 @@
|
||||
pipeline {
|
||||
agent any
|
||||
parameters {
|
||||
string (
|
||||
name: 'KAGGLE_USERNAME',
|
||||
defaultValue: 'ardenw',
|
||||
description: 'Kaggle username'
|
||||
)
|
||||
password (
|
||||
name: 'KAGGLE_KEY',
|
||||
defaultValue: '',
|
||||
description: 'Kaggle API key'
|
||||
)
|
||||
string (
|
||||
name: 'DATA_TRAIN_RATIO',
|
||||
defaultValue: '0.8',
|
||||
description: 'Train data ratio'
|
||||
)
|
||||
string (
|
||||
name: 'CUTOFF',
|
||||
defaultValue: '500',
|
||||
description: 'Cutoff value'
|
||||
)
|
||||
}
|
||||
stages {
|
||||
stage('Checkout repository') {
|
||||
steps {
|
||||
checkout scmGit(branches: [[name: '*/master']], extensions: [], userRemoteConfigs: [[credentialsId: 's464980_token', url: 'https://git.wmi.amu.edu.pl/s464980/IUM_s464980.git']])
|
||||
checkout scm
|
||||
}
|
||||
}
|
||||
stage('Download and process data') {
|
||||
// Downloads the Kaggle dataset, truncates/splits it via download_dataset.sh,
// and archives the resulting CSV files.
stage('Download dataset') {
    steps {
        echo "Hello"
        // Map the declared build parameters (KAGGLE_USERNAME, KAGGLE_KEY,
        // DATA_TRAIN_RATIO, CUTOFF) into the environment of the shell steps.
        // The previous mapping read params.USERNAME / params.API_KEY, which are
        // not declared in the parameters block and therefore resolve to null.
        // KAGGLE_USERNAME / KAGGLE_KEY are the variable names the kaggle CLI
        // reads its credentials from.
        withEnv([
            "KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
            "KAGGLE_KEY=${params.KAGGLE_KEY}",
            "DATA_TRAIN_RATIO=${params.DATA_TRAIN_RATIO}",
            "CUTOFF=${params.CUTOFF}"
        ]) {
            sh 'chmod +x download_dataset.sh'
            // Single quotes: let the shell expand the variables instead of
            // interpolating parameter values into the command text in Groovy.
            sh './download_dataset.sh "$DATA_TRAIN_RATIO" "$CUTOFF"'
            archiveArtifacts artifacts: 'data.csv,train.csv,test.csv', onlyIfSuccessful: true
        }
    }
}
|
||||
|
17
download_dataset.sh
Normal file
17
download_dataset.sh
Normal file
@ -0,0 +1,17 @@
|
||||
#!/bin/bash
#
# Download the Kaggle "Student Performance" dataset, optionally truncate it,
# and split it into train and test CSV files.
#
# Usage: ./download_dataset.sh TRAIN_RATIO [CUTOFF]
#   TRAIN_RATIO  fraction of the lines of data.csv written to train.csv,
#                a decimal number between 0 and 1 (e.g. 0.8)
#   CUTOFF       optional number of leading lines of data.csv to keep before
#                splitting; falls back to the CUTOFF environment variable and,
#                when neither is set, keeps the whole file
#
# Requires the kaggle CLI with credentials available to it (for example the
# KAGGLE_USERNAME / KAGGLE_KEY environment variables).
#
# Outputs (current directory): data.csv, train.csv, test.csv
#
# NOTE(review): the split is purely line-based, as before. If the CSV starts
# with a header row, that row counts towards CUTOFF and ends up only in
# train.csv -- confirm whether downstream consumers expect a header in test.csv.

# Stop at the first failing step instead of processing a missing/partial file.
set -euo pipefail

readonly DATASET="nikhil7280/student-performance-multiple-linear-regression"

train_ratio="${1:?usage: $0 TRAIN_RATIO [CUTOFF]}"
cutoff="${2:-${CUTOFF:-}}"

# validate arguments before touching the network
ratio_re='^(0(\.[0-9]+)?|1(\.0+)?|\.[0-9]+)$'
if ! [[ "$train_ratio" =~ $ratio_re ]]; then
    echo "TRAIN_RATIO must be a decimal number between 0 and 1, got '$train_ratio'" >&2
    exit 1
fi
if [[ -n "$cutoff" && ! "$cutoff" =~ ^[1-9][0-9]*$ ]]; then
    echo "CUTOFF must be a positive integer, got '$cutoff'" >&2
    exit 1
fi

# download data from kaggle; the dataset is identified by its owner/slug
# (-p only sets the download directory and must not be given the web URL)
kaggle datasets download -d "$DATASET" --unzip

# change dataset name to data.csv
mv Student_Performance.csv data.csv

# cut off rows
if [[ -n "$cutoff" ]]; then
    head -n "$cutoff" data.csv > data.csv.tmp
    mv data.csv.tmp data.csv
fi

# get data size
data_size=$(wc -l < data.csv)

# Bash arithmetic is integer-only, so the fractional ratio is applied in awk;
# adding 0.5 before truncation rounds to the nearest whole line.
train_size=$(awk -v n="$data_size" -v r="$train_ratio" 'BEGIN { printf "%d", n * r + 0.5 }')

# split data to train and test and save it to csv files; test.csv starts right
# after the last train line so the two files never overlap or drop a line
head -n "$train_size" data.csv > train.csv
tail -n "+$(( train_size + 1 ))" data.csv > test.csv
|
3
get_stats.sh
Normal file
3
get_stats.sh
Normal file
@ -0,0 +1,3 @@
|
||||
#!/bin/bash

# Record the number of lines in data.csv in stats.txt.
# stats.txt is only written when data.csv could be read.
line_count=$(wc -l < data.csv) && printf '%s\n' "$line_count" > stats.txt
|
Loading…
Reference in New Issue
Block a user