diff --git a/createDataset/Jenkinsfile b/createDataset/Jenkinsfile
index f3b9169..af1cffc 100644
--- a/createDataset/Jenkinsfile
+++ b/createDataset/Jenkinsfile
@@ -20,25 +20,39 @@ pipeline {
         )
     }
     stages {
-        stage('Run sh file') {
+        stage('Download dataset') {
             steps {
                 checkout scm
                 dir ('./createDataset') {
                     sh 'ls -l'
                     withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
                         "KAGGLE_KEY=${params.KAGGLE_KEY}" ]) {
-                        sh 'chmod +x ./datasetScript.sh'
-                        sh './datasetScript.sh'
+                        // sh 'chmod +x ./datasetScript.sh'
+                        // sh './datasetScript.sh'
+                        sh 'kaggle datasets download -d rishikeshkonapure/home-loan-approval'
+                        sh 'unzip -o home-loan-approval.zip'
                     }
                 }
             }
         }
-        stage('Archive file') {
-            steps {
-                dir ('./createDataset') {
-                    archiveArtifacts artifacts: 'loan_sanction_shuffled.csv', fingerprint: true\
-                }
-            }
-        }
+        stage('Docker') {
+            steps {
+                // Declarative `steps` only accepts step invocations; Groovy variable
+                // declarations and method chaining must be wrapped in `script`.
+                script {
+                    def dockerImage = docker.build("docker-iamge", "./docker")
+                    dockerImage.inside {
+                        sh 'ls -l'
+                    }
+                }
+            }
+        }
+        // stage('Archive file') {
+        //     steps {
+        //         dir ('./createDataset') {
+        //             archiveArtifacts artifacts: 'loan_sanction_shuffled.csv', fingerprint: true\
+        //         }
+        //     }
+        // }
     }
 }
diff --git a/createDataset/createDataset.py b/createDataset/createDataset.py
new file mode 100644
index 0000000..3963850
--- /dev/null
+++ b/createDataset/createDataset.py
@@ -0,0 +1,29 @@
+"""Build scaled train/val/test CSVs from the downloaded home-loan dataset."""
+import pandas as pd
+from sklearn.preprocessing import MinMaxScaler
+from sklearn.model_selection import train_test_split
+
+home_loan_train = pd.read_csv('loan_sanction_train.csv')
+home_loan_test = pd.read_csv('loan_sanction_test.csv')
+
+# Halve the provided test file into validation and test sets; the fixed seed keeps the split reproducible.
+home_loan_val_final, home_loan_test_final = train_test_split(home_loan_test, test_size=0.5, random_state=1)
+# Take explicit copies so the column assignments further down do not raise SettingWithCopyWarning.
+home_loan_val_final = home_loan_val_final.copy()
+home_loan_test_final = home_loan_test_final.copy()
+home_loan_train_final = home_loan_train
+
+# Columns with a numeric dtype are the ones that get rescaled.
+numeric_cols_train = home_loan_train_final.select_dtypes(include='number').columns
+numeric_cols_test = home_loan_test_final.select_dtypes(include='number').columns
+numeric_cols_val = home_loan_val_final.select_dtypes(include='number').columns
+
+scaler = MinMaxScaler()
+
+home_loan_train_final[numeric_cols_train] = scaler.fit_transform(home_loan_train_final[numeric_cols_train])  # learn min/max from the training set only
+home_loan_test_final[numeric_cols_train] = scaler.transform(home_loan_test_final[numeric_cols_train])  # reuse train scaling; assumes test has every numeric train column - TODO confirm
+home_loan_val_final[numeric_cols_train] = scaler.transform(home_loan_val_final[numeric_cols_train])  # same train-fitted scaler, so all splits share one scale
+
+home_loan_train_final.to_csv('home_loan_train.csv', index=False)
+home_loan_test_final.to_csv('home_loan_test.csv', index=False)
+home_loan_val_final.to_csv('home_loan_val.csv', index=False)
\ No newline at end of file