diff --git a/Jenkinsfile b/Jenkinsfile
index 726ee2b..fe983ba 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -25,10 +25,10 @@ pipeline {
 
         stage('Get data save artifacts') {
             steps {
-                sh 'chmod +x -R ./data_prep.sh'
                 withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}",
                     "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
                     sh 'echo KAGGLE_USERNAME: $KAGGLE_USERNAME'
+                    archiveArtifacts artifacts: 'apps_train.csv, apps_test.csv, apps_validate.csv'
                 }
             }
         }
diff --git a/data_expl.py b/data_expl.py
index 276d265..d0fd912 100644
--- a/data_expl.py
+++ b/data_expl.py
@@ -32,4 +32,12 @@ data["Installs"] = (data["Installs"] - min_value) / (max_value - min_value)
 # splitting into sets
 np.random.seed(123)
 train, validate, test = np.split(data.sample(frac=1, random_state=42), [int(.6*len(data)), int(.8*len(data))])
-print(f"Data shape: {data.shape}\nTrain shape: {train.shape}\nTest shape: {test.shape}\nValidation shape:{validate.shape}")
\ No newline at end of file
+print(f"Data shape: {data.shape}\nTrain shape: {train.shape}\nTest shape: {test.shape}\nValidation shape:{validate.shape}")
+
+# Write each split to the CSV that the Jenkinsfile archives.
+# to_csv() serialises the DataFrame; file.write() accepts only str and
+# raises TypeError for a DataFrame. index=False omits the shuffled row
+# labels (assumed not to be a feature column; confirm).
+train.to_csv("apps_train.csv", index=False)
+test.to_csv("apps_test.csv", index=False)
+validate.to_csv("apps_validate.csv", index=False)