Dockerfile

2024-04-01 18:03:42 +02:00 · 2024-04-01 18:03:42 +02:00 · bdb0221253
commit bdb0221253
parent 06855fdd0e
2 changed files with 14 additions and 48 deletions
--- a/42
+++ b/42
@@ -1,5 +1,10 @@
 pipeline {
-  agent any
+  agent { 
    dockerfile {
      filename 'Dockerfile'
      args '-u root'
    }
  }
  parameters {
    string (
@@ -15,39 +20,16 @@ pipeline {
    )
  }
  environment {
    KAGGLE_USERNAME = "${params.KAGGLE_USERNAME}"
    KAGGLE_KEY = "${params.KAGGLE_KEY}"
  }
  stages {
    stage('Clone Repository') {
      steps {
        git branch: 'main', url: 'https://git.wmi.amu.edu.pl/s464913/ium_464913.git'
      }
    }
    stage('Download dataset') {
      steps {
        withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
          sh 'kaggle datasets download -d mlg-ulb/creditcardfraud'
          sh 'unzip -o creditcardfraud.zip'
          sh 'rm creditcardfraud.zip'
        }
      }
    }
    stage('Run create-dataset script') {
        agent {
          dockerfile {
            reuseNode true
        }
      }
      steps {
        sh 'chmod +x create-dataset.py'
-        sh 'python3 ./create-dataset.py'
+        sh 'python3 ./create-dataset.py $KAGGLE_USERNAME'
      }
    }
    stage('Archive Artifacts') {
      steps {
        archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true
      }
    }
  }
--- a/create-dataset.py
+++ b/create-dataset.py
@@ -91,26 +91,10 @@ def main():
    # download_kaggle_dataset()
    os.makedirs("data", exist_ok=True)
    os.system("ls -al")
    os.system("pwd")
    os.system("rm -rf /var/lib/jenkins/workspace/z-s464913-create-dataset-1/data/")
-    df = load_data("creditcard.csv")
+    os.system("ls -l /var/lib/jenkins/workspace/z-s464913-create-dataset-1/")
    df = normalize_data(df)
    undersample_data, X_undersample, y_undersample = create_undersample_data(df)
    X_train_undersample, X_test_undersample, y_train_undersample, y_test_undersample = (
        split_undersample_data(X_undersample, y_undersample)
    )
    save_undersample_data(
        undersample_data,
        X_train_undersample,
        X_test_undersample,
        y_train_undersample,
        y_test_undersample,
    )
    X_train, X_test, y_train, y_test = split_whole_data(df)
    save_whole_data(df, X_train, X_test, y_train, y_test)
 if __name__ == "__main__":