Dockerfile

This commit is contained in:
Mateusz 2024-04-01 18:04:49 +02:00
parent bdb0221253
commit 5f863f13b1
2 changed files with 48 additions and 15 deletions

40
Jenkinsfile vendored
View File

@@ -1,10 +1,5 @@
pipeline { pipeline {
agent { agent any
dockerfile {
filename 'Dockerfile'
args '-u root'
}
}
parameters { parameters {
string ( string (
@@ -20,16 +15,39 @@ pipeline {
) )
} }
environment { stages {
KAGGLE_USERNAME = "${params.KAGGLE_USERNAME}" stage('Clone Repository') {
KAGGLE_KEY = "${params.KAGGLE_KEY}" steps {
git branch: 'main', url: 'https://git.wmi.amu.edu.pl/s464913/ium_464913.git'
}
}
stage('Download dataset') {
steps {
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
sh 'kaggle datasets download -d mlg-ulb/creditcardfraud'
sh 'unzip -o creditcardfraud.zip'
sh 'rm creditcardfraud.zip'
}
}
} }
stages {
stage('Run create-dataset script') { stage('Run create-dataset script') {
agent {
dockerfile {
reuseNode true
}
}
steps { steps {
sh 'chmod +x create-dataset.py' sh 'chmod +x create-dataset.py'
sh 'python3 ./create-dataset.py $KAGGLE_USERNAME' sh 'python3 ./create-dataset.py'
}
}
stage('Archive Artifacts') {
steps {
archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true
} }
} }
} }

View File

@@ -91,10 +91,25 @@ def main():
# download_kaggle_dataset() # download_kaggle_dataset()
os.makedirs("data", exist_ok=True) os.makedirs("data", exist_ok=True)
os.system("pwd") os.system
os.system("rm -rf /var/lib/jenkins/workspace/z-s464913-create-dataset-1/data/")
os.system("ls -l /var/lib/jenkins/workspace/z-s464913-create-dataset-1/") df = load_data("creditcard.csv")
df = normalize_data(df)
undersample_data, X_undersample, y_undersample = create_undersample_data(df)
X_train_undersample, X_test_undersample, y_train_undersample, y_test_undersample = (
split_undersample_data(X_undersample, y_undersample)
)
save_undersample_data(
undersample_data,
X_train_undersample,
X_test_undersample,
y_train_undersample,
y_test_undersample,
)
X_train, X_test, y_train, y_test = split_whole_data(df)
save_whole_data(df, X_train, X_test, y_train, y_test)
if __name__ == "__main__": if __name__ == "__main__":