Dockerfile

This commit is contained in:
Mateusz 2024-04-01 17:59:41 +02:00
parent 9c6088e6f9
commit 28864b02c9
2 changed files with 47 additions and 13 deletions

42
Jenkinsfile vendored
View File

@ -1,10 +1,5 @@
pipeline {
agent {
dockerfile {
filename 'Dockerfile'
args '-u root'
}
}
agent any
parameters {
string (
@ -20,16 +15,39 @@ pipeline {
)
}
environment {
KAGGLE_USERNAME = "${params.KAGGLE_USERNAME}"
KAGGLE_KEY = "${params.KAGGLE_KEY}"
}
stages {
// Stage: check out the 'main' branch of the project repository into the workspace.
stage('Clone Repository') {
steps {
git branch: 'main', url: 'https://git.wmi.amu.edu.pl/s464913/ium_464913.git'
}
}
// Stage: download the mlg-ulb/creditcardfraud dataset archive with the kaggle CLI and unpack it.
stage('Download dataset') {
steps {
// Export the KAGGLE_USERNAME / KAGGLE_KEY build parameters as environment
// variables for the shell steps below (presumably read by the kaggle CLI
// for authentication - confirm).
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
sh 'kaggle datasets download -d mlg-ulb/creditcardfraud'
// -o: overwrite already-extracted files without prompting, so re-runs do not block.
sh 'unzip -o creditcardfraud.zip'
// The archive is no longer needed once extracted.
sh 'rm creditcardfraud.zip'
}
}
}
// Stage: run create-dataset.py inside a container built from the repository's Dockerfile.
stage('Run create-dataset script') {
agent {
dockerfile {
// Run the container on the same node and workspace as the top-level agent,
// so files produced by earlier stages are visible here.
reuseNode true
}
}
steps {
sh 'chmod +x create-dataset.py'
// NOTE(review): the script is invoked twice below, once with $KAGGLE_USERNAME
// as an argument and once without. This looks like the old and new version of
// the same line shown together - confirm which invocation is intended.
sh 'python3 ./create-dataset.py $KAGGLE_USERNAME'
sh 'python3 ./create-dataset.py'
}
}
// Stage: store everything directly under data/ as build artifacts.
stage('Archive Artifacts') {
steps {
// onlyIfSuccessful: skip archiving unless the build has succeeded so far.
archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true
}
}
}

View File

@ -91,7 +91,23 @@ def main():
# download_kaggle_dataset()
os.makedirs("data", exist_ok=True)
os.system("rm -rf data/*")
df = load_data("creditcard.csv")
df = normalize_data(df)
undersample_data, X_undersample, y_undersample = create_undersample_data(df)
X_train_undersample, X_test_undersample, y_train_undersample, y_test_undersample = (
split_undersample_data(X_undersample, y_undersample)
)
save_undersample_data(
undersample_data,
X_train_undersample,
X_test_undersample,
y_train_undersample,
y_test_undersample,
)
X_train, X_test, y_train, y_test = split_whole_data(df)
save_whole_data(df, X_train, X_test, y_train, y_test)
if __name__ == "__main__":