Dockerfile

This commit is contained in:
Mateusz 2024-04-01 18:03:42 +02:00
parent 06855fdd0e
commit bdb0221253
2 changed files with 14 additions and 48 deletions

42
Jenkinsfile vendored
View File

@ -1,5 +1,10 @@
pipeline { pipeline {
agent any agent {
dockerfile {
filename 'Dockerfile'
args '-u root'
}
}
parameters { parameters {
string ( string (
@ -15,39 +20,16 @@ pipeline {
) )
} }
environment {
KAGGLE_USERNAME = "${params.KAGGLE_USERNAME}"
KAGGLE_KEY = "${params.KAGGLE_KEY}"
}
stages { stages {
stage('Clone Repository') {
steps {
git branch: 'main', url: 'https://git.wmi.amu.edu.pl/s464913/ium_464913.git'
}
}
stage('Download dataset') {
steps {
withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
sh 'kaggle datasets download -d mlg-ulb/creditcardfraud'
sh 'unzip -o creditcardfraud.zip'
sh 'rm creditcardfraud.zip'
}
}
}
stage('Run create-dataset script') { stage('Run create-dataset script') {
agent {
dockerfile {
reuseNode true
}
}
steps { steps {
sh 'chmod +x create-dataset.py' sh 'chmod +x create-dataset.py'
sh 'python3 ./create-dataset.py' sh 'python3 ./create-dataset.py $KAGGLE_USERNAME'
}
}
stage('Archive Artifacts') {
steps {
archiveArtifacts artifacts: 'data/*', onlyIfSuccessful: true
} }
} }
} }

View File

@ -91,26 +91,10 @@ def main():
# download_kaggle_dataset() # download_kaggle_dataset()
os.makedirs("data", exist_ok=True) os.makedirs("data", exist_ok=True)
os.system("ls -al")
os.system("pwd") os.system("pwd")
os.system("rm -rf /var/lib/jenkins/workspace/z-s464913-create-dataset-1/data/")
df = load_data("creditcard.csv") os.system("ls -l /var/lib/jenkins/workspace/z-s464913-create-dataset-1/")
df = normalize_data(df)
undersample_data, X_undersample, y_undersample = create_undersample_data(df)
X_train_undersample, X_test_undersample, y_train_undersample, y_test_undersample = (
split_undersample_data(X_undersample, y_undersample)
)
save_undersample_data(
undersample_data,
X_train_undersample,
X_test_undersample,
y_train_undersample,
y_test_undersample,
)
X_train, X_test, y_train, y_test = split_whole_data(df)
save_whole_data(df, X_train, X_test, y_train, y_test)
if __name__ == "__main__": if __name__ == "__main__":