Dockerfile

This commit is contained in:
Mateusz 2024-04-01 16:41:47 +02:00
parent 008914fd4f
commit 6e8d683268
2 changed files with 22 additions and 15 deletions

14
Jenkinsfile vendored
View File

@ -1,5 +1,10 @@
pipeline {
agent any
agent {
dockerfile {
filename 'Dockerfile'
args '-u root'
}
}
parameters {
string (
@ -19,8 +24,9 @@ pipeline {
stage('Run create-dataset script') {
steps {
withEnv (["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
sh 'sudo rm -rf .kaggle'
sh 'ls -al'
sh 'mkdir /root/.kaggle'
sh 'echo "{\"username\":\"$KAGGLE_USERNAME\",\"key\":\"$KAGGLE_KEY\"}" > /root/.kaggle/kaggle.json'
sh 'chmod 600 /root/.kaggle/kaggle.json'
sh 'chmod +x create-dataset.py'
sh 'python3 ./create-dataset.py'
}
@ -28,7 +34,7 @@ pipeline {
}
stage('Archive Artifacts') {
steps {
archiveArtifacts artifacts: '/app/data/*', onlyIfSuccessful: true
archiveArtifacts artifacts: '/data/*', onlyIfSuccessful: true
}
}
}

View File

@ -9,6 +9,7 @@ from sklearn.model_selection import train_test_split
def download_kaggle_dataset():
os.system("/root/.kaggle/kaggle.json")
kaggle = KaggleApi()
kaggle.authenticate()
kaggle.dataset_download_files("mlg-ulb/creditcardfraud", path="./", unzip=True)
@ -69,11 +70,11 @@ def save_undersample_data(
y_train_undersample,
y_test_undersample,
):
undersample_data.to_csv("data/undersample_data.csv", index=False)
X_train_undersample.to_csv("data/X_train_undersample.csv", index=False)
X_test_undersample.to_csv("data/X_test_undersample.csv", index=False)
y_train_undersample.to_csv("data/y_train_undersample.csv", index=False)
y_test_undersample.to_csv("data/y_test_undersample.csv", index=False)
undersample_data.to_csv("/data/undersample_data.csv", index=False)
X_train_undersample.to_csv("/data/X_train_undersample.csv", index=False)
X_test_undersample.to_csv("/data/X_test_undersample.csv", index=False)
y_train_undersample.to_csv("/data/y_train_undersample.csv", index=False)
y_test_undersample.to_csv("/data/y_test_undersample.csv", index=False)
def split_whole_data(df):
@ -87,16 +88,16 @@ def split_whole_data(df):
def save_whole_data(df, X_train, X_test, y_train, y_test):
# Write the full frame and its four train/test pieces to CSV files, omitting
# the index column (index=False) in every case.
#
# NOTE(review): this span is a diff hunk shown without +/- markers or
# indentation. The five writes to the relative "data/" directory below are
# presumably the lines the commit removed -- confirm against the raw diff.
df.to_csv("data/creditcard.csv", index=False)
X_train.to_csv("data/X_train.csv", index=False)
X_test.to_csv("data/X_test.csv", index=False)
y_train.to_csv("data/y_train.csv", index=False)
y_test.to_csv("data/y_test.csv", index=False)
# NOTE(review): the five writes to the absolute "/data/" directory are
# presumably the replacements the commit added (same file names, new root);
# only one of the two groups should exist in the real file -- TODO confirm.
df.to_csv("/data/creditcard.csv", index=False)
X_train.to_csv("/data/X_train.csv", index=False)
X_test.to_csv("/data/X_test.csv", index=False)
y_train.to_csv("/data/y_train.csv", index=False)
y_test.to_csv("/data/y_test.csv", index=False)
def main():
download_kaggle_dataset()
os.makedirs("data", exist_ok=True)
os.makedirs("/data", exist_ok=True)
df = load_data("creditcard.csv")
df = normalize_data(df)