Dockerfile

This commit is contained in:
Mateusz 2024-04-01 16:41:47 +02:00
parent 008914fd4f
commit 6e8d683268
2 changed files with 22 additions and 15 deletions

14
Jenkinsfile vendored
View File

@@ -1,5 +1,10 @@
pipeline { pipeline {
agent any agent {
dockerfile {
filename 'Dockerfile'
args '-u root'
}
}
parameters { parameters {
string ( string (
@@ -19,8 +24,9 @@ pipeline {
stage('Run create-dataset script') { stage('Run create-dataset script') {
steps { steps {
withEnv (["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) { withEnv (["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
sh 'sudo rm -rf .kaggle' sh 'mkdir /root/.kaggle'
sh 'ls -al' sh 'echo "{\"username\":\"$KAGGLE_USERNAME\",\"key\":\"$KAGGLE_KEY\"}" > /root/.kaggle/kaggle.json'
sh 'chmod 600 /root/.kaggle/kaggle.json'
sh 'chmod +x create-dataset.py' sh 'chmod +x create-dataset.py'
sh 'python3 ./create-dataset.py' sh 'python3 ./create-dataset.py'
} }
@@ -28,7 +34,7 @@ pipeline {
} }
stage('Archive Artifacts') { stage('Archive Artifacts') {
steps { steps {
archiveArtifacts artifacts: '/app/data/*', onlyIfSuccessful: true archiveArtifacts artifacts: '/data/*', onlyIfSuccessful: true
} }
} }
} }

View File

@@ -9,6 +9,7 @@ from sklearn.model_selection import train_test_split
def download_kaggle_dataset(): def download_kaggle_dataset():
os.system("/root/.kaggle/kaggle.json")
kaggle = KaggleApi() kaggle = KaggleApi()
kaggle.authenticate() kaggle.authenticate()
kaggle.dataset_download_files("mlg-ulb/creditcardfraud", path="./", unzip=True) kaggle.dataset_download_files("mlg-ulb/creditcardfraud", path="./", unzip=True)
@@ -69,11 +70,11 @@ def save_undersample_data(
y_train_undersample, y_train_undersample,
y_test_undersample, y_test_undersample,
): ):
undersample_data.to_csv("data/undersample_data.csv", index=False) undersample_data.to_csv("/data/undersample_data.csv", index=False)
X_train_undersample.to_csv("data/X_train_undersample.csv", index=False) X_train_undersample.to_csv("/data/X_train_undersample.csv", index=False)
X_test_undersample.to_csv("data/X_test_undersample.csv", index=False) X_test_undersample.to_csv("/data/X_test_undersample.csv", index=False)
y_train_undersample.to_csv("data/y_train_undersample.csv", index=False) y_train_undersample.to_csv("/data/y_train_undersample.csv", index=False)
y_test_undersample.to_csv("data/y_test_undersample.csv", index=False) y_test_undersample.to_csv("/data/y_test_undersample.csv", index=False)
def split_whole_data(df): def split_whole_data(df):
@@ -87,16 +88,16 @@ def split_whole_data(df):
def save_whole_data(df, X_train, X_test, y_train, y_test): def save_whole_data(df, X_train, X_test, y_train, y_test):
df.to_csv("data/creditcard.csv", index=False) df.to_csv("/data/creditcard.csv", index=False)
X_train.to_csv("data/X_train.csv", index=False) X_train.to_csv("/data/X_train.csv", index=False)
X_test.to_csv("data/X_test.csv", index=False) X_test.to_csv("/data/X_test.csv", index=False)
y_train.to_csv("data/y_train.csv", index=False) y_train.to_csv("/data/y_train.csv", index=False)
y_test.to_csv("data/y_test.csv", index=False) y_test.to_csv("/data/y_test.csv", index=False)
def main(): def main():
download_kaggle_dataset() download_kaggle_dataset()
os.makedirs("data", exist_ok=True) os.makedirs("/data", exist_ok=True)
df = load_data("creditcard.csv") df = load_data("creditcard.csv")
df = normalize_data(df) df = normalize_data(df)