Dockerfile

This commit is contained in:
Mateusz 2024-04-01 17:38:17 +02:00
parent 22deaa5791
commit 3cddbbfdb4
2 changed files with 10 additions and 5 deletions

5
Jenkinsfile vendored
View File

@@ -28,8 +28,11 @@ pipeline {
stages { stages {
stage('Run create-dataset script') { stage('Run create-dataset script') {
steps { steps {
sh 'kaggle datasets download -d mlg-ulb/creditcardfraud'
sh 'unzip creditcardfraud.zip'
sh 'rm creditcardfraud.zip'
sh 'chmod +x create-dataset.py' sh 'chmod +x create-dataset.py'
sh 'python3 ./create-dataset.py' sh 'python3 ./create-dataset.py $KAGGLE_USERNAME'
} }
} }
stage('Archive Artifacts') { stage('Archive Artifacts') {

View File

@@ -10,9 +10,11 @@ from sklearn.model_selection import train_test_split
def download_kaggle_dataset(): def download_kaggle_dataset():
os.system("kaggle datasets download -d mlg-ulb/creditcardfraud") os.environ["KAGGLE_USERNAME"] = "vskyper"
os.system("unzip creditcardfraud.zip") os.environ["KAGGLE_KEY"] = sys.argv[1]
os.system("rm creditcardfraud.zip") kaggle = KaggleApi()
kaggle.authenticate()
kaggle.dataset_download_files("mlg-ulb/creditcardfraud", path="./", unzip=True)
def load_data(name): def load_data(name):
@@ -96,7 +98,7 @@ def save_whole_data(df, X_train, X_test, y_train, y_test):
def main(): def main():
download_kaggle_dataset() # download_kaggle_dataset()
os.makedirs("data", exist_ok=True) os.makedirs("data", exist_ok=True)
df = load_data("creditcard.csv") df = load_data("creditcard.csv")