From 19460ed29472e4376a93bb4654685a0808a29d1c Mon Sep 17 00:00:00 2001
From: Michal Gulczynski
Date: Tue, 11 Jun 2024 19:24:36 +0200
Subject: [PATCH] ium_07 sacred

---
 Jenkinsfile_sacred             |  8 ++++++++
 download_dataset.sh            |  6 ++----
 sacred/sacred_model_creator.py | 22 +---------------------
 3 files changed, 11 insertions(+), 25 deletions(-)

diff --git a/Jenkinsfile_sacred b/Jenkinsfile_sacred
index 15a0940..28d4813 100644
--- a/Jenkinsfile_sacred
+++ b/Jenkinsfile_sacred
@@ -13,6 +13,14 @@ pipeline {
             }
         }
 
+        stage('Download datasets') {
+            steps {
+                withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
+                    sh "bash ./download_dataset.sh"
+                }
+            }
+        }
+
         stage('Build and Run Experiments') {
             agent {
                 dockerfile {
diff --git a/download_dataset.sh b/download_dataset.sh
index 1c24c6e..3c99f94 100644
--- a/download_dataset.sh
+++ b/download_dataset.sh
@@ -6,7 +6,5 @@
 kaggle datasets download -d gulczas/spotify-dataset --force --unzip
 kaggle datasets download -d joebeachcapital/30000-spotify-songs --force --unzip
 
-echo "test test test"
-
-mkdir -p artifacts
-mv Spotify_Dataset.csv spotify_songs.csv artifacts/
\ No newline at end of file
+mkdir -p datasets
+mv Spotify_Dataset.csv spotify_songs.csv datasets/
\ No newline at end of file
diff --git a/sacred/sacred_model_creator.py b/sacred/sacred_model_creator.py
index 5d6eb71..b88063f 100644
--- a/sacred/sacred_model_creator.py
+++ b/sacred/sacred_model_creator.py
@@ -12,31 +12,12 @@ from sacred import Experiment
 from sacred.observers import MongoObserver, FileObserver
 
 # Create the experiment
-ex = Experiment('123456') # Replace '123456' with your index number
+ex = Experiment('464953') # Replace '123456' with your index number
 
 # Add the observers
 ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017'))
 ex.observers.append(FileObserver('my_experiment_logs'))
 
-def download_dataset(dataset_address, destination_folder):
-    api = KaggleApi()
-    api.authenticate()
-    api.dataset_download_files(dataset_address, path=destination_folder, unzip=True)
-
-def check_datasets_presence():
-    dataset_1 = "Spotify_Dataset.csv"
-    dataset_2 = "spotify_songs.csv"
-    destination_folder = "datasets"
-    if not os.path.exists(destination_folder):
-        os.makedirs(destination_folder)
-        print(f"Created folder: {destination_folder}")
-    else:
-        print(f"Folder {destination_folder} already exists.")
-    if dataset_1 not in os.listdir(destination_folder):
-        download_dataset('gulczas/spotify-dataset', destination_folder)
-    if dataset_2 not in os.listdir(destination_folder):
-        download_dataset('joebeachcapital/30000-spotify-songs', destination_folder)
-
 def datasets_preparation():
     df_1 = pd.read_csv("datasets/spotify_songs.csv")
     df_2 = pd.read_csv("datasets/Spotify_Dataset.csv", sep=";")
@@ -79,7 +60,6 @@ def config():
 
 @ex.main
 def run_experiment(test_size, random_state, model_filename):
-    check_datasets_presence()
     result_df = datasets_preparation()
     Y = result_df[['playlist_genre']]
     X = result_df.drop(columns='playlist_genre')