ium_07 sacred

2024-06-11 19:24:36 +02:00 · 2024-06-11 19:24:36 +02:00 · 19460ed294
commit 19460ed294
parent 2724f348b0
3 changed files with 11 additions and 25 deletions
--- a/8
+++ b/8
@@ -13,6 +13,14 @@ pipeline {
      }
    }
+    stage('Download datasets') {
+      steps {
+        withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
+          sh "bash ./download_dataset.sh"
+        }
+      }
+    }
    stage('Build and Run Experiments') {
      agent {
        dockerfile {
--- a/download_dataset.sh
+++ b/download_dataset.sh
@@ -6,7 +6,5 @@ kaggle datasets download -d gulczas/spotify-dataset --force --unzip
 kaggle datasets download -d joebeachcapital/30000-spotify-songs --force --unzip
-echo "test test test"
+mkdir -p datasets
-
+cp Spotify_Dataset.csv spotify_songs.csv datasets/
 mkdir -p artifacts
 mv Spotify_Dataset.csv spotify_songs.csv artifacts/
--- a/sacred/sacred_model_creator.py
+++ b/sacred/sacred_model_creator.py
@@ -12,31 +12,12 @@ from sacred import Experiment
 from sacred.observers import MongoObserver, FileObserver
 # Tworzenie eksperymentu
-ex = Experiment('123456')  # Zastąp '123456' swoim numerem indeksu
+ex = Experiment('464953')  # Zastąp '123456' swoim numerem indeksu
 # Dodanie obserwatorów
 ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017'))
 ex.observers.append(FileObserver('my_experiment_logs'))
-def download_dataset(dataset_address, destination_folder):
-    api = KaggleApi()
-    api.authenticate()
-    api.dataset_download_files(dataset_address, path=destination_folder, unzip=True)
-def check_datasets_presence():
-    dataset_1 = "Spotify_Dataset.csv"
-    dataset_2 = "spotify_songs.csv"
-    destination_folder = "datasets"
-    if not os.path.exists(destination_folder):
-        os.makedirs(destination_folder)
-        print(f"Utworzono folder: {destination_folder}")
-    else:
-        print(f"Folder {destination_folder} już istnieje.")
-    if dataset_1 not in os.listdir(destination_folder):
-        download_dataset('gulczas/spotify-dataset', destination_folder)
-    if dataset_2 not in os.listdir(destination_folder):
-        download_dataset('joebeachcapital/30000-spotify-songs', destination_folder)
 def datasets_preparation():
    df_1 = pd.read_csv("datasets/spotify_songs.csv")
    df_2 = pd.read_csv("datasets/Spotify_Dataset.csv", sep=";")
@@ -79,7 +60,6 @@ def config():
@ex.main
 def run_experiment(test_size, random_state, model_filename):
-    check_datasets_presence()
    result_df = datasets_preparation()
    Y = result_df[['playlist_genre']]
    X = result_df.drop(columns='playlist_genre')