ium_07 sacred

Michal Gulczynski 2024-06-11 19:24:36 +02:00
parent 2724f348b0
commit 19460ed294
3 changed files with 11 additions and 25 deletions


@@ -13,6 +13,14 @@ pipeline {
            }
        }
        stage('Download datasets') {
            steps {
                withEnv(["KAGGLE_USERNAME=${params.KAGGLE_USERNAME}", "KAGGLE_KEY=${params.KAGGLE_KEY}"]) {
                    sh "bash ./download_dataset.sh"
                }
            }
        }
        stage('Build and Run Experiments') {
            agent {
                dockerfile {
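
Note: the 'Download datasets' stage above reads params.KAGGLE_USERNAME and params.KAGGLE_KEY, which presupposes a parameters block higher up in the Jenkinsfile, outside this hunk. A minimal sketch of what such a declaration could look like (the parameter types, defaults and descriptions are assumptions, not part of this commit):

parameters {
    string(name: 'KAGGLE_USERNAME', defaultValue: '', description: 'Kaggle account used by download_dataset.sh')
    password(name: 'KAGGLE_KEY', defaultValue: '', description: 'Kaggle API token for that account')
}

Binding the key through the Jenkins credentials store instead of a plain build parameter would avoid exposing it in the build's parameter list.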


@@ -6,7 +6,5 @@ kaggle datasets download -d gulczas/spotify-dataset --force --unzip
kaggle datasets download -d joebeachcapital/30000-spotify-songs --force --unzip
echo "test test test"
mkdir -p artifacts
mv Spotify_Dataset.csv spotify_songs.csv artifacts/
mkdir -p datasets
mv Spotify_Dataset.csv spotify_songs.csv datasets/


@@ -12,31 +12,12 @@ from sacred import Experiment
from sacred.observers import MongoObserver, FileStorageObserver
# Create the experiment
ex = Experiment('123456')  # Replace '123456' with your index number
ex = Experiment('464953')  # Replace '123456' with your index number
# Add observers
ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017'))
ex.observers.append(FileStorageObserver('my_experiment_logs'))
def download_dataset(dataset_address, destination_folder):
    api = KaggleApi()
    api.authenticate()
    api.dataset_download_files(dataset_address, path=destination_folder, unzip=True)
def check_datasets_presence():
    dataset_1 = "Spotify_Dataset.csv"
    dataset_2 = "spotify_songs.csv"
    destination_folder = "datasets"
    if not os.path.exists(destination_folder):
        os.makedirs(destination_folder)
        print(f"Created folder: {destination_folder}")
    else:
        print(f"Folder {destination_folder} already exists.")
    if dataset_1 not in os.listdir(destination_folder):
        download_dataset('gulczas/spotify-dataset', destination_folder)
    if dataset_2 not in os.listdir(destination_folder):
        download_dataset('joebeachcapital/30000-spotify-songs', destination_folder)
def datasets_preparation():
    df_1 = pd.read_csv("datasets/spotify_songs.csv")
    df_2 = pd.read_csv("datasets/Spotify_Dataset.csv", sep=";")
@@ -79,7 +60,6 @@ def config():
@ex.main
def run_experiment(test_size, random_state, model_filename):
    check_datasets_presence()
    result_df = datasets_preparation()
    Y = result_df[['playlist_genre']]
    X = result_df.drop(columns='playlist_genre')
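
Note: the hunk ends inside run_experiment; the rest of the function and the experiment launcher lie outside this diff. A minimal sketch of how a Sacred experiment of this shape is typically completed, assuming the config values shown above — the scikit-learn model choice, accuracy metric, joblib usage and the __main__ launcher below are illustrative assumptions, not the author's code:

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier  # assumed model, not taken from the diff
from sklearn.metrics import accuracy_score
import joblib

@ex.main
def run_experiment(test_size, random_state, model_filename):
    result_df = datasets_preparation()
    Y = result_df[['playlist_genre']]
    X = result_df.drop(columns='playlist_genre')
    # --- continuation beyond the shown hunk (illustrative); assumes the feature
    # --- columns are already numeric / encoded by datasets_preparation
    X_train, X_test, y_train, y_test = train_test_split(
        X, Y, test_size=test_size, random_state=random_state)
    model = RandomForestClassifier(random_state=random_state)
    model.fit(X_train, y_train.values.ravel())
    accuracy = accuracy_score(y_test, model.predict(X_test))
    ex.log_scalar("accuracy", accuracy)  # recorded by the MongoObserver / FileStorageObserver
    joblib.dump(model, model_filename)
    ex.add_artifact(model_filename)      # attach the saved model to the run
    return accuracy

if __name__ == "__main__":
    ex.run()  # executes @ex.main with the values defined in @ex.config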