diff --git a/model_creator.py b/model_creator.py index 398b3b9..9d5521a 100644 --- a/model_creator.py +++ b/model_creator.py @@ -29,8 +29,8 @@ def check_datasets_presence(): raise FileNotFoundError(dataset_2 + " not found") def datasets_preparation(): - df_1 = pd.read_csv("datasets/spotify_songs.csv") - df_2 = pd.read_csv("datasets/Spotify_Dataset.csv", sep=";") + df_1 = pd.read_csv("artifacts/spotify_songs.csv") + df_2 = pd.read_csv("artifacts/Spotify_Dataset.csv", sep=";") df_1 = df_1.dropna() df_2 = df_2.dropna() @@ -60,8 +60,8 @@ def datasets_preparation(): #df_1 = df_1.iloc[20:] - if "docker_test_dataset.csv" not in os.listdir("datasets"): - diff_df.to_csv("datasets/docker_test_dataset.csv", index=False) + if "docker_test_dataset.csv" not in os.listdir("artifacts"): + diff_df.to_csv("artifacts/docker_test_dataset.csv", index=False) result_df = pd.merge(df_1, df_2, on='track_name', how='inner') result_df = result_df.drop_duplicates(subset=['track_name'])