added model training

This commit is contained in:
s464953 2024-05-09 00:19:43 +02:00
parent cd670b95ea
commit 8e9f37aaff
1 changed file with 4 additions and 4 deletions

View File

@@ -29,8 +29,8 @@ def check_datasets_presence():
raise FileNotFoundError(dataset_2 + " not found")
def datasets_preparation():
df_1 = pd.read_csv("datasets/spotify_songs.csv")
df_2 = pd.read_csv("datasets/Spotify_Dataset.csv", sep=";")
df_1 = pd.read_csv("artifacts/spotify_songs.csv")
df_2 = pd.read_csv("artifacts/Spotify_Dataset.csv", sep=";")
df_1 = df_1.dropna()
df_2 = df_2.dropna()
@@ -60,8 +60,8 @@ def datasets_preparation():
#df_1 = df_1.iloc[20:]
if "docker_test_dataset.csv" not in os.listdir("datasets"):
diff_df.to_csv("datasets/docker_test_dataset.csv", index=False)
if "docker_test_dataset.csv" not in os.listdir("artifacts"):
diff_df.to_csv("artifacts/docker_test_dataset.csv", index=False)
result_df = pd.merge(df_1, df_2, on='track_name', how='inner')
result_df = result_df.drop_duplicates(subset=['track_name'])