diff --git a/main.py b/main.py
index 4f3901f..3b66f33 100644
--- a/main.py
+++ b/main.py
@@ -2,7 +2,7 @@ import pandas as pd
 from fuzzy_controllers import fuzzy_controler_similiarity
 from numpy import dot
 from numpy.linalg import norm
-
+import json
 
 def find_games_categorical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
     game_1_categorical = set(game_1['all_categorical'].tolist()[0])
@@ -21,6 +21,46 @@ def find_games_word_vector_distance(game_1: pd.DataFrame, game_2: pd.DataFrame)
     game_2_vector = game_2['all_categorical_vector'].tolist()[0]
     return round(dot(game_1_vector, game_2_vector) / (norm(game_1_vector) * norm(game_2_vector)), 2)
 
+def calculate_similarities(game_title: str, title_list: list, df: pd.DataFrame) -> None:
+    """Print the 20 titles most similar to ``game_title`` and save the full ranking.
+
+    Every entry of ``title_list`` other than ``game_title`` is scored with
+    ``compare_games`` and the results are sorted by descending score.
+
+    :param game_title: name of the reference game, as stored in ``df['name']``
+    :param title_list: names of the candidate games to compare against
+    :param df: dataframe that ``compare_games`` looks the titles up in
+    """
+    # compare_games selects rows by exact name and the similarity helpers read
+    # element [0] of the selection, so an unknown title would raise IndexError.
+    if not (df['name'] == game_title).any():
+        print(f"Game '{game_title}' was not found in our database\n")
+        return
+    all_games = []
+    for compared_title in title_list:
+        if game_title != compared_title:
+            all_games.append({
+                "title": compared_title,
+                "similarity": compare_games(title_1=game_title, title_2=compared_title, df=df, show_graph=False)
+            })
+    sorted_games = sorted(all_games, key=lambda k: k['similarity'], reverse=True)
+    print("\n ==== Top 20 most similar games: ====")
+    for game in sorted_games[:20]:
+        print(f"- {game['title']}")
+    save_results(game_title=game_title, game_list=sorted_games)
+
+def save_results(game_title: str, game_list: list) -> None:
+    """Write ``game_list`` as JSON to ``results/similarity_list_<title>.txt``.
+
+    :param game_title: title used to build the file name
+    :param game_list: JSON-serialisable ranking to store
+    """
+    print("The full list of similar games available in the /results directory\n")
+    # Titles are free text: map path separators and other characters that are
+    # illegal in file names to '_' so that open() cannot fail or leave results/.
+    file_title = ''.join('_' if char in ' \\/:*?"<>|' else char for char in game_title.lower())
+    with open(f"results/similarity_list_{file_title}.txt", 'w') as fp:
+        json.dump(game_list, fp)
 
 def compare_games(title_1: str, title_2: str, df: pd.DataFrame, show_graph: bool = False) -> float:
     game_1 = df.loc[df['name'] == title_1]
@@ -29,9 +69,6 @@ def compare_games(title_1: str, title_2: str, df: pd.DataFrame, show_graph: bool
     categorical_similarity = find_games_categorical_similarity(game_1=game_1, game_2=game_2)
     numerical_difference = find_games_numerical_similarity(game_1=game_1, game_2=game_2)
     word_vector_distance = find_games_word_vector_distance(game_1=game_1, game_2=game_2)
-    print(f"Categorical similarity: {categorical_similarity}\nNumerical difference: {numerical_difference}\n"
-          f"Word vector distance: {word_vector_distance}")
-
     similarity_score = fuzzy_controler_similiarity(categorical_data=categorical_similarity,
                                                    numerical_data=numerical_difference,
                                                    vector_distance=word_vector_distance, show_graph=show_graph)
@@ -41,10 +78,13 @@ def compare_games(title_1: str, title_2: str, df: pd.DataFrame, show_graph: bool
 
 if __name__ == '__main__':
     df = pd.read_pickle('data/games_processed_vectorized.csv')
-
-    while True:
-        title_1 = input("Enter title 1: ")
-        title_2 = input("Enter title 2: ")
-        similarity_score = compare_games(title_1=title_1, title_2=title_2, df=df, show_graph=False)
-        print(f'Similarity_score: {similarity_score}')
+    title_list = df["name"].values.tolist()[:2000]
+    run_program = True
+    while run_program:
+        print("Welcome to Fuzzy Game Reccomender!\nType in a game title and we will find the most similar games from our database")
+        title = input("Enter the title or type 'exit' to leave: ")
+        if title == "exit":
+            run_program = False
+        else:
+            calculate_similarities(game_title=title, title_list=title_list, df=df)
 
diff --git a/process_dataset.py b/process_dataset.py
index f36426e..6d1e1e9 100644
--- a/process_dataset.py
+++ b/process_dataset.py
@@ -35,7 +35,7 @@ def replace_with_vector(row, w2v):
 
 if __name__ == '__main__':
     df = pd.read_csv('data/games.csv')
-
+    df = df.drop_duplicates(subset=['name'])
     df['positive_percentage'] = df.apply(
         lambda row: calculate_positive_percentage(row.positive_ratings, row.negative_ratings), axis=1)
     df['owners'] = df.apply(lambda row: owners_average_max_min(row.owners), axis=1)
diff --git a/results/.gitkeep b/results/.gitkeep
new file mode 100644
index 0000000..e69de29