"""Fuzzy game recommender: rank games by similarity to a given title.

The similarity between two games is produced by a fuzzy controller fed with
three inputs: the Jaccard overlap of their categorical tags, the absolute
difference of their fuzzy popularity values, and the cosine similarity of
their categorical word vectors.
"""
import argparse
import getopt
import json
import multiprocessing
import os
import random
import sys
from sys import argv

import pandas as pd
from numpy import dot
from numpy.linalg import norm
from tqdm.auto import tqdm

from fuzzy_controllers import fuzzy_controler_similiarity


def _json_default(value):
    """Convert objects exposing ``tolist`` (NumPy scalars/arrays) for JSON.

    Used as the ``default=`` hook of ``json.dump`` so values pulled straight
    out of a DataFrame (for example an ``int64`` price) can be written.
    """
    to_list = getattr(value, "tolist", None)
    if callable(to_list):
        return to_list()
    raise TypeError(
        f"Object of type {type(value).__name__} is not JSON serializable"
    )


def find_games_categorical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
    """Return the Jaccard similarity of the games' ``all_categorical`` values.

    Each argument is a DataFrame selection whose first row is used. The
    result is rounded to two decimals; it is ``0.0`` when both games have no
    categorical values at all (instead of dividing by zero).
    """
    game_1_categorical = set(game_1['all_categorical'].tolist()[0])
    game_2_categorical = set(game_2['all_categorical'].tolist()[0])
    union = game_1_categorical | game_2_categorical
    if not union:
        return 0.0
    return round(len(game_1_categorical & game_2_categorical) / len(union), 2)


def find_games_numerical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
    """Return the absolute ``fuzzy_popularity`` difference, rounded to 2 places.

    The value is read from the first row of each selection, matching how the
    sibling helpers read their columns, rather than being round-tripped
    through the display-formatted ``to_string`` text.
    """
    game_1_popularity = float(game_1["fuzzy_popularity"].iloc[0])
    game_2_popularity = float(game_2["fuzzy_popularity"].iloc[0])
    return round(abs(game_1_popularity - game_2_popularity), 2)


def find_games_word_vector_distance(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
    """Return the cosine similarity of the ``all_categorical_vector`` values.

    The first row of each selection is used and the result is rounded to two
    decimals. A zero-length vector on either side yields ``0.0`` instead of a
    division by zero / NaN.
    """
    game_1_vector = game_1['all_categorical_vector'].tolist()[0]
    game_2_vector = game_2['all_categorical_vector'].tolist()[0]
    denominator = norm(game_1_vector) * norm(game_2_vector)
    if denominator == 0:
        return 0.0
    return round(dot(game_1_vector, game_2_vector) / denominator, 2)


def calculate_similarities(game_title, title_list, df, test=False):
    """Rank every title in ``title_list`` by similarity to ``game_title``.

    Args:
        game_title: Name of the reference game; must exist in ``df['name']``.
        title_list: Candidate titles to compare against. It is NOT modified;
            entries equal to ``game_title`` are skipped.
        df: DataFrame holding the game data used by ``compare_games``.
        test: When true, skip printing and saving and only return the result.

    Returns:
        The 20 most similar games as ``{"title", "similarity"}`` dicts, best
        first. An empty list is returned when ``game_title`` is not in ``df``.
        When ``test`` is false the top 20 are also printed and the full
        ranking is written via ``save_results``.
    """
    if not (df['name'] == game_title).any():
        print(f"Game '{game_title}' was not found in the database.")
        return []

    # Work on a filtered copy so the caller's list survives repeated calls.
    candidates = [title for title in title_list if title != game_title]
    args_list = [(game_title, compared_title, df) for compared_title in candidates]

    similarities = []
    if args_list:
        # call the function for each item in parallel with multiprocessing
        with multiprocessing.Pool() as pool:
            similarities = pool.starmap(
                compare_games,
                tqdm(args_list, total=len(args_list), desc='Searching'),
            )

    all_games = [
        {"title": title, "similarity": similarity}
        for title, similarity in zip(candidates, similarities)
    ]
    sorted_games = sorted(all_games, key=lambda k: k['similarity'], reverse=True)
    top_games = sorted_games[:20]

    if test:
        return top_games

    print("\n ==== Top 20 most similar games: ====")
    for game in top_games:
        print(f"- {game['title']}")
    save_results(game_title=game_title, game_list=sorted_games)
    return top_games


def save_results(game_title, game_list):
    """Write ``game_list`` as JSON to ``results/similarity_list_<title>.txt``.

    The title is lower-cased and spaces become underscores; path separators
    are also replaced so a title cannot escape the ``results`` directory.
    The directory is created when missing.
    """
    print("The full list of similar games available in the /results directory\n")
    os.makedirs("results", exist_ok=True)
    file_stem = game_title.lower().replace(' ', '_')
    for separator in (os.sep, os.altsep):
        if separator:
            file_stem = file_stem.replace(separator, '_')
    with open(f"results/similarity_list_{file_stem}.txt", 'w') as fp:
        json.dump(game_list, fp, default=_json_default)


def compare_games(title_1, title_2, df, show_graph=False):
    """Return the fuzzy similarity score of two games looked up by name.

    The three pairwise measures are computed from the rows of ``df`` whose
    ``name`` equals each title and passed to ``fuzzy_controler_similiarity``.
    """
    game_1 = df.loc[df['name'] == title_1]
    game_2 = df.loc[df['name'] == title_2]

    categorical_similarity = find_games_categorical_similarity(game_1=game_1, game_2=game_2)
    numerical_difference = find_games_numerical_similarity(game_1=game_1, game_2=game_2)
    word_vector_distance = find_games_word_vector_distance(game_1=game_1, game_2=game_2)

    similarity_score = fuzzy_controler_similiarity(
        categorical_data=categorical_similarity,
        numerical_data=numerical_difference,
        vector_distance=word_vector_distance,
        show_graph=show_graph,
    )
    return similarity_score


def get_game_info_from_df(data_games, game_title):
    """Return title, price and categorical data of ``game_title`` as a dict.

    Raises:
        IndexError: If no row of ``data_games`` has that ``name``.
    """
    finded_game = data_games.loc[data_games["name"] == game_title]
    if finded_game.empty:
        raise IndexError(f"game {game_title!r} not found in the data frame")
    result_dict = {
        "title": finded_game["name"].values[0],
        "price": finded_game["price"].values[0],
        "all_categorical": finded_game["all_categorical"].values[0],
    }
    return result_dict


def get_game_info(data_game):
    """Return title, price and categorical data from one game record.

    ``data_game`` is indexed by the keys ``name``, ``price`` and
    ``all_categorical``.
    """
    result_dict = {
        "title": data_game["name"],
        "price": data_game["price"],
        "all_categorical": data_game["all_categorical"],
    }
    return result_dict


def main(argv):
    """Run the recommender from the command line.

    Options (parsed from ``argv``, i.e. the arguments without the program
    name):
        --pres        Use a fixed list of showcase games and skip the
                      interactive prompt.
        --eval        Evaluate 10 randomly chosen games.
        --evalrandom  Like ``--eval`` but with random "recommendations".
        -r VALUE      Evaluate one randomly chosen game (VALUE is ignored).

    When one of the options selects games, their results are written to
    ``results/result.json``. Unless ``--pres`` is given, an interactive
    prompt follows.
    """
    test_mode = False
    random_mode = False
    eval_mode = False
    eval_random_mode = False

    try:
        opts, _ = getopt.getopt(argv, "r:", ["pres", "eval", "evalrandom"])
    except getopt.GetoptError as err:
        print(err)
        print("usage: [--pres] [--eval] [--evalrandom] [-r VALUE]")
        sys.exit(2)

    for opt, _arg in opts:
        if opt == "--pres":
            test_mode = True
        if opt == "--eval":
            eval_mode = True
        if opt == "--evalrandom":
            eval_random_mode = True
        if opt == "-r":
            # Presence of the flag enables the mode, even for an empty value.
            random_mode = True

    # NOTE(review): the file carries a .csv extension but is loaded as a
    # pickle; unpickling is only safe for trusted, locally produced files.
    df = pd.read_pickle('data/games_processed_vectorized.csv')
    title_list = df["name"].values.tolist()

    # Stays empty when no batch option was given, so plain interactive use
    # goes straight to the prompt.
    game_list = []
    if test_mode:
        game_list = [
            "Call of Duty®: Modern Warfare® 2",
            "Project CARS",
            "DayZ",
            "STAR WARS™ Jedi Knight - Mysteries of the Sith™",
            "Overcooked",
        ]
    if random_mode:
        game_list = [random.choice(title_list)]
    if eval_mode or eval_random_mode:
        game_list = [random.choice(title_list) for _ in range(10)]

    if game_list:
        result_dict = {"results": []}
        for item in game_list:
            if item not in title_list:
                print(f"Skipping '{item}': not found in the database.")
                continue
            if eval_random_mode:
                titles_results = [{"title": random.choice(title_list)} for _ in range(10)]
            else:
                titles_results = calculate_similarities(
                    game_title=item, title_list=title_list, df=df, test=test_mode
                )
            game_result = get_game_info_from_df(df, item)
            game_result["fuzzy_similiar"] = [
                get_game_info_from_df(df, title_item["title"])
                for title_item in titles_results[:10]
            ]
            result_dict["results"].append(game_result)

        os.makedirs("results", exist_ok=True)
        with open("results/result.json", "w", encoding="UTF-8") as outfile:
            json.dump(result_dict, outfile, ensure_ascii=False, default=_json_default)

    if not test_mode:
        while True:
            print("Welcome to Fuzzy Game Reccomender!\nType in a game title and we will find the most similar games from our database")
            title = input("Enter the title or type 'exit' to leave: ")
            if title == "exit":
                break
            calculate_similarities(game_title=title, title_list=title_list, df=df)


if __name__ == '__main__':
    main(sys.argv[1:])