# fuzzy-game-recommender/main.py
#
# 84 lines
# 3.6 KiB
# Python

import json
import multiprocessing
import os

import pandas as pd
import tqdm
from numpy import dot
from numpy.linalg import norm

from fuzzy_controllers import fuzzy_controler_similiarity
def find_games_categorical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
    """Return the Jaccard index of the two games' categorical values.

    Only the first row of each frame is used; its ``all_categorical`` cell
    must hold an iterable of hashable values.

    Args:
        game_1: Frame holding the first game.
        game_2: Frame holding the second game.

    Returns:
        ``len(A & B) / len(A | B)`` rounded to two decimals. When both games
        have no categorical values at all the ratio is undefined, so ``0.0``
        (no evidence of similarity) is returned instead of dividing by zero.

    Raises:
        IndexError: If either frame has no rows.
    """
    values_1 = set(game_1['all_categorical'].tolist()[0])
    values_2 = set(game_2['all_categorical'].tolist()[0])

    union = values_1 | values_2
    if not union:
        # Both sets are empty: avoid ZeroDivisionError.
        return 0.0
    return round(len(values_1 & values_2) / len(union), 2)
def find_games_numerical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
    """Return the absolute difference between the games' ``fuzzy_popularity``.

    Despite the function name, the result is a *difference*: ``0.0`` means
    the two popularity values are identical and larger values mean the games
    are further apart.

    Only the first row of each frame is used, matching how the other
    per-game helpers in this module read their columns.

    Args:
        game_1: Frame holding the first game.
        game_2: Frame holding the second game.

    Returns:
        ``abs(popularity_1 - popularity_2)`` rounded to two decimals.

    Raises:
        IndexError: If either frame has no rows.
    """
    # Read the scalar directly. Going through Series.to_string() and float()
    # truncates the value to pandas' display precision, depends on global
    # display options, and fails when more than one row matches.
    popularity_1 = float(game_1["fuzzy_popularity"].iloc[0])
    popularity_2 = float(game_2["fuzzy_popularity"].iloc[0])
    return round(abs(popularity_1 - popularity_2), 2)
def find_games_word_vector_distance(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
    """Return the cosine similarity of the two games' categorical vectors.

    Only the first row of each frame is used; its ``all_categorical_vector``
    cell must hold a numeric vector, and both vectors must have the same
    length.

    Args:
        game_1: Frame holding the first game.
        game_2: Frame holding the second game.

    Returns:
        ``dot(v1, v2) / (norm(v1) * norm(v2))`` rounded to two decimals.
        When either vector has zero length the ratio is undefined, so
        ``0.0`` is returned instead of propagating NaN to the caller.

    Raises:
        IndexError: If either frame has no rows.
    """
    vector_1 = game_1['all_categorical_vector'].tolist()[0]
    vector_2 = game_2['all_categorical_vector'].tolist()[0]

    norm_product = norm(vector_1) * norm(vector_2)
    if norm_product == 0:
        # An all-zero vector would make this a 0/0 division (NaN).
        return 0.0
    return round(float(dot(vector_1, vector_2) / norm_product), 2)
def calculate_similarities(game_title, title_list, df):
    """Rank the games in ``title_list`` by similarity to ``game_title``.

    Every title other than ``game_title`` is compared against it with
    ``compare_games`` in a ``multiprocessing.Pool``. The 20 best matches are
    printed and the full ranking is handed to ``save_results``.

    Args:
        game_title: Title of the reference game; must appear in ``df['name']``.
        title_list: Candidate titles to compare against. It is not modified.
        df: Frame with a ``name`` column plus the columns ``compare_games``
            reads.

    Returns:
        None. Results are printed and saved as a side effect.
    """
    # An unknown title would otherwise crash inside the worker processes,
    # where compare_games indexes into an empty selection.
    if not (df['name'] == game_title).any():
        print(f"Game '{game_title}' was not found in the database")
        return

    # Build a fresh list instead of calling title_list.remove(): the caller
    # reuses the same list for later queries, and removing the title in place
    # would silently drop this game from every later ranking.
    candidates = [title for title in title_list if title != game_title]
    args_list = [(game_title, compared_title, df) for compared_title in candidates]

    # call the function for each item in parallel with multiprocessing
    with multiprocessing.Pool() as pool:
        similarities = pool.starmap(
            compare_games,
            tqdm.tqdm(args_list, total=len(args_list), desc='Searching'),
        )

    all_games = [
        {"title": title, "similarity": similarity}
        for title, similarity in zip(candidates, similarities)
    ]
    sorted_games = sorted(all_games, key=lambda k: k['similarity'], reverse=True)

    print("\n ==== Top 20 most similar games: ====")
    for game in sorted_games[:20]:
        print(f"- {game['title']}")
    save_results(game_title=game_title, game_list=sorted_games)
def save_results(game_title, game_list):
    """Write the ranked similarity list for ``game_title`` to ``results/`` as JSON.

    The file is named ``results/similarity_list_<title>.txt``, where the
    title is lower-cased and spaces are replaced by underscores.

    Args:
        game_title: Title of the reference game, used to build the file name.
        game_list: JSON-serialisable ranking to store.

    Raises:
        OSError: If the directory or file cannot be created or written.
        TypeError: If ``game_list`` contains values ``json`` cannot serialise.
    """
    # A fresh checkout may not have the output directory yet.
    os.makedirs("results", exist_ok=True)

    # A path separator in the title would point into a non-existent
    # subdirectory, so treat it like a space.
    file_stem = (
        game_title.lower()
        .replace(' ', '_')
        .replace('/', '_')
        .replace('\\', '_')
    )
    with open(f"results/similarity_list_{file_stem}.txt", 'w', encoding='utf-8') as fp:
        json.dump(game_list, fp)

    # Announce the file only after it has actually been written.
    print("The full list of similar games available in the /results directory\n")
def compare_games(title_1, title_2, df, show_graph=False):
    """Compute the fuzzy similarity score between two games.

    Both games are selected from ``df`` by exact match on the ``name``
    column. Three pairwise measures are computed and passed to
    ``fuzzy_controler_similiarity``, whose result is returned unchanged.

    Args:
        title_1: Name of the first game.
        title_2: Name of the second game.
        df: Frame with a ``name`` column plus the columns read by the three
            ``find_games_*`` helpers.
        show_graph: Forwarded as-is to ``fuzzy_controler_similiarity``.

    Returns:
        Whatever ``fuzzy_controler_similiarity`` returns for the three measures.
    """
    names = df['name']
    first_game = df.loc[names == title_1]
    second_game = df.loc[names == title_2]

    # Keys match the controller's keyword arguments; the dict literal keeps
    # the three measures in their original evaluation order.
    controller_inputs = {
        "categorical_data": find_games_categorical_similarity(game_1=first_game, game_2=second_game),
        "numerical_data": find_games_numerical_similarity(game_1=first_game, game_2=second_game),
        "vector_distance": find_games_word_vector_distance(game_1=first_game, game_2=second_game),
    }
    return fuzzy_controler_similiarity(show_graph=show_graph, **controller_inputs)
if __name__ == '__main__':
    # Interactive entry point: load the game table once, then answer
    # similarity queries until the user types 'exit'.
    # NOTE(review): the file has a .csv name but is loaded with read_pickle,
    # so it is presumably a pickled DataFrame; confirm. Unpickling runs
    # arbitrary code, so this file must come from a trusted source.
    games = pd.read_pickle('data/games_processed_vectorized.csv')
    known_titles = games["name"].values.tolist()

    banner = "Welcome to Fuzzy Game Reccomender!\nType in a game title and we will find the most similar games from our database"
    prompt = "Enter the title or type 'exit' to leave: "

    while True:
        print(banner)
        requested_title = input(prompt)
        if requested_title == "exit":
            break
        calculate_similarities(game_title=requested_title, title_list=known_titles, df=games)