2023-01-27 18:26:45 +01:00
|
|
|
import pandas as pd
|
|
|
|
from fuzzy_controllers import fuzzy_controler_similiarity
|
|
|
|
from numpy import dot
|
|
|
|
from numpy.linalg import norm
|
2023-01-29 14:00:16 +01:00
|
|
|
import json
|
2023-01-29 17:25:12 +01:00
|
|
|
import multiprocessing
|
|
|
|
import tqdm
|
2023-02-01 23:57:24 +01:00
|
|
|
from sys import argv
|
|
|
|
import sys, getopt
|
|
|
|
import argparse
|
2023-01-29 17:25:12 +01:00
|
|
|
|
2023-01-27 18:26:45 +01:00
|
|
|
|
|
|
|
def find_games_categorical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
    """Return the Jaccard similarity of two games' categorical features.

    The first row of each frame is used; its ``all_categorical`` cell is
    turned into a set (assumes the cell is an iterable of hashable labels --
    TODO confirm against the preprocessing step).

    Args:
        game_1: Frame whose first row describes the first game.
        game_2: Frame whose first row describes the second game.

    Returns:
        ``len(A & B) / len(A | B)`` rounded to two decimals, or ``0.0`` when
        both feature sets are empty.

    Raises:
        IndexError: If either frame has no rows.
    """
    features_1 = set(game_1['all_categorical'].tolist()[0])
    features_2 = set(game_2['all_categorical'].tolist()[0])

    union = features_1 | features_2
    if not union:
        # Both games have no features: avoid 0/0 and report "no overlap".
        return 0.0
    return round(len(features_1 & features_2) / len(union), 2)
|
|
|
|
|
|
|
|
|
|
|
|
def find_games_numerical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
    """Return the absolute difference between two games' popularity scores.

    Despite the name, the result is a *difference*: ``0.0`` means the
    ``fuzzy_popularity`` values are identical.

    The value is read directly from the first row of each frame. Going
    through ``Series.to_string()`` would make the result depend on pandas
    display options (precision, float format) and lose digits.

    Args:
        game_1: Frame whose first row describes the first game.
        game_2: Frame whose first row describes the second game.

    Returns:
        ``abs(popularity_1 - popularity_2)`` rounded to two decimals.

    Raises:
        IndexError: If either frame has no rows.
    """
    popularity_1 = float(game_1["fuzzy_popularity"].iloc[0])
    popularity_2 = float(game_2["fuzzy_popularity"].iloc[0])
    return round(abs(popularity_1 - popularity_2), 2)
|
|
|
|
|
|
|
|
|
|
|
|
def find_games_word_vector_distance(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
    """Return the cosine similarity of two games' feature vectors.

    The first row of each frame is used; its ``all_categorical_vector`` cell
    is taken as the vector (assumes both vectors have the same length --
    TODO confirm against the vectorisation step).

    Args:
        game_1: Frame whose first row describes the first game.
        game_2: Frame whose first row describes the second game.

    Returns:
        ``dot(a, b) / (norm(a) * norm(b))`` rounded to two decimals, or
        ``0.0`` when either vector has zero length (the ratio would be NaN).

    Raises:
        IndexError: If either frame has no rows.
    """
    vector_1 = game_1['all_categorical_vector'].tolist()[0]
    vector_2 = game_2['all_categorical_vector'].tolist()[0]

    magnitude = norm(vector_1) * norm(vector_2)
    if magnitude == 0:
        # A zero vector has no direction; report "no similarity" instead of NaN.
        return 0.0
    return round(dot(vector_1, vector_2) / magnitude, 2)
|
|
|
|
|
2023-01-29 17:25:12 +01:00
|
|
|
|
2023-02-01 23:57:24 +01:00
|
|
|
def calculate_similarities(game_title, title_list, df, test=False):
    """Rank every other title by its fuzzy similarity to ``game_title``.

    Each candidate is scored with ``compare_games`` in a worker pool, then
    the candidates are sorted by descending similarity.

    Args:
        game_title: Title to find similar games for.
        title_list: Titles to compare against. It is not modified; every
            entry equal to ``game_title`` is skipped so a game is never
            compared with itself.
        df: Games frame; its ``name`` column is used to look titles up.
        test: When true, return the top 20 instead of printing and saving.

    Returns:
        With ``test`` true: a list of up to 20 ``{"title", "similarity"}``
        dicts, best match first (empty if ``game_title`` is unknown).
        Otherwise ``None``; the top 20 are printed and the full ranking is
        passed to ``save_results``.
    """
    if not (df["name"] == game_title).any():
        # Without a row for the query game every worker would fail on an
        # empty frame, so stop early with a readable message.
        print(f"Game '{game_title}' was not found in the database.")
        return [] if test else None

    # Build a new list rather than calling title_list.remove(): the caller
    # reuses the same list for later queries.
    candidates = [title for title in title_list if title != game_title]
    args_list = [(game_title, compared_title, df) for compared_title in candidates]

    # Score every candidate in parallel; starmap keeps the input order, so
    # results line up with ``candidates``.
    with multiprocessing.Pool() as pool:
        similarities = pool.starmap(
            compare_games,
            tqdm.tqdm(args_list, total=len(args_list), desc='Searching'),
        )

    all_games = [
        {"title": title, "similarity": similarity}
        for title, similarity in zip(candidates, similarities)
    ]
    sorted_games = sorted(all_games, key=lambda k: k['similarity'], reverse=True)

    if test:
        return sorted_games[:20]

    print("\n ==== Top 20 most similar games: ====")
    for game in sorted_games[:20]:
        print(f"- {game['title']}")
    save_results(game_title=game_title, game_list=sorted_games)
|
|
|
|
|
|
|
|
def save_results(game_title, game_list):
    """Write the ranked similarity list for ``game_title`` as JSON.

    The file is ``results/similarity_list_<title>.txt``, where ``<title>`` is
    the lower-cased title with spaces replaced by underscores. Path
    separators in the title are also replaced so the file always lands
    directly inside ``results/``. The directory is created if missing.

    Args:
        game_title: Title the ranking was computed for.
        game_list: JSON-serialisable ranking to store.
    """
    import os  # local import: the module does not import os at file level

    print("The full list of similar games available in the /results directory\n")

    file_stem = game_title.lower().replace(' ', '_')
    for separator in (os.sep, os.altsep):
        if separator:
            # A title such as "a/b" would otherwise point into a sub-directory.
            file_stem = file_stem.replace(separator, '_')

    os.makedirs("results", exist_ok=True)
    with open(f"results/similarity_list_{file_stem}.txt", 'w') as fp:
        json.dump(game_list, fp)
|
2023-01-27 18:26:45 +01:00
|
|
|
|
2023-01-29 17:25:12 +01:00
|
|
|
def compare_games(title_1, title_2, df, show_graph=False):
    """Score how similar two games are using the fuzzy controller.

    The rows of ``df`` whose ``name`` equals each title are selected, three
    measures are computed from them (categorical overlap, popularity
    difference, vector similarity) and handed to
    ``fuzzy_controler_similiarity``.

    Args:
        title_1: Name of the first game.
        title_2: Name of the second game.
        df: Games frame containing a ``name`` column.
        show_graph: Forwarded unchanged to the fuzzy controller.

    Returns:
        Whatever ``fuzzy_controler_similiarity`` returns for the three
        measures.
    """
    names = df['name']
    first_game = df.loc[names == title_1]
    second_game = df.loc[names == title_2]

    return fuzzy_controler_similiarity(
        categorical_data=find_games_categorical_similarity(game_1=first_game, game_2=second_game),
        numerical_data=find_games_numerical_similarity(game_1=first_game, game_2=second_game),
        vector_distance=find_games_word_vector_distance(game_1=first_game, game_2=second_game),
        show_graph=show_graph,
    )
|
|
|
|
|
2023-02-01 23:57:24 +01:00
|
|
|
def get_game_info_from_df(data_games, game_title):
    """Look a game up by name and return its summary fields.

    Args:
        data_games: Games frame with ``name``, ``price`` and
            ``all_categorical`` columns.
        game_title: Exact value of the ``name`` column to look for.

    Returns:
        A dict with keys ``title``, ``price`` and ``all_categorical`` taken
        from the first matching row.

    Raises:
        IndexError: If no row has the given name.
    """
    matches = data_games.loc[data_games["name"] == game_title]
    # (output key, source column) pairs, in output order.
    field_columns = (
        ("title", "name"),
        ("price", "price"),
        ("all_categorical", "all_categorical"),
    )
    return {key: matches[column].values[0] for key, column in field_columns}
|
|
|
|
|
|
|
|
|
|
|
|
def get_game_info(data_game):
    """Return the summary fields of an already-selected game record.

    Args:
        data_game: Object indexable by ``"name"``, ``"price"`` and
            ``"all_categorical"``.

    Returns:
        A dict with keys ``title``, ``price`` and ``all_categorical``.
    """
    # (output key, source field) pairs, in output order.
    field_sources = (
        ("title", "name"),
        ("price", "price"),
        ("all_categorical", "all_categorical"),
    )
    return {key: data_game[source] for key, source in field_sources}
|
|
|
|
|
|
|
|
def main(argv):
    """Run the recommender in presentation mode or as an interactive prompt.

    With ``--pres`` a fixed list of games is scored and the ten best matches
    for each are written to ``results/result.json``. Without it, the user is
    asked for titles in a loop until they type ``exit``.

    Args:
        argv: Command-line arguments without the program name.

    Raises:
        getopt.GetoptError: If ``argv`` contains an unknown option.
    """
    import os  # local import: the module does not import os at file level

    # Parse options first so a bad command line fails before the data loads.
    opts, _ = getopt.getopt(argv, "", ["pres"])
    test_mode = any(opt == "--pres" for opt, _ in opts)

    # NOTE(review): despite the .csv suffix this file is loaded as a pickle.
    # Unpickling can execute arbitrary code, so only load trusted data files.
    df = pd.read_pickle('data/games_processed_vectorized.csv')
    title_list = df["name"].values.tolist()
    known_titles = set(title_list)

    # Both modes write into results/, so make sure it exists.
    os.makedirs("results", exist_ok=True)

    if test_mode:
        game_list = ["Call of Duty®: Modern Warfare® 2", "Project CARS", "DayZ", "STAR WARS™ Jedi Knight - Mysteries of the Sith™", "Overcooked"]
        result_dict = {"results": []}
        for item in game_list:
            if item not in known_titles:
                # The lookups below would fail on a title the data set lacks.
                print(f"Skipping '{item}': not found in the database.")
                continue
            # Pass a copy so every query is ranked against the full title list.
            titles_results = calculate_similarities(game_title=item, title_list=list(title_list), df=df, test=test_mode)
            game_result = get_game_info_from_df(df, item)
            game_result["fuzzy_similiar"] = [get_game_info_from_df(df, title_item["title"]) for title_item in titles_results[:10]]
            result_dict["results"].append(game_result)
        with open("results/result.json", "w", encoding="UTF-8") as outfile:
            json.dump(result_dict, outfile, ensure_ascii=False)
        return

    while True:
        print("Welcome to Fuzzy Game Reccomender!\nType in a game title and we will find the most similar games from our database")
        title = input("Enter the title or type 'exit' to leave: ")
        if title == "exit":
            break
        if title not in known_titles:
            # A typo should prompt again, not crash the session.
            print(f"'{title}' was not found in the database, please try again.\n")
            continue
        calculate_similarities(game_title=title, title_list=list(title_list), df=df)
|
|
|
|
|
2023-01-27 18:26:45 +01:00
|
|
|
|
|
|
|
# Script entry point: hand everything after the program name to main().
if __name__ == "__main__":
    main(sys.argv[1:])
|
2023-01-27 18:26:45 +01:00
|
|
|
|
2023-02-01 23:57:24 +01:00
|
|
|
|