fuzzy-game-recommender/main.py

import pandas as pd
from fuzzy_controllers import fuzzy_controler_similiarity
from numpy import dot
from numpy.linalg import norm
import json
import multiprocessing
from tqdm.auto import tqdm
from sys import argv
import sys, getopt
import argparse
import random

def find_games_categorical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
    """Return the Jaccard similarity of two games' categorical tags.

    Each argument is a DataFrame whose first row's ``all_categorical`` cell
    holds an iterable of hashable tags. The score is
    ``len(A & B) / len(A | B)`` rounded to two decimals.

    Returns:
        A value between 0.0 and 1.0. When neither game has any tag the
        union is empty and 0.0 is returned instead of raising
        ``ZeroDivisionError``.
    """
    game_1_categorical = set(game_1['all_categorical'].tolist()[0])
    game_2_categorical = set(game_2['all_categorical'].tolist()[0])
    union = game_1_categorical | game_2_categorical
    if not union:
        # Two untagged games give no evidence of similarity.
        return 0.0
    return round(len(game_1_categorical & game_2_categorical) / len(union), 2)


def find_games_numerical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
    """Return the absolute gap between two games' ``fuzzy_popularity`` values.

    Despite the name, the result is a *difference*: 0.0 means identical
    popularity and larger values mean the games are further apart.

    The value is read from the first row of each DataFrame, as the other
    similarity helpers do. Reading it directly (rather than parsing the
    output of ``Series.to_string``) avoids display-precision rounding and
    the ``ValueError`` that a multi-row selection would otherwise cause.

    Returns:
        ``abs(popularity_1 - popularity_2)`` rounded to two decimals.
    """
    game_1_popularity = float(game_1["fuzzy_popularity"].iloc[0])
    game_2_popularity = float(game_2["fuzzy_popularity"].iloc[0])
    return round(abs(game_1_popularity - game_2_popularity), 2)


def find_games_word_vector_distance(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
    """Return the cosine similarity of two games' ``all_categorical_vector``.

    Despite the name, the value is a similarity: 1.0 for vectors pointing
    the same way, 0.0 for orthogonal vectors. The vector is taken from the
    first row of each DataFrame.

    Returns:
        The cosine of the angle between the vectors, rounded to two
        decimals. If either vector has zero magnitude the cosine is
        undefined, so 0.0 is returned instead of NaN.
    """
    game_1_vector = game_1['all_categorical_vector'].tolist()[0]
    game_2_vector = game_2['all_categorical_vector'].tolist()[0]
    magnitude = norm(game_1_vector) * norm(game_2_vector)
    if magnitude == 0:
        # Avoid dividing by zero and feeding NaN to downstream scoring.
        return 0.0
    return round(float(dot(game_1_vector, game_2_vector) / magnitude), 2)


def calculate_similarities(game_title, title_list, df, test=False):
    """Rank every other title in ``title_list`` by similarity to ``game_title``.

    Args:
        game_title: Name of the reference game.
        title_list: Candidate titles to compare against. The list is NOT
            modified; entries equal to ``game_title`` are skipped.
        df: DataFrame passed through to ``compare_games`` for each pair.
        test: When true, return the ranking instead of printing and saving.

    Returns:
        In test mode, the 20 best matches as ``{"title", "similarity"}``
        dicts, most similar first. Otherwise ``None``: the top 20 titles
        are printed and the full ranking is handed to ``save_results``.
    """
    # Filter into a new list rather than calling title_list.remove(): the
    # caller's list must stay intact so later queries still see every game.
    candidates = [title for title in title_list if title != game_title]
    args_list = [(game_title, compared_title, df) for compared_title in candidates]

    # Score all pairs in parallel; starmap returns results in input order,
    # so they line up with `candidates` below.
    with multiprocessing.Pool() as pool:
        similarities = pool.starmap(
            compare_games,
            tqdm(args_list, total=len(args_list), desc='Searching'),
        )

    all_games = [
        {"title": title, "similarity": similarity}
        for title, similarity in zip(candidates, similarities)
    ]

    sorted_games = sorted(all_games, key=lambda k: k['similarity'], reverse=True)
    if test:
        return sorted_games[:20]
    print("\n ==== Top 20 most similar games: ====")
    for game in sorted_games[:20]:
        print(f"- {game['title']}")
    save_results(game_title=game_title, game_list=sorted_games)

def save_results(game_title, game_list):
    """Write the similarity ranking for ``game_title`` as JSON under ``results/``.

    The file is named ``similarity_list_<title>.txt``, where ``<title>`` is
    the lower-cased game title with spaces replaced by underscores. Path
    separators in the title are replaced as well, so a title such as
    ``"A/B"`` cannot point into a non-existent sub-directory.

    Args:
        game_title: Title of the reference game; used to build the file name.
        game_list: JSON-serialisable ranking to store.
    """
    import os  # local import: only needed to create the output directory

    file_stem = game_title.lower().replace(' ', '_')
    for separator in ('/', '\\'):
        file_stem = file_stem.replace(separator, '_')

    # Create the output directory on first use instead of failing.
    os.makedirs("results", exist_ok=True)
    with open(f"results/similarity_list_{file_stem}.txt", 'w', encoding='utf-8') as fp:
        json.dump(game_list, fp)
    # Announce the file only once it has actually been written.
    print("The full list of similar games available in the /results directory\n")

def compare_games(title_1, title_2, df, show_graph=False):
    """Score how similar two games are using the fuzzy controller.

    Both games are selected from ``df`` by exact match on the ``name``
    column. Three features are computed for the pair (categorical overlap,
    popularity gap, vector cosine) and passed to
    ``fuzzy_controler_similiarity``, whose result is returned unchanged.

    Args:
        title_1: Name of the first game.
        title_2: Name of the second game.
        df: DataFrame holding the game rows.
        show_graph: Forwarded to the fuzzy controller.
    """
    names = df['name']
    first_game = df[names == title_1]
    second_game = df[names == title_2]

    # Evaluated in the same order as before: categorical, numerical, vector.
    features = {
        "categorical_data": find_games_categorical_similarity(first_game, second_game),
        "numerical_data": find_games_numerical_similarity(first_game, second_game),
        "vector_distance": find_games_word_vector_distance(first_game, second_game),
    }
    return fuzzy_controler_similiarity(**features, show_graph=show_graph)

def _as_builtin(value):
    """Return the plain-Python equivalent of a NumPy scalar or array.

    Anything without a ``tolist`` method is returned unchanged.
    """
    to_list = getattr(value, "tolist", None)
    return to_list() if callable(to_list) else value


def get_game_info_from_df(data_games, game_title):
    """Look up ``game_title`` in ``data_games`` and return its summary fields.

    The first row whose ``name`` equals ``game_title`` is used. Values are
    converted to built-in Python types so the result can be passed to
    ``json.dump`` even when a column has a NumPy integer dtype.

    Args:
        data_games: DataFrame with ``name``, ``price`` and
            ``all_categorical`` columns.
        game_title: Exact title to look up.

    Returns:
        A dict with the keys ``title``, ``price`` and ``all_categorical``.

    Raises:
        IndexError: If no row matches ``game_title``.
    """
    found_game = data_games.loc[data_games["name"] == game_title]
    return {
        "title": _as_builtin(found_game["name"].values[0]),
        "price": _as_builtin(found_game["price"].values[0]),
        "all_categorical": _as_builtin(found_game["all_categorical"].values[0]),
    }


def get_game_info(data_game):
    """Project a single game record onto the fields used in result files.

    ``data_game`` is any object indexable by the keys ``"name"``,
    ``"price"`` and ``"all_categorical"``. The returned dict exposes them
    as ``"title"``, ``"price"`` and ``"all_categorical"``.
    """
    # (output key, source key) pairs, in output order.
    field_sources = (
        ("title", "name"),
        ("price", "price"),
        ("all_categorical", "all_categorical"),
    )
    return {out_key: data_game[src_key] for out_key, src_key in field_sources}

def main(argv):
    """Run the recommender in presentation mode or as an interactive prompt.

    Args:
        argv: Command-line arguments without the program name.
            ``--pres`` selects presentation mode, which ranks a fixed list
            of games and writes ``results/result.json``.
            ``-r VALUE`` (honoured only together with ``--pres``) replaces
            the fixed list with one randomly chosen title when ``VALUE`` is
            non-empty.
            Without ``--pres`` the user is prompted for titles until they
            type ``exit``.
    """
    import os  # local import: only needed to create the output directory

    try:
        opts, _ = getopt.getopt(argv, "r:", ["pres"])
    except getopt.GetoptError as err:
        # Report bad options as a usage error rather than a traceback.
        print(f"{err}\nUsage: main.py [--pres [-r VALUE]]", file=sys.stderr)
        sys.exit(2)

    test_mode = False
    random_mode = False
    for opt, arg in opts:
        if opt == "--pres":
            test_mode = True
        elif opt == "-r":
            random_mode = arg

    # NOTE(review): despite the .csv suffix this file is loaded as a pickle.
    # Unpickling can execute arbitrary code, so only load trusted data files.
    df = pd.read_pickle('data/games_processed_vectorized.csv')
    title_list = df["name"].values.tolist()
    known_titles = set(title_list)

    # Both modes write into results/, so make sure it exists up front.
    os.makedirs("results", exist_ok=True)

    if test_mode:
        game_list = ["Call of Duty®: Modern Warfare® 2", "Project CARS", "DayZ", "STAR WARS™ Jedi Knight - Mysteries of the Sith™", "Overcooked"]
        if random_mode:
            game_list = [random.choice(title_list)]
        result_dict = {"results": []}
        for item in game_list:
            if item not in known_titles:
                # A missing title would otherwise fail with IndexError.
                print(f"Skipping '{item}': title not found in the database")
                continue
            # Pass a copy so the master title list is never altered by the callee.
            titles_results = calculate_similarities(
                game_title=item, title_list=list(title_list), df=df, test=test_mode)
            game_result = get_game_info_from_df(df, item)
            game_result["fuzzy_similiar"] = [
                get_game_info_from_df(df, title_item["title"])
                for title_item in titles_results[:10]
            ]
            result_dict["results"].append(game_result)
        with open("results/result.json", "w", encoding="UTF-8") as outfile:
            json.dump(result_dict, outfile, ensure_ascii=False)
        return

    while True:
        print("Welcome to Fuzzy Game Reccomender!\nType in a game title and we will find the most similar games from our database")
        title = input("Enter the title or type 'exit' to leave: ")
        if title == "exit":
            break
        if title not in known_titles:
            # Unknown titles cannot be scored; ask again instead of crashing.
            print(f"'{title}' was not found in the database, please try another title\n")
            continue
        calculate_similarities(game_title=title, title_list=list(title_list), df=df)


# Run the CLI only when this file is executed as a script. The guard also
# protects the multiprocessing pool: under the "spawn" start method worker
# processes import this module and must not re-run main().
if __name__ == '__main__':
  main(sys.argv[1:])
Add notebook contents (plus a bit) as .py files 2023-01-27 18:26:45 +01:00			`import pandas as pd`
			`from fuzzy_controllers import fuzzy_controler_similiarity`
			`from numpy import dot`
			`from numpy.linalg import norm`
add functionality to compare a game to the whole db 2023-01-29 14:00:16 +01:00			`import json`
Removed prints, new datasets, multiprocessing 2023-01-29 17:25:12 +01:00			`import multiprocessing`
add random 2023-02-02 01:12:00 +01:00			`from tqdm.auto import tqdm`
presentation of results 2023-02-01 23:57:24 +01:00			`from sys import argv`
			`import sys, getopt`
			`import argparse`
add random 2023-02-02 01:12:00 +01:00			`import random`
Add notebook contents (plus a bit) as .py files 2023-01-27 18:26:45 +01:00
			`def find_games_categorical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:`
			`game_1_categorical = set(game_1['all_categorical'].tolist()[0])`
			`game_2_categorical = set(game_2['all_categorical'].tolist()[0])`
			`return round(len(game_1_categorical & game_2_categorical) / len(game_1_categorical \| game_2_categorical), 2)`


			`def find_games_numerical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:`
			`game_1_popularity = float(game_1["fuzzy_popularity"].to_string(index=False))`
			`game_2_popularity = float(game_2["fuzzy_popularity"].to_string(index=False))`
			`return round(abs(game_1_popularity - game_2_popularity), 2)`


			`def find_games_word_vector_distance(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:`
			`game_1_vector = game_1['all_categorical_vector'].tolist()[0]`
			`game_2_vector = game_2['all_categorical_vector'].tolist()[0]`
			`return round(dot(game_1_vector, game_2_vector) / (norm(game_1_vector) * norm(game_2_vector)), 2)`

Removed prints, new datasets, multiprocessing 2023-01-29 17:25:12 +01:00
presentation of results 2023-02-01 23:57:24 +01:00			`def calculate_similarities(game_title, title_list, df, test=False):`
Removed prints, new datasets, multiprocessing 2023-01-29 17:25:12 +01:00			`if game_title in title_list:`
			`title_list.remove(game_title)`

			`args_list = []`
add functionality to compare a game to the whole db 2023-01-29 14:00:16 +01:00			`for compared_title in title_list:`
Removed prints, new datasets, multiprocessing 2023-01-29 17:25:12 +01:00			`args_list.append((game_title, compared_title, df))`

			`similarities = []`
			`# call the function for each item in parallel with multiprocessing`
			`with multiprocessing.Pool() as pool:`
add random 2023-02-02 01:12:00 +01:00			`for result in pool.starmap(compare_games, tqdm(args_list, total=len(args_list), desc='Searching')):`
Removed prints, new datasets, multiprocessing 2023-01-29 17:25:12 +01:00			`similarities.append(result)`

			`all_games = []`
			`for title, similarity in zip(title_list, similarities):`
			`all_games.append({`
			`"title": title,`
			`"similarity": similarity`
			`})`

add functionality to compare a game to the whole db 2023-01-29 14:00:16 +01:00			`sorted_games = sorted(all_games, key=lambda k: k['similarity'], reverse=True)`
presentation of results 2023-02-01 23:57:24 +01:00			`if (test): return sorted_games[:20]`
add functionality to compare a game to the whole db 2023-01-29 14:00:16 +01:00			`print("\n ==== Top 20 most similar games: ====")`
			`for game in sorted_games[:20]:`
			`print(f"- {game['title']}")`
			`save_results(game_title=game_title, game_list=sorted_games)`

			`def save_results(game_title, game_list):`
			`print("The full list of similar games available in the /results directory\n")`
			`with open(f"results/similarity_list_{game_title.lower().replace(' ', '_')}.txt", 'w+') as fp:`
			`json.dump(game_list, fp)`
Add notebook contents (plus a bit) as .py files 2023-01-27 18:26:45 +01:00
Removed prints, new datasets, multiprocessing 2023-01-29 17:25:12 +01:00			`def compare_games(title_1, title_2, df, show_graph=False):`
Add notebook contents (plus a bit) as .py files 2023-01-27 18:26:45 +01:00			`game_1 = df.loc[df['name'] == title_1]`
			`game_2 = df.loc[df['name'] == title_2]`

			`categorical_similarity = find_games_categorical_similarity(game_1=game_1, game_2=game_2)`
			`numerical_difference = find_games_numerical_similarity(game_1=game_1, game_2=game_2)`
			`word_vector_distance = find_games_word_vector_distance(game_1=game_1, game_2=game_2)`
			`similarity_score = fuzzy_controler_similiarity(categorical_data=categorical_similarity,`
			`numerical_data=numerical_difference,`
			`vector_distance=word_vector_distance, show_graph=show_graph)`
			`return similarity_score`

presentation of results 2023-02-01 23:57:24 +01:00			`def get_game_info_from_df(data_games, game_title):`
			`finded_game = data_games.loc[data_games["name"] == game_title]`
			`# print(finded_game)`
			`result_dict = {`
			`"title" : finded_game["name"].values[0],`
			`"price" : finded_game["price"].values[0],`
			`"all_categorical" : finded_game["all_categorical"].values[0],`
			`}`
			`return result_dict`


			`def get_game_info(data_game):`
			`# finded_game = data_games.loc[data_games["name"] == game_title]`
			`# print(finded_game)`
			`result_dict = {`
			`"title" : data_game["name"],`
			`"price" : data_game["price"],`
			`"all_categorical" : data_game["all_categorical"],`
			`}`
			`return result_dict`

			`def main(argv):`
			`df = pd.read_pickle('data/games_processed_vectorized.csv')`
			`title_list = df["name"].values.tolist()`

			`test_mode = False`
add random 2023-02-02 01:12:00 +01:00			`random_mode = False`

			`opts, args = getopt.getopt(argv, "r:", ["pres"])`
presentation of results 2023-02-01 23:57:24 +01:00			`for opt, arg in opts:`
			`if "--pres" == opt:`
			`test_mode = True`
add random 2023-02-02 01:12:00 +01:00			`if "-r" == opt:`
			`random_mode = arg`
presentation of results 2023-02-01 23:57:24 +01:00			`if (True == test_mode):`
			`game_list = ["Call of Duty®: Modern Warfare® 2", "Project CARS", "DayZ", "STAR WARS™ Jedi Knight - Mysteries of the Sith™", "Overcooked"]`
add random 2023-02-02 01:12:00 +01:00			`if (random_mode): game_list = [random.choice(title_list)]`
presentation of results 2023-02-01 23:57:24 +01:00			`result_dict = {"results": []}`
			`for item in game_list:`
			`titles_results = calculate_similarities(game_title=item, title_list=title_list, df=df, test=test_mode)`
			`game_result = get_game_info_from_df(df, item)`
			`game_result["fuzzy_similiar"] = [get_game_info_from_df(df, title_item["title"]) for title_item in titles_results[:10]]`
			`result_dict["results"].append(game_result)`
			`with open("results/result.json", "w", encoding="UTF-8") as outfile:`
			`json.dump(result_dict, outfile, ensure_ascii=False)`

			`if (False == test_mode):`
			`while True:`
			`print("Welcome to Fuzzy Game Reccomender!\nType in a game title and we will find the most similar games from our database")`
			`title = input("Enter the title or type 'exit' to leave: ")`
			`if title == "exit":`
			`break`
			`else:`
			`calculate_similarities(game_title=title, title_list=title_list, df=df)`

Add notebook contents (plus a bit) as .py files 2023-01-27 18:26:45 +01:00
			`if __name__ == '__main__':`
presentation of results 2023-02-01 23:57:24 +01:00			`main(sys.argv[1:])`
Add notebook contents (plus a bit) as .py files 2023-01-27 18:26:45 +01:00
presentation of results 2023-02-01 23:57:24 +01:00