2023-02-03 00:42:23 +01:00
5 changed files with 82758 additions and 9 deletions
--- a/Fuzzy_presentation.ipynb
+++ b/Fuzzy_presentation.ipynb
--- a/README.md
+++ b/README.md
@ -5,8 +5,14 @@
    pip install -r requirements.txt
    python main.py
 #### To run the project in presentation mode:
    python main.py --pres
 This generates a `results/result.json` file, which can be presented by running all cells of `Fuzzy_presentation.ipynb`.
 Processed dataset files are already provided, but can be created from the base ``games.csv`` file by running:
    python process_dataset.py
 If no ``GoogleNews-vectors-negative300.bin`` file is present, only ``games_processed.csv`` will be created.
--- a/data/steam_data.csv
+++ b/data/steam_data.csv
--- a/main.py
+++ b/main.py
@ -5,6 +5,9 @@ from numpy.linalg import norm
 import json
 import multiprocessing
 import tqdm
 from sys import argv
 import sys, getopt
 import argparse
 def find_games_categorical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
@ -25,7 +28,7 @@ def find_games_word_vector_distance(game_1: pd.DataFrame, game_2: pd.DataFrame)
    return round(dot(game_1_vector, game_2_vector) / (norm(game_1_vector) * norm(game_2_vector)), 2)
-def calculate_similarities(game_title, title_list, df):
+def calculate_similarities(game_title, title_list, df, test=False):
    if game_title in title_list:
        title_list.remove(game_title)
@ -47,6 +50,7 @@ def calculate_similarities(game_title, title_list, df):
                    })
    sorted_games = sorted(all_games, key=lambda k: k['similarity'], reverse=True)
    if (test): return sorted_games[:20]
    print("\n ==== Top 20 most similar games: ====")
    for game in sorted_games[:20]:
        print(f"- {game['title']}")
@ -69,11 +73,48 @@ def compare_games(title_1, title_2, df, show_graph=False):
                                                   vector_distance=word_vector_distance, show_graph=show_graph)
    return similarity_score
 def get_game_info_from_df(data_games, game_title):
     """Look up a game by exact title and return its summary fields.

     Args:
         data_games: DataFrame providing at least the ``name``, ``price`` and
             ``all_categorical`` columns.
         game_title: Title compared for equality against the ``name`` column.

     Returns:
         A dict with the keys ``title``, ``price`` and ``all_categorical``,
         taken from the first row whose ``name`` equals ``game_title``.

     Raises:
         IndexError: If no row of ``data_games`` has that title.
     """
     found_game = data_games.loc[data_games["name"] == game_title]
     if found_game.empty:
         # Indexing the empty selection below would raise IndexError anyway,
         # but with a NumPy "index 0 is out of bounds" message that does not
         # say which title was missing. Keep the exception type, fix the text.
         raise IndexError(f"game {game_title!r} not found in the dataset")
     return {
         "title": found_game["name"].values[0],
         "price": found_game["price"].values[0],
         "all_categorical": found_game["all_categorical"].values[0],
     }
 if __name__ == '__main__':
 def get_game_info(data_game):
     """Build the summary dict for a single game record.

     ``data_game`` is indexed by the keys ``name``, ``price`` and
     ``all_categorical``; the result exposes those values under ``title``,
     ``price`` and ``all_categorical`` respectively.
     """
     # (output key, source key) pairs, in the order the result is built.
     field_map = (
         ("title", "name"),
         ("price", "price"),
         ("all_categorical", "all_categorical"),
     )
     return {out_key: data_game[src_key] for out_key, src_key in field_map}
 def main(argv):
    df = pd.read_pickle('data/games_processed_vectorized.csv')
    title_list = df["name"].values.tolist()
    test_mode = False
    opts, args = getopt.getopt(argv, "", ["pres"])
    for opt, arg in opts:
        if "--pres" == opt: 
            test_mode = True
    if (True == test_mode):
        game_list = ["Call of Duty®: Modern Warfare® 2", "Project CARS", "DayZ", "STAR WARS™ Jedi Knight - Mysteries of the Sith™", "Overcooked"]
        result_dict = {"results": []}
        for item in game_list:
            titles_results = calculate_similarities(game_title=item, title_list=title_list, df=df, test=test_mode)
            game_result = get_game_info_from_df(df, item)
            game_result["fuzzy_similiar"] = [get_game_info_from_df(df, title_item["title"]) for title_item in titles_results[:10]]
            result_dict["results"].append(game_result)
        with open("results/result.json", "w", encoding="UTF-8") as outfile:
          json.dump(result_dict, outfile, ensure_ascii=False)
    if (False == test_mode):
        while True:
            print("Welcome to Fuzzy Game Reccomender!\nType in a game title and we will find the most similar games from our database")
            title = input("Enter the title or type 'exit' to leave: ")
@ -81,3 +122,9 @@ if __name__ == '__main__':
                break
            else:
                calculate_similarities(game_title=title, title_list=title_list, df=df)
 # Script entry point: forward the command-line arguments (without the
 # program name) to main().
 if __name__ == "__main__":
     cli_args = sys.argv[1:]
     main(cli_args)
--- a/results/result.json
+++ b/results/result.json