2023-02-03 00:42:23 +01:00
5 changed files with 82758 additions and 9 deletions
--- a/Fuzzy_presentation.ipynb
+++ b/Fuzzy_presentation.ipynb
--- a/README.md
+++ b/README.md
@ -5,8 +5,14 @@
    pip install -r requirements.txt
    python main.py

+#### To run the project in presentation mode:
+
+    python main.py --pres
+This generates a JSON file (`results/result.json`) that can be viewed by running all cells of `Fuzzy_presentation.ipynb`.
+
 Processed dataset files are already provided, but can be created from the base ``games.csv`` file by running:

    python process_dataset.py

+
 If no ``GoogleNews-vectors-negative300.bin`` file is present, only ``games_processed.csv`` will be created.
--- a/data/steam_data.csv
+++ b/data/steam_data.csv
--- a/main.py
+++ b/main.py
@ -5,6 +5,9 @@ from numpy.linalg import norm
 import json
 import multiprocessing
 import tqdm
+from sys import argv
+import sys, getopt
+import argparse


 def find_games_categorical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
@ -25,7 +28,7 @@ def find_games_word_vector_distance(game_1: pd.DataFrame, game_2: pd.DataFrame)
    return round(dot(game_1_vector, game_2_vector) / (norm(game_1_vector) * norm(game_2_vector)), 2)


-def calculate_similarities(game_title, title_list, df):
+def calculate_similarities(game_title, title_list, df, test=False):
    if game_title in title_list:
        title_list.remove(game_title)

@ -47,6 +50,7 @@ def calculate_similarities(game_title, title_list, df):
                    })

    sorted_games = sorted(all_games, key=lambda k: k['similarity'], reverse=True)
+    if (test): return sorted_games[:20]
    print("\n ==== Top 20 most similar games: ====")
    for game in sorted_games[:20]:
        print(f"- {game['title']}")
@ -69,11 +73,48 @@ def compare_games(title_1, title_2, df, show_graph=False):
                                                   vector_distance=word_vector_distance, show_graph=show_graph)
    return similarity_score

+def get_game_info_from_df(data_games, game_title):
+  finded_game = data_games.loc[data_games["name"] == game_title]  # rows whose "name" equals game_title
+  # Only the first matching row is used; .values[0] raises IndexError when no row matches.
+  result_dict = {
+    "title" : finded_game["name"].values[0],
+    "price" : finded_game["price"].values[0],
+    "all_categorical" : finded_game["all_categorical"].values[0],
+  }
+  return result_dict

-if __name__ == '__main__':

+def get_game_info(data_game):
+  # Builds the same {"title", "price", "all_categorical"} dict as get_game_info_from_df,
+  # but reads directly from one record indexable by "name"/"price"/"all_categorical" (no lookup).
+  result_dict = {
+    "title" : data_game["name"],
+    "price" : data_game["price"],
+    "all_categorical" : data_game["all_categorical"],
+  }
+  return result_dict
+
+def main(argv):
    df = pd.read_pickle('data/games_processed_vectorized.csv')
    title_list = df["name"].values.tolist()
+
+    test_mode = False
+    opts, args = getopt.getopt(argv, "", ["pres"])
+    for opt, arg in opts:
+        if "--pres" == opt: 
+            test_mode = True
+    if (True == test_mode):
+        game_list = ["Call of Duty®: Modern Warfare® 2", "Project CARS", "DayZ", "STAR WARS™ Jedi Knight - Mysteries of the Sith™", "Overcooked"]
+        result_dict = {"results": []}
+        for item in game_list:
+            titles_results = calculate_similarities(game_title=item, title_list=title_list, df=df, test=test_mode)
+            game_result = get_game_info_from_df(df, item)
+            game_result["fuzzy_similiar"] = [get_game_info_from_df(df, title_item["title"]) for title_item in titles_results[:10]]
+            result_dict["results"].append(game_result)
+        with open("results/result.json", "w", encoding="UTF-8") as outfile:
+          json.dump(result_dict, outfile, ensure_ascii=False)
+
+    if (False == test_mode):
        while True:
            print("Welcome to Fuzzy Game Reccomender!\nType in a game title and we will find the most similar games from our database")
            title = input("Enter the title or type 'exit' to leave: ")
@ -81,3 +122,9 @@ if __name__ == '__main__':
                break
            else:
                calculate_similarities(game_title=title, title_list=title_list, df=df)
+
+
+if __name__ == '__main__':  # run only when executed as a script, not when imported
+  main(sys.argv[1:])  # drop argv[0] (the script path); main() hands the rest to getopt
+
+    
--- a/results/result.json
+++ b/results/result.json