Merge pull request 'compare_to_all_games' (#1 ) from compare_to_all_games into master

Reviewed-on: s449288/fuzzy-game-recommender#1
add evaluation and baseline
2023-02-03 00:42:21 +01:00 · 2023-02-03 00:41:05 +01:00 · 2023-02-03 00:40:25 +01:00 · 2023-02-02 17:48:38 +01:00 · 2023-02-02 01:12:00 +01:00 · 2023-02-01 23:57:24 +01:00
7 changed files with 83634 additions and 15 deletions
--- a/Fuzzy_presentation.ipynb
+++ b/Fuzzy_presentation.ipynb
--- a/README.md
+++ b/README.md
@ -5,8 +5,23 @@
    pip install -r requirements.txt
    python main.py

+#### To run the project in presentation mode:
+
+    python main.py --pres
+This generates a .json file, which can be presented by running all cells of `Fuzzy_presentation.ipynb`.
+
+#### Random mode 
+
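+    python main.py --pres -r True
+Picks one random game from the dataset instead of the default game list. The value after `-r` is read as a string, so any non-empty value (even `False`) enables random mode.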
+
+#### Evaluation mode
+
+    python main.py --pres --eval 
+This generates a `result.json` file with 10 random games and 10 recommendations for each game; the results can be evaluated with Jaccard similarity in the `Fuzzy_presentation.ipynb` file.
+
 Processed dataset files are already provided, but can be created from the base ``games.csv`` file by running:

    python process_dataset.py

+
 If no ``GoogleNews-vectors-negative300.bin`` file is present, only ``games_processed.csv`` will be created.
--- a/data/steam_data.csv
+++ b/data/steam_data.csv
--- a/doc/project_doc.pdf
+++ b/doc/project_doc.pdf
--- a/fuzzy_controllers.py
+++ b/fuzzy_controllers.py
@ -91,9 +91,9 @@ def fuzzy_controler_similiarity(categorical_data: str, numerical_data: str, vect
    FSS.set_crisp_output_value("big", 1)

    # TODO: add Word_vector_distance to rules
-    R1 = "IF (Categorical_similarity IS average) OR (Numerical_difference IS average) THEN (Similarity IS average)"
-    R2 = "IF (Categorical_similarity IS small) OR (Numerical_difference IS big) THEN (Similarity IS small)"
-    R3 = "IF (Categorical_similarity IS big) OR (Numerical_difference IS small) THEN (Similarity IS big)"
+    R1 = "IF (Categorical_similarity IS average) AND (Numerical_difference IS average) THEN (Similarity IS average)"
+    R2 = "IF (Categorical_similarity IS small) AND (Numerical_difference IS big) THEN (Similarity IS small)"
+    R3 = "IF (Categorical_similarity IS big) AND (Numerical_difference IS small) THEN (Similarity IS big)"

    FSS.add_rules([R1, R2, R3])

--- a/main.py
+++ b/main.py
@ -4,8 +4,11 @@ from numpy import dot
 from numpy.linalg import norm
 import json
 import multiprocessing
-import tqdm
-
+from tqdm.auto import tqdm
+from sys import argv
+import sys, getopt
+import argparse
+import random

 def find_games_categorical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
    game_1_categorical = set(game_1['all_categorical'].tolist()[0])
@ -25,7 +28,7 @@ def find_games_word_vector_distance(game_1: pd.DataFrame, game_2: pd.DataFrame)
    return round(dot(game_1_vector, game_2_vector) / (norm(game_1_vector) * norm(game_2_vector)), 2)


-def calculate_similarities(game_title, title_list, df):
+def calculate_similarities(game_title, title_list, df, test=False):
    if game_title in title_list:
        title_list.remove(game_title)

@ -36,7 +39,7 @@ def calculate_similarities(game_title, title_list, df):
    similarities = []
    # call the function for each item in parallel with multiprocessing
    with multiprocessing.Pool() as pool:
-        for result in pool.starmap(compare_games, tqdm.tqdm(args_list, total=len(args_list), desc='Searching')):
+        for result in pool.starmap(compare_games, tqdm(args_list, total=len(args_list), desc='Searching')):
            similarities.append(result)

    all_games = []
@ -47,6 +50,7 @@ def calculate_similarities(game_title, title_list, df):
                    })

    sorted_games = sorted(all_games, key=lambda k: k['similarity'], reverse=True)
+    if (test): return sorted_games[:20]
    print("\n ==== Top 20 most similar games: ====")
    for game in sorted_games[:20]:
        print(f"- {game['title']}")
@ -69,15 +73,73 @@ def compare_games(title_1, title_2, df, show_graph=False):
                                                   vector_distance=word_vector_distance, show_graph=show_graph)
    return similarity_score

+def get_game_info_from_df(data_games, game_title):
+  finded_game = data_games.loc[data_games["name"] == game_title]
+  # print(finded_game)
+  result_dict = {
+    "title" : finded_game["name"].values[0],
+    "price" : finded_game["price"].values[0],
+    "all_categorical" : finded_game["all_categorical"].values[0],
+  }
+  return result_dict

-if __name__ == '__main__':

+def get_game_info(data_game):
+  # finded_game = data_games.loc[data_games["name"] == game_title]
+  # print(finded_game)
+  result_dict = {
+    "title" : data_game["name"],
+    "price" : data_game["price"],
+    "all_categorical" : data_game["all_categorical"],
+  }
+  return result_dict
+
+def main(argv):
    df = pd.read_pickle('data/games_processed_vectorized.csv')
    title_list = df["name"].values.tolist()
-    while True:
-        print("Welcome to Fuzzy Game Reccomender!\nType in a game title and we will find the most similar games from our database")
-        title = input("Enter the title or type 'exit' to leave: ")
-        if title == "exit":
-            break
-        else:
-            calculate_similarities(game_title=title, title_list=title_list, df=df)
+
+    test_mode = False
+    random_mode = False
+    eval_mode = False
+    eval_random_mode = False
+
+    opts, args = getopt.getopt(argv, "r:", ["pres", "eval", "evalrandom"])
+    for opt, arg in opts:
+        if "--pres" == opt: 
+            test_mode = True
+        if "--eval" == opt: 
+            eval_mode = True
+        if "--evalrandom" == opt: 
+            eval_random_mode = True
+        if "-r" == opt: 
+            random_mode = arg
+    if (True == test_mode):
+        game_list = ["Call of Duty®: Modern Warfare® 2", "Project CARS", "DayZ", "STAR WARS™ Jedi Knight - Mysteries of the Sith™", "Overcooked"]
+        if (random_mode): game_list = [random.choice(title_list)]
+        if (eval_mode or eval_random_mode): game_list = [random.choice(title_list) for i in range(10)]
+        result_dict = {"results": []}
+        for item in game_list:
+            if not eval_random_mode:
+                titles_results = calculate_similarities(game_title=item, title_list=title_list, df=df, test=test_mode)
+            if eval_random_mode:
+                titles_results = [{"title": random.choice(title_list)} for i in range(10)]
+            game_result = get_game_info_from_df(df, item)
+            game_result["fuzzy_similiar"] = [get_game_info_from_df(df, title_item["title"]) for title_item in titles_results[:10]]
+            result_dict["results"].append(game_result)
+        with open("results/result.json", "w", encoding="UTF-8") as outfile:
+          json.dump(result_dict, outfile, ensure_ascii=False)
+
+    if (False == test_mode):
+        while True:
+            print("Welcome to Fuzzy Game Reccomender!\nType in a game title and we will find the most similar games from our database")
+            title = input("Enter the title or type 'exit' to leave: ")
+            if title == "exit":
+                break
+            else:
+                calculate_similarities(game_title=title, title_list=title_list, df=df)
+
+
+if __name__ == '__main__':
+  main(sys.argv[1:])
+
+    
--- a/results/result.json
+++ b/results/result.json
Author	SHA1	Message	Date
Mikołaj Krzymiński	8c80734dda	Merge pull request 'compare_to_all_games' (#1 ) from compare_to_all_games into master Reviewed-on: s449288/fuzzy-game-recommender#1	2023-02-03 00:42:21 +01:00
s444417	23fb8b8c46	add evaluation and baseline	2023-02-03 00:41:05 +01:00
s444417	5d1d385ea4	add evaluation and baseline	2023-02-03 00:40:25 +01:00
s444417	868bc82569	add doc	2023-02-02 17:48:38 +01:00
s444417	8f3a3f7bb1	add random	2023-02-02 01:12:00 +01:00
s444417	9ce43cafad	presentation of results	2023-02-01 23:57:24 +01:00
s444417	d4701cd745	change OR to AND	2023-02-01 16:15:56 +01:00