Merge pull request 'compare_to_all_games' (#1 ) from compare_to_all_games into master

Reviewed-on: s449288/fuzzy-game-recommender#1
add evaluation and baseline
2023-02-03 00:42:21 +01:00 · 2023-02-03 00:41:05 +01:00 · 2023-02-03 00:40:25 +01:00 · 2023-02-02 17:48:38 +01:00 · 2023-02-02 01:12:00 +01:00 · 2023-02-01 23:57:24 +01:00
9 changed files with 83634 additions and 229 deletions
--- a/Fuzzy_presentation.ipynb
+++ b/Fuzzy_presentation.ipynb
--- a/README.md
+++ b/README.md
@ -5,8 +5,23 @@
    pip install -r requirements.txt
    python main.py
 #### To run the project in presentation mode:
    python main.py --pres
 This generates a .json file that can be presented by running all cells of `Fuzzy_presentation.ipynb`.
 #### Random mode 
    python main.py --pres -r True
 #### Evaluation mode
    python main.py --pres --eval 
 This generates a result.json file with 10 random games and 10 recommendations for each game; the results can be evaluated in the `Fuzzy_presentation.ipynb` notebook using Jaccard similarity.
 Processed dataset files are already provided, but can be created from the base ``games.csv`` file by running:
    python process_dataset.py
 If no ``GoogleNews-vectors-negative300.bin`` file is present, only ``games_processed.csv`` will be created.
--- a/app.py
+++ b/app.py
@ -1,109 +0,0 @@
 from flask import Flask, render_template, request
 import pandas as pd
 from fuzzy_controllers import fuzzy_controler_similiarity
 from numpy import dot
 from numpy.linalg import norm
 import json
 import multiprocessing
 import tqdm
 app = Flask(__name__)
 def find_games_categorical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
    """Return the Jaccard similarity of two games' categorical tags.

    Each argument is a DataFrame whose first row holds an
    ``all_categorical`` collection.  The result is
    ``|A & B| / |A | B|`` rounded to two decimals.

    Returns 0.0 when neither game has any tag, since the union is empty
    and the ratio would otherwise raise ``ZeroDivisionError``.
    """
    game_1_categorical = set(game_1['all_categorical'].tolist()[0])
    game_2_categorical = set(game_2['all_categorical'].tolist()[0])
    union = game_1_categorical | game_2_categorical
    # Two untagged games share nothing measurable; report "no overlap".
    if not union:
        return 0.0
    shared = game_1_categorical & game_2_categorical
    return round(len(shared) / len(union), 2)
 def find_games_numerical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
    """Return the absolute difference of two games' ``fuzzy_popularity``.

    Each argument is a DataFrame; the value in its first row is used.
    The difference is rounded to two decimals.

    The value is read directly from the column instead of being parsed
    back from ``Series.to_string``: the textual form is subject to
    display formatting and becomes a multi-line string (unparseable as a
    float) as soon as more than one row matches.
    """
    game_1_popularity = float(game_1["fuzzy_popularity"].tolist()[0])
    game_2_popularity = float(game_2["fuzzy_popularity"].tolist()[0])
    return round(abs(game_1_popularity - game_2_popularity), 2)
 def find_games_word_vector_distance(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
    """Return the cosine of the angle between two games' word vectors.

    The vector is taken from the first row of each frame's
    ``all_categorical_vector`` column.  Despite the function name, the
    value computed is a cosine similarity (dot product divided by the
    product of the norms), rounded to two decimals.
    """
    first_vec, second_vec = (
        frame['all_categorical_vector'].tolist()[0] for frame in (game_1, game_2)
    )
    numerator = dot(first_vec, second_vec)
    denominator = norm(first_vec) * norm(second_vec)
    return round(numerator / denominator, 2)
 def calculate_similarities(game_title, title_list, df):
    """Rank every other title by its fuzzy similarity to ``game_title``.

    Args:
        game_title: Name of the game to find recommendations for.
        title_list: Candidate titles to compare against.  It is not
            modified; the query title itself is skipped.
        df: DataFrame with a ``name`` column, passed on to
            ``compare_games`` for every comparison.

    Returns:
        Up to 20 ``{"title", "similarity"}`` dicts, most similar first.
        An empty list when ``game_title`` is not present in ``df``.
    """
    # compare_games selects the query row by name and indexes the first
    # match, so an unknown title would only fail later inside the worker
    # pool with an IndexError.  Report it up front instead.
    if not (df['name'] == game_title).any():
        print(f"Game '{game_title}' was not found in the database")
        return []

    # Filter into a new list rather than calling title_list.remove():
    # callers reuse the same list across queries, and removing from it
    # would permanently drop this game from all later recommendations.
    candidates = [title for title in title_list if title != game_title]
    args_list = [(game_title, compared_title, df) for compared_title in candidates]

    # call the function for each item in parallel with multiprocessing
    with multiprocessing.Pool() as pool:
        similarities = pool.starmap(
            compare_games,
            tqdm.tqdm(args_list, total=len(args_list), desc='Searching'),
        )

    all_games = [
        {"title": title, "similarity": similarity}
        for title, similarity in zip(candidates, similarities)
    ]
    sorted_games = sorted(all_games, key=lambda k: k['similarity'], reverse=True)
    top_games = sorted_games[:20]

    print("\n ==== Top 20 most similar games: ====")
    for game in top_games:
        print(f"- {game['title']}")
    return top_games
 def save_results(game_title, game_list):
    """Dump ``game_list`` as JSON into the ``results`` directory.

    The file name is derived from ``game_title``: lower-cased, with
    spaces replaced by underscores.
    """
    print("The full list of similar games available in the /results directory\n")
    file_stem = game_title.lower().replace(' ', '_')
    target_path = f"results/similarity_list_{file_stem}.txt"
    with open(target_path, 'w+') as out_file:
        json.dump(game_list, out_file)
 def compare_games(title_1, title_2, df, show_graph=False):
    """Score how similar two games are using the fuzzy controller.

    Both games are selected from ``df`` by their ``name``.  Three
    measures (categorical overlap, popularity difference and word-vector
    cosine) are computed and handed to ``fuzzy_controler_similiarity``,
    whose result is returned unchanged.
    """
    names = df['name']
    game_1 = df.loc[names == title_1]
    game_2 = df.loc[names == title_2]

    # Keyword names match the fuzzy controller's parameters.
    controller_inputs = {
        "categorical_data": find_games_categorical_similarity(game_1=game_1, game_2=game_2),
        "numerical_data": find_games_numerical_similarity(game_1=game_1, game_2=game_2),
        "vector_distance": find_games_word_vector_distance(game_1=game_1, game_2=game_2),
    }
    return fuzzy_controler_similiarity(**controller_inputs, show_graph=show_graph)
 if __name__ == '__main__':
    # Interactive console mode: keep asking for titles until the user types 'exit'.
    # Note: the dataset is loaded with read_pickle even though the file is named .csv.
    df = pd.read_pickle('data/games_processed_vectorized.csv')
    title_list = df["name"].values.tolist()
    while True:
        print("Welcome to Fuzzy Game Reccomender!\nType in a game title and we will find the most similar games from our database")
        title = input("Enter the title or type 'exit' to leave: ")
        if title == "exit":
            break
        calculate_similarities(game_title=title, title_list=title_list, df=df)
@app.route('/')
 def index():
    """Render the ``index.html`` template for the root URL ('/')."""
    return render_template('index.html')
@app.route('/', methods=['POST'])
 def form_post():
    df = pd.read_pickle('data/games_processed_vectorized.csv')
    first_game = request.form['first_game']
    second_game = request.form['second_game']
    third_game = request.form['third_game']
    processed_text1 = first_game
    processed_text2 = second_game
    processed_text3 = third_game
    title_list = df["name"].values.tolist()
    similarities = calculate_similarities(game_title=processed_text1, title_list=title_list, df=df)
    return similarities
--- a/data/steam_data.csv
+++ b/data/steam_data.csv
--- a/doc/project_doc.pdf
+++ b/doc/project_doc.pdf
--- a/fuzzy_controllers.py
+++ b/fuzzy_controllers.py
@ -91,9 +91,9 @@ def fuzzy_controler_similiarity(categorical_data: str, numerical_data: str, vect
    FSS.set_crisp_output_value("big", 1)
    # TODO: add Word_vector_distance to rules
-    R1 = "IF (Categorical_similarity IS average) OR (Numerical_difference IS average) THEN (Similarity IS average)"
+    R1 = "IF (Categorical_similarity IS average) AND (Numerical_difference IS average) THEN (Similarity IS average)"
-    R2 = "IF (Categorical_similarity IS small) OR (Numerical_difference IS big) THEN (Similarity IS small)"
+    R2 = "IF (Categorical_similarity IS small) AND (Numerical_difference IS big) THEN (Similarity IS small)"
-    R3 = "IF (Categorical_similarity IS big) OR (Numerical_difference IS small) THEN (Similarity IS big)"
+    R3 = "IF (Categorical_similarity IS big) AND (Numerical_difference IS small) THEN (Similarity IS big)"
    FSS.add_rules([R1, R2, R3])
--- a/main.py
+++ b/main.py
@ -4,8 +4,11 @@ from numpy import dot
 from numpy.linalg import norm
 import json
 import multiprocessing
-import tqdm
+from tqdm.auto import tqdm
-
+from sys import argv
 import sys, getopt
 import argparse
 import random
 def find_games_categorical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
    game_1_categorical = set(game_1['all_categorical'].tolist()[0])
@ -25,7 +28,7 @@ def find_games_word_vector_distance(game_1: pd.DataFrame, game_2: pd.DataFrame)
    return round(dot(game_1_vector, game_2_vector) / (norm(game_1_vector) * norm(game_2_vector)), 2)
-def calculate_similarities(game_title, title_list, df):
+def calculate_similarities(game_title, title_list, df, test=False):
    if game_title in title_list:
        title_list.remove(game_title)
@ -36,7 +39,7 @@ def calculate_similarities(game_title, title_list, df):
    similarities = []
    # call the function for each item in parallel with multiprocessing
    with multiprocessing.Pool() as pool:
-        for result in pool.starmap(compare_games, tqdm.tqdm(args_list, total=len(args_list), desc='Searching')):
+        for result in pool.starmap(compare_games, tqdm(args_list, total=len(args_list), desc='Searching')):
            similarities.append(result)
    all_games = []
@ -47,6 +50,7 @@ def calculate_similarities(game_title, title_list, df):
                    })
    sorted_games = sorted(all_games, key=lambda k: k['similarity'], reverse=True)
    if (test): return sorted_games[:20]
    print("\n ==== Top 20 most similar games: ====")
    for game in sorted_games[:20]:
        print(f"- {game['title']}")
@ -69,15 +73,73 @@ def compare_games(title_1, title_2, df, show_graph=False):
                                                   vector_distance=word_vector_distance, show_graph=show_graph)
    return similarity_score
 def get_game_info_from_df(data_games, game_title):
    """Look up ``game_title`` in ``data_games`` and return its summary dict.

    The first row whose ``name`` equals ``game_title`` supplies the
    values.  The result has the keys ``title``, ``price`` and
    ``all_categorical``.
    """
    matching_rows = data_games.loc[data_games["name"] == game_title]
    # Output key -> source column, in the order the keys appear in the result.
    column_for_key = {
        "title": "name",
        "price": "price",
        "all_categorical": "all_categorical",
    }
    return {key: matching_rows[column].values[0] for key, column in column_for_key.items()}
 if __name__ == '__main__':
 def get_game_info(data_game):
    """Build the summary dict for a single game record.

    ``data_game`` is indexed by ``name``, ``price`` and
    ``all_categorical``; the result exposes them as ``title``, ``price``
    and ``all_categorical``.
    """
    # Output key -> field of the record, in result order.
    field_for_key = (
        ("title", "name"),
        ("price", "price"),
        ("all_categorical", "all_categorical"),
    )
    return {key: data_game[field] for key, field in field_for_key}
 def main(argv):
    df = pd.read_pickle('data/games_processed_vectorized.csv')
    title_list = df["name"].values.tolist()
-    while True:
+
-        print("Welcome to Fuzzy Game Reccomender!\nType in a game title and we will find the most similar games from our database")
+    test_mode = False
-        title = input("Enter the title or type 'exit' to leave: ")
+    random_mode = False
-        if title == "exit":
+    eval_mode = False
-            break
+    eval_random_mode = False
-        else:
+
-            calculate_similarities(game_title=title, title_list=title_list, df=df)
+    opts, args = getopt.getopt(argv, "r:", ["pres", "eval", "evalrandom"])
    for opt, arg in opts:
        if "--pres" == opt: 
            test_mode = True
        if "--eval" == opt: 
            eval_mode = True
        if "--evalrandom" == opt: 
            eval_random_mode = True
        if "-r" == opt: 
            random_mode = arg
    if (True == test_mode):
        game_list = ["Call of Duty®: Modern Warfare® 2", "Project CARS", "DayZ", "STAR WARS™ Jedi Knight - Mysteries of the Sith™", "Overcooked"]
        if (random_mode): game_list = [random.choice(title_list)]
        if (eval_mode or eval_random_mode): game_list = [random.choice(title_list) for i in range(10)]
        result_dict = {"results": []}
        for item in game_list:
            if not eval_random_mode:
                titles_results = calculate_similarities(game_title=item, title_list=title_list, df=df, test=test_mode)
            if eval_random_mode:
                titles_results = [{"title": random.choice(title_list)} for i in range(10)]
            game_result = get_game_info_from_df(df, item)
            game_result["fuzzy_similiar"] = [get_game_info_from_df(df, title_item["title"]) for title_item in titles_results[:10]]
            result_dict["results"].append(game_result)
        with open("results/result.json", "w", encoding="UTF-8") as outfile:
          json.dump(result_dict, outfile, ensure_ascii=False)
    if (False == test_mode):
        while True:
            print("Welcome to Fuzzy Game Reccomender!\nType in a game title and we will find the most similar games from our database")
            title = input("Enter the title or type 'exit' to leave: ")
            if title == "exit":
                break
            else:
                calculate_similarities(game_title=title, title_list=title_list, df=df)
 if __name__ == '__main__':
  main(sys.argv[1:])
--- a/results/result.json
+++ b/results/result.json
--- a/templates/index.html
+++ b/templates/index.html
@ -1,105 +0,0 @@
 <head>
  <script>
 // This script sits in <head>, so the form has not been parsed yet when it is
 // first evaluated; wait for the DOM before looking up the checkboxes.
 document.addEventListener('DOMContentLoaded', () => {
   const toggleCheckboxes = document.querySelectorAll('input[type="checkbox"]');
   toggleCheckboxes.forEach(checkbox => {
     checkbox.addEventListener('change', function() {
       // Checkbox ids are "<field>_toggle"; strip the whole suffix (including
       // the underscore) to get the id of the matching text input.
       const target = document.getElementById(this.id.replace('_toggle', ''));
       if (!target) {
         return;
       }
       // The label marked with the red border directly precedes the text input.
       const label = target.previousElementSibling;
       label.style.visibility = this.checked ? 'visible' : 'hidden';
     });
   });
 });
  </script>
    <style>
        h1 {
    border: 2px #eee solid;
    color: brown;
    text-align: center;
    padding: 10px;
 }
 html, body {
  height: 100%;
  margin: 0;
  padding: 0;
  background-color: #ADD8E6; /* Light blue color */
 }
 form {
  display: flex;
  flex-direction: column;
  align-items: center;
  justify-content: center;
  height: 100%;
  background-color: white;
  padding: 20px;
  border-radius: 10px;
  box-shadow: 0 0 10px gray;
 }
 input[type="text"] {
  width: 50%;
  padding: 10px;
  margin: 10px 0;
  font-size: 16px;
  background-color: lightgray;
  border: none;
  border-radius: 5px;
 }
 input[type="submit"] {
  padding: 10px 20px;
  font-size: 16px;
  background-color: lightblue;
  color: white;
  border: none;
  border-radius: 5px;
  cursor: pointer;
 }
 .red-border {
  border: 2px solid red;
  display: inline-block;
  padding: 5px;
  border-radius: 5px;
  visibility: hidden;
 }
 button.toggle-border {
  padding: 5px 10px;
  font-size: 14px;
  background-color: lightgray;
  border: none;
  border-radius: 5px;
  cursor: pointer;
  margin-left: 10px;
 }
    </style>
 </head>
 <form action="" method="post">
  <div>
    <input type="checkbox" id="first_game_toggle">
    <label for="first_game_toggle">I don't like this game</label>
    <label for="first_game" class="red-border">First game:</label>
    <input type="text" id="first_game" name="first_game">
  </div>
  <div>
    <input type="checkbox" id="second_game_toggle">
    <label for="second_game_toggle">I don't like this game</label>
    <label for="second_game" class="red-border">Second game:</label>
    <input type="text" id="second_game" name="second_game">
  </div>
    <div>
    <input type="checkbox" id="third_game_toggle">
    <label for="third_game_toggle">I don't like this game</label>
    <label for="third_game" class="red-border">Third game:</label>
    <input type="text" id="third_game" name="third_game">
  </div>
  <input type="submit" value="Submit">
 </form>
Author	SHA1	Message	Date
Mikołaj Krzymiński	8c80734dda	Merge pull request 'compare_to_all_games' (#1 ) from compare_to_all_games into master Reviewed-on: s449288/fuzzy-game-recommender#1	2023-02-03 00:42:21 +01:00
s444417	23fb8b8c46	add evaluation and baseline	2023-02-03 00:41:05 +01:00
s444417	5d1d385ea4	add evaluation and baseline	2023-02-03 00:40:25 +01:00
s444417	868bc82569	add doc	2023-02-02 17:48:38 +01:00
s444417	8f3a3f7bb1	add random	2023-02-02 01:12:00 +01:00
s444417	9ce43cafad	presentation of results	2023-02-01 23:57:24 +01:00
s444417	d4701cd745	change OR to AND	2023-02-01 16:15:56 +01:00