Compare commits

...

7 Commits

Author SHA1 Message Date
8c80734dda Merge pull request 'compare_to_all_games' (#1) from compare_to_all_games into master
Reviewed-on: s449288/fuzzy-game-recommender#1
2023-02-03 00:42:21 +01:00
s444417
23fb8b8c46 add evaluation and baseline 2023-02-03 00:41:05 +01:00
s444417
5d1d385ea4 add evaluation and baseline 2023-02-03 00:40:25 +01:00
s444417
868bc82569 add doc 2023-02-02 17:48:38 +01:00
s444417
8f3a3f7bb1 add random 2023-02-02 01:12:00 +01:00
s444417
9ce43cafad presentation of results 2023-02-01 23:57:24 +01:00
s444417
d4701cd745 change OR to AND 2023-02-01 16:15:56 +01:00
7 changed files with 83634 additions and 15 deletions

2492
Fuzzy_presentation.ipynb Normal file

File diff suppressed because one or more lines are too long

View File

@ -5,8 +5,23 @@
pip install -r requirements.txt
python main.py
#### To run the project in presentation mode:
python main.py --pres
This generates a .json file, which can be presented by running all cells of `Fuzzy_presentation.ipynb`.
#### Random mode
python main.py --pres -r True
#### Evaluation mode
python main.py --pres --eval
Generates a `result.json` file with 10 random games and 10 recommendations for each game; the results can be evaluated in the `Fuzzy_presentation.ipynb` file using Jaccard similarity.
Processed dataset files are already provided, but can be created from the base ``games.csv`` file by running:
python process_dataset.py
If no ``GoogleNews-vectors-negative300.bin`` file is present, only ``games_processed.csv`` will be created.

81049
data/steam_data.csv Normal file

File diff suppressed because it is too large Load Diff

BIN
doc/project_doc.pdf Normal file

Binary file not shown.

View File

@ -91,9 +91,9 @@ def fuzzy_controler_similiarity(categorical_data: str, numerical_data: str, vect
FSS.set_crisp_output_value("big", 1)
# TODO: add Word_vector_distance to rules
R1 = "IF (Categorical_similarity IS average) OR (Numerical_difference IS average) THEN (Similarity IS average)"
R2 = "IF (Categorical_similarity IS small) OR (Numerical_difference IS big) THEN (Similarity IS small)"
R3 = "IF (Categorical_similarity IS big) OR (Numerical_difference IS small) THEN (Similarity IS big)"
R1 = "IF (Categorical_similarity IS average) AND (Numerical_difference IS average) THEN (Similarity IS average)"
R2 = "IF (Categorical_similarity IS small) AND (Numerical_difference IS big) THEN (Similarity IS small)"
R3 = "IF (Categorical_similarity IS big) AND (Numerical_difference IS small) THEN (Similarity IS big)"
FSS.add_rules([R1, R2, R3])

86
main.py
View File

@ -4,8 +4,11 @@ from numpy import dot
from numpy.linalg import norm
import json
import multiprocessing
import tqdm
from tqdm.auto import tqdm
from sys import argv
import sys, getopt
import argparse
import random
def find_games_categorical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
game_1_categorical = set(game_1['all_categorical'].tolist()[0])
@ -25,7 +28,7 @@ def find_games_word_vector_distance(game_1: pd.DataFrame, game_2: pd.DataFrame)
return round(dot(game_1_vector, game_2_vector) / (norm(game_1_vector) * norm(game_2_vector)), 2)
def calculate_similarities(game_title, title_list, df):
def calculate_similarities(game_title, title_list, df, test=False):
if game_title in title_list:
title_list.remove(game_title)
@ -36,7 +39,7 @@ def calculate_similarities(game_title, title_list, df):
similarities = []
# call the function for each item in parallel with multiprocessing
with multiprocessing.Pool() as pool:
for result in pool.starmap(compare_games, tqdm.tqdm(args_list, total=len(args_list), desc='Searching')):
for result in pool.starmap(compare_games, tqdm(args_list, total=len(args_list), desc='Searching')):
similarities.append(result)
all_games = []
@ -47,6 +50,7 @@ def calculate_similarities(game_title, title_list, df):
})
sorted_games = sorted(all_games, key=lambda k: k['similarity'], reverse=True)
if (test): return sorted_games[:20]
print("\n ==== Top 20 most similar games: ====")
for game in sorted_games[:20]:
print(f"- {game['title']}")
@ -69,15 +73,73 @@ def compare_games(title_1, title_2, df, show_graph=False):
vector_distance=word_vector_distance, show_graph=show_graph)
return similarity_score
def get_game_info_from_df(data_games, game_title):
    """Look up a game by exact name and return its summary fields.

    Args:
        data_games: DataFrame with at least the columns ``"name"``,
            ``"price"`` and ``"all_categorical"``.
        game_title: Value compared for equality against the ``"name"`` column.

    Returns:
        dict with the keys ``"title"``, ``"price"`` and ``"all_categorical"``,
        taken from the first row whose name equals ``game_title``.

    Raises:
        IndexError: If no row has that name.
    """
    matches = data_games.loc[data_games["name"] == game_title]
    if matches.empty:
        # The bare ``.values[0]`` lookup raised IndexError here too, but with
        # an opaque "index 0 is out of bounds" message; name the title instead.
        raise IndexError(f"game {game_title!r} not found in dataset")
    return {
        "title": matches["name"].values[0],
        "price": matches["price"].values[0],
        "all_categorical": matches["all_categorical"].values[0],
    }
if __name__ == '__main__':
def get_game_info(data_game):
    """Extract the summary fields from a single game record.

    Args:
        data_game: Any object indexable by the string keys ``"name"``,
            ``"price"`` and ``"all_categorical"``.

    Returns:
        dict with the keys ``"title"``, ``"price"`` and ``"all_categorical"``
        holding the corresponding values of ``data_game``.
    """
    return {
        "title": data_game["name"],
        "price": data_game["price"],
        "all_categorical": data_game["all_categorical"],
    }
def main(argv):
    """Run the recommender interactively or in presentation/evaluation mode.

    Recognised options (parsed with ``getopt``):
        --pres        presentation mode: compute results for a list of games
                      and write them to ``results/result.json``.
        -r VALUE      with --pres: use one random game instead of the fixed
                      list. ``false``/``0``/``no``/``off`` (any case) or an
                      empty value disable it; any other value enables it.
        --eval        with --pres: use 10 randomly chosen games.
        --evalrandom  with --pres: use 10 randomly chosen games and replace
                      the fuzzy recommendations with 10 random titles.

    Without --pres the function loops, prompting for a title until the user
    types ``exit``.

    Args:
        argv: Command-line arguments without the program name.

    Raises:
        getopt.GetoptError: On an unknown option or a missing ``-r`` value.
    """
    import os  # local import: only needed to create the output directory

    # NOTE(review): despite the .csv suffix this file is read as a pickle;
    # only load it from a trusted location.
    df = pd.read_pickle('data/games_processed_vectorized.csv')
    title_list = df["name"].values.tolist()

    test_mode = False
    random_mode = False
    eval_mode = False
    eval_random_mode = False

    opts, _ = getopt.getopt(argv, "r:", ["pres", "eval", "evalrandom"])
    for opt, arg in opts:
        if opt == "--pres":
            test_mode = True
        elif opt == "--eval":
            eval_mode = True
        elif opt == "--evalrandom":
            eval_random_mode = True
        elif opt == "-r":
            # The raw option string was previously used as the flag, so
            # "-r False" still enabled random mode. Parse it explicitly.
            random_mode = arg.strip().lower() not in ("", "0", "false", "no", "off")

    if test_mode:
        game_list = ["Call of Duty®: Modern Warfare® 2", "Project CARS", "DayZ", "STAR WARS™ Jedi Knight - Mysteries of the Sith™", "Overcooked"]
        if random_mode:
            game_list = [random.choice(title_list)]
        if eval_mode or eval_random_mode:
            game_list = [random.choice(title_list) for _ in range(10)]

        result_dict = {"results": []}
        for item in game_list:
            if eval_random_mode:
                # Random baseline: 10 arbitrary titles instead of fuzzy results.
                titles_results = [{"title": random.choice(title_list)} for _ in range(10)]
            else:
                # calculate_similarities removes the queried title from the
                # list it receives; pass a copy so the shared list stays
                # complete for later games (random draws may repeat a title).
                titles_results = calculate_similarities(
                    game_title=item, title_list=list(title_list), df=df, test=test_mode)
            game_result = get_game_info_from_df(df, item)
            game_result["fuzzy_similiar"] = [
                get_game_info_from_df(df, title_item["title"])
                for title_item in titles_results[:10]
            ]
            result_dict["results"].append(game_result)

        # Make sure the output directory exists before writing the report.
        os.makedirs("results", exist_ok=True)
        with open("results/result.json", "w", encoding="UTF-8") as outfile:
            json.dump(result_dict, outfile, ensure_ascii=False)
        return

    while True:
        print("Welcome to Fuzzy Game Reccomender!\nType in a game title and we will find the most similar games from our database")
        title = input("Enter the title or type 'exit' to leave: ")
        if title == "exit":
            break
        # Copy for the same reason as above: repeated queries for one title
        # must keep seeing the full title list.
        calculate_similarities(game_title=title, title_list=list(title_list), df=df)
# Script entry point: hand the command-line arguments (program name
# stripped) to main() only when this file is executed directly.
if __name__ == "__main__":
    main(sys.argv[1:])

1
results/result.json Normal file

File diff suppressed because one or more lines are too long