Compare commits

..

7 Commits

Author SHA1 Message Date
8c80734dda Merge pull request 'compare_to_all_games' (#1) from compare_to_all_games into master
Reviewed-on: s449288/fuzzy-game-recommender#1
2023-02-03 00:42:21 +01:00
s444417
23fb8b8c46 add evaluation and baseline 2023-02-03 00:41:05 +01:00
s444417
5d1d385ea4 add evaluation and baseline 2023-02-03 00:40:25 +01:00
s444417
868bc82569 add doc 2023-02-02 17:48:38 +01:00
s444417
8f3a3f7bb1 add random 2023-02-02 01:12:00 +01:00
s444417
9ce43cafad presentation of results 2023-02-01 23:57:24 +01:00
s444417
d4701cd745 change OR to AND 2023-02-01 16:15:56 +01:00
9 changed files with 83634 additions and 229 deletions

2492
Fuzzy_presentation.ipynb Normal file

File diff suppressed because one or more lines are too long

View File

@ -5,8 +5,23 @@
pip install -r requirements.txt pip install -r requirements.txt
python main.py python main.py
#### To run the project in presentation mode:
python main.py --pres
This generates a `.json` file that can be presented by running all cells of `Fuzzy_presentation.ipynb`.
#### Random mode
python main.py --pres -r True
#### Evaluation mode
python main.py --pres --eval
This generates a `result.json` file with 10 random games and 10 recommendations for each game; the results can be evaluated in the `Fuzzy_presentation.ipynb` file using Jaccard similarity.
Processed dataset files are already provided, but can be created from the base ``games.csv`` file by running: Processed dataset files are already provided, but can be created from the base ``games.csv`` file by running:
python process_dataset.py python process_dataset.py
If no ``GoogleNews-vectors-negative300.bin`` file is present, only ``games_processed.csv`` will be created. If no ``GoogleNews-vectors-negative300.bin`` file is present, only ``games_processed.csv`` will be created.

109
app.py
View File

@ -1,109 +0,0 @@
from flask import Flask, render_template, request
import pandas as pd
from fuzzy_controllers import fuzzy_controler_similiarity
from numpy import dot
from numpy.linalg import norm
import json
import multiprocessing
import tqdm
app = Flask(__name__)
def find_games_categorical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
    """Return the Jaccard similarity of two games' categorical tags.

    Args:
        game_1: Frame whose first row's ``all_categorical`` cell holds an
            iterable of tags.
        game_2: Frame with the same layout for the second game.

    Returns:
        ``|A & B| / |A | B|`` rounded to two decimals, or ``0.0`` when
        neither game has any tags.
    """
    game_1_categorical = set(game_1['all_categorical'].tolist()[0])
    game_2_categorical = set(game_2['all_categorical'].tolist()[0])
    all_tags = game_1_categorical | game_2_categorical
    if not all_tags:
        # Two untagged games would otherwise divide by zero; with no tags
        # there is no evidence of similarity, so report none.
        return 0.0
    shared_tags = game_1_categorical & game_2_categorical
    return round(len(shared_tags) / len(all_tags), 2)
def find_games_numerical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
    """Return the absolute difference between two games' popularity scores.

    Despite the name, the result is a *difference*: ``0.0`` means the two
    ``fuzzy_popularity`` values are identical.

    Args:
        game_1: Frame whose first row carries a numeric ``fuzzy_popularity``.
        game_2: Frame with the same layout for the second game.

    Returns:
        ``abs(p1 - p2)`` rounded to two decimals.
    """
    # Read the first row's value directly. Round-tripping through
    # ``to_string()`` truncates to pandas' display precision and produces an
    # unparsable multi-line string when several rows match.
    game_1_popularity = float(game_1["fuzzy_popularity"].iloc[0])
    game_2_popularity = float(game_2["fuzzy_popularity"].iloc[0])
    return round(abs(game_1_popularity - game_2_popularity), 2)
def find_games_word_vector_distance(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
    """Return the cosine similarity of two games' categorical word vectors.

    Although named "distance", the value is ``dot(a, b) / (|a| * |b|)``, so
    larger means more alike.

    Args:
        game_1: Frame whose first row's ``all_categorical_vector`` cell holds
            a numeric vector.
        game_2: Frame with the same layout for the second game.

    Returns:
        The cosine similarity rounded to two decimals, or ``0.0`` when either
        vector has zero length (the cosine is undefined there).
    """
    game_1_vector = game_1['all_categorical_vector'].tolist()[0]
    game_2_vector = game_2['all_categorical_vector'].tolist()[0]
    denominator = norm(game_1_vector) * norm(game_2_vector)
    if denominator == 0:
        # An all-zero vector would make the division yield NaN, which would
        # then be fed into the fuzzy controller; treat it as "no similarity".
        return 0.0
    return round(dot(game_1_vector, game_2_vector) / denominator, 2)
def calculate_similarities(game_title, title_list, df):
    """Rank every other game by fuzzy similarity to ``game_title``.

    Each candidate title is scored with ``compare_games`` in a
    ``multiprocessing.Pool``; the 20 best matches are printed and returned.

    Args:
        game_title: Name of the reference game (matched against ``df['name']``).
        title_list: Titles to compare against. It is NOT modified; entries
            equal to ``game_title`` are skipped.
        df: Game frame passed through to ``compare_games``.

    Returns:
        Up to 20 ``{"title": ..., "similarity": ...}`` dicts, most similar
        first. An empty list when ``game_title`` is not present in ``df``.
    """
    if not (df['name'] == game_title).any():
        # compare_games would otherwise fail inside the worker pool with an
        # opaque IndexError on the empty lookup.
        print(f"Game '{game_title}' was not found in the database.")
        return []

    # Build a filtered copy instead of calling title_list.remove(): the
    # caller reuses the same list across queries, and mutating it made every
    # previously searched game vanish from later recommendations.
    candidates = [title for title in title_list if title != game_title]
    args_list = [(game_title, compared_title, df) for compared_title in candidates]

    # call the function for each item in parallel with multiprocessing
    with multiprocessing.Pool() as pool:
        similarities = pool.starmap(
            compare_games,
            tqdm.tqdm(args_list, total=len(args_list), desc='Searching'),
        )

    all_games = [
        {"title": title, "similarity": similarity}
        for title, similarity in zip(candidates, similarities)
    ]
    sorted_games = sorted(all_games, key=lambda k: k['similarity'], reverse=True)
    top_games = sorted_games[:20]

    print("\n ==== Top 20 most similar games: ====")
    for game in top_games:
        print(f"- {game['title']}")

    return top_games
def save_results(game_title, game_list):
    """Write ``game_list`` as JSON to ``results/similarity_list_<title>.txt``.

    The file name is the lower-cased title with spaces replaced by
    underscores. The ``results`` directory is created when missing.

    Args:
        game_title: Title of the reference game; used to build the file name.
        game_list: JSON-serialisable list of similar games.
    """
    import os  # local import: keeps this block self-contained

    # Opening the file used to fail with FileNotFoundError on a fresh checkout
    # where the output directory had not been created yet.
    os.makedirs("results", exist_ok=True)
    file_name = f"results/similarity_list_{game_title.lower().replace(' ', '_')}.txt"
    # Plain 'w' is enough (the file is never read back here); the explicit
    # encoding keeps the output independent of the platform default.
    with open(file_name, 'w', encoding='utf-8') as fp:
        json.dump(game_list, fp)
    # Announce the file only once it has actually been written.
    print("The full list of similar games available in the /results directory\n")
def compare_games(title_1, title_2, df, show_graph=False):
    """Score how similar two games are using the fuzzy controller.

    Both titles are looked up in ``df`` by exact match on the ``name``
    column. Three crisp inputs are derived from the matching rows
    (categorical overlap, popularity difference, word-vector measure) and
    handed to ``fuzzy_controler_similiarity``.

    Args:
        title_1: Name of the first game.
        title_2: Name of the second game.
        df: Frame holding the processed game data.
        show_graph: Forwarded unchanged to the fuzzy controller.

    Returns:
        Whatever ``fuzzy_controler_similiarity`` returns for these inputs.
    """
    names = df['name']
    first_game = df.loc[names == title_1]
    second_game = df.loc[names == title_2]

    # Evaluated in the same order as the controller's keyword arguments.
    fuzzy_inputs = {
        "categorical_data": find_games_categorical_similarity(game_1=first_game, game_2=second_game),
        "numerical_data": find_games_numerical_similarity(game_1=first_game, game_2=second_game),
        "vector_distance": find_games_word_vector_distance(game_1=first_game, game_2=second_game),
    }
    return fuzzy_controler_similiarity(**fuzzy_inputs, show_graph=show_graph)
if __name__ == '__main__':
    # Interactive console mode: load the processed dataset once, then keep
    # asking for a title until the user types 'exit'.
    # NOTE(review): this guard sits above the Flask route definitions and
    # nothing here starts the web app — confirm that is intended.
    df = pd.read_pickle('data/games_processed_vectorized.csv')
    title_list = df["name"].values.tolist()

    while True:
        print("Welcome to Fuzzy Game Reccomender!\nType in a game title and we will find the most similar games from our database")
        title = input("Enter the title or type 'exit' to leave: ")
        if title == "exit":
            break
        calculate_similarities(game_title=title, title_list=title_list, df=df)
@app.route('/')
def index():
    """Render the ``index.html`` template for requests to the site root."""
    page = render_template('index.html')
    return page
@app.route('/', methods=['POST'])
def form_post():
    """Handle the submitted form and return the games most similar to the first title.

    Only the ``first_game`` field is used; ``second_game`` and ``third_game``
    are submitted by the form but played no part in the computation, so they
    are no longer read.

    Returns:
        A JSON response containing the list produced by
        ``calculate_similarities``.
    """
    from flask import jsonify  # local import: only this view needs it

    first_game = request.form['first_game']

    # NOTE(review): despite the .csv suffix this file is a pickle; it must
    # only ever come from a trusted source.
    df = pd.read_pickle('data/games_processed_vectorized.csv')
    title_list = df["name"].values.tolist()

    similarities = calculate_similarities(game_title=first_game, title_list=title_list, df=df)
    # Returning a bare list from a view is only accepted by newer Flask
    # releases; jsonify() yields a proper JSON response on every version.
    return jsonify(similarities)

81049
data/steam_data.csv Normal file

File diff suppressed because it is too large Load Diff

BIN
doc/project_doc.pdf Normal file

Binary file not shown.

View File

@ -91,9 +91,9 @@ def fuzzy_controler_similiarity(categorical_data: str, numerical_data: str, vect
FSS.set_crisp_output_value("big", 1) FSS.set_crisp_output_value("big", 1)
# TODO: add Word_vector_distance to rules # TODO: add Word_vector_distance to rules
R1 = "IF (Categorical_similarity IS average) OR (Numerical_difference IS average) THEN (Similarity IS average)" R1 = "IF (Categorical_similarity IS average) AND (Numerical_difference IS average) THEN (Similarity IS average)"
R2 = "IF (Categorical_similarity IS small) OR (Numerical_difference IS big) THEN (Similarity IS small)" R2 = "IF (Categorical_similarity IS small) AND (Numerical_difference IS big) THEN (Similarity IS small)"
R3 = "IF (Categorical_similarity IS big) OR (Numerical_difference IS small) THEN (Similarity IS big)" R3 = "IF (Categorical_similarity IS big) AND (Numerical_difference IS small) THEN (Similarity IS big)"
FSS.add_rules([R1, R2, R3]) FSS.add_rules([R1, R2, R3])

72
main.py
View File

@ -4,8 +4,11 @@ from numpy import dot
from numpy.linalg import norm from numpy.linalg import norm
import json import json
import multiprocessing import multiprocessing
import tqdm from tqdm.auto import tqdm
from sys import argv
import sys, getopt
import argparse
import random
def find_games_categorical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float: def find_games_categorical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
game_1_categorical = set(game_1['all_categorical'].tolist()[0]) game_1_categorical = set(game_1['all_categorical'].tolist()[0])
@ -25,7 +28,7 @@ def find_games_word_vector_distance(game_1: pd.DataFrame, game_2: pd.DataFrame)
return round(dot(game_1_vector, game_2_vector) / (norm(game_1_vector) * norm(game_2_vector)), 2) return round(dot(game_1_vector, game_2_vector) / (norm(game_1_vector) * norm(game_2_vector)), 2)
def calculate_similarities(game_title, title_list, df): def calculate_similarities(game_title, title_list, df, test=False):
if game_title in title_list: if game_title in title_list:
title_list.remove(game_title) title_list.remove(game_title)
@ -36,7 +39,7 @@ def calculate_similarities(game_title, title_list, df):
similarities = [] similarities = []
# call the function for each item in parallel with multiprocessing # call the function for each item in parallel with multiprocessing
with multiprocessing.Pool() as pool: with multiprocessing.Pool() as pool:
for result in pool.starmap(compare_games, tqdm.tqdm(args_list, total=len(args_list), desc='Searching')): for result in pool.starmap(compare_games, tqdm(args_list, total=len(args_list), desc='Searching')):
similarities.append(result) similarities.append(result)
all_games = [] all_games = []
@ -47,6 +50,7 @@ def calculate_similarities(game_title, title_list, df):
}) })
sorted_games = sorted(all_games, key=lambda k: k['similarity'], reverse=True) sorted_games = sorted(all_games, key=lambda k: k['similarity'], reverse=True)
if (test): return sorted_games[:20]
print("\n ==== Top 20 most similar games: ====") print("\n ==== Top 20 most similar games: ====")
for game in sorted_games[:20]: for game in sorted_games[:20]:
print(f"- {game['title']}") print(f"- {game['title']}")
@ -69,11 +73,63 @@ def compare_games(title_1, title_2, df, show_graph=False):
vector_distance=word_vector_distance, show_graph=show_graph) vector_distance=word_vector_distance, show_graph=show_graph)
return similarity_score return similarity_score
def get_game_info_from_df(data_games, game_title):
    """Look up a game by exact name and return its presentation fields.

    Args:
        data_games: Frame with ``name``, ``price`` and ``all_categorical``
            columns.
        game_title: Title to match against the ``name`` column.

    Returns:
        A dict with keys ``title``, ``price`` and ``all_categorical`` taken
        from the first matching row.

    Raises:
        IndexError: If no row is named ``game_title``. The exception type is
            the same as before; the message now names the missing title.
    """
    matching_rows = data_games.loc[data_games["name"] == game_title]
    if matching_rows.empty:
        # Previously this surfaced as a bare "index 0 is out of bounds".
        raise IndexError(f"game {game_title!r} not found in the dataset")
    return {
        "title": matching_rows["name"].values[0],
        "price": matching_rows["price"].values[0],
        "all_categorical": matching_rows["all_categorical"].values[0],
    }
if __name__ == '__main__':
def get_game_info(data_game):
    """Extract the presentation fields from a single game record.

    Args:
        data_game: Any mapping-like record indexable by ``"name"``,
            ``"price"`` and ``"all_categorical"``.

    Returns:
        A dict with keys ``title`` (taken from ``name``), ``price`` and
        ``all_categorical``.
    """
    # (source key in the record, key in the returned dict)
    field_map = (
        ("name", "title"),
        ("price", "price"),
        ("all_categorical", "all_categorical"),
    )
    return {output_key: data_game[source_key] for source_key, output_key in field_map}
def main(argv):
df = pd.read_pickle('data/games_processed_vectorized.csv') df = pd.read_pickle('data/games_processed_vectorized.csv')
title_list = df["name"].values.tolist() title_list = df["name"].values.tolist()
test_mode = False
random_mode = False
eval_mode = False
eval_random_mode = False
opts, args = getopt.getopt(argv, "r:", ["pres", "eval", "evalrandom"])
for opt, arg in opts:
if "--pres" == opt:
test_mode = True
if "--eval" == opt:
eval_mode = True
if "--evalrandom" == opt:
eval_random_mode = True
if "-r" == opt:
random_mode = arg
if (True == test_mode):
game_list = ["Call of Duty®: Modern Warfare® 2", "Project CARS", "DayZ", "STAR WARS™ Jedi Knight - Mysteries of the Sith™", "Overcooked"]
if (random_mode): game_list = [random.choice(title_list)]
if (eval_mode or eval_random_mode): game_list = [random.choice(title_list) for i in range(10)]
result_dict = {"results": []}
for item in game_list:
if not eval_random_mode:
titles_results = calculate_similarities(game_title=item, title_list=title_list, df=df, test=test_mode)
if eval_random_mode:
titles_results = [{"title": random.choice(title_list)} for i in range(10)]
game_result = get_game_info_from_df(df, item)
game_result["fuzzy_similiar"] = [get_game_info_from_df(df, title_item["title"]) for title_item in titles_results[:10]]
result_dict["results"].append(game_result)
with open("results/result.json", "w", encoding="UTF-8") as outfile:
json.dump(result_dict, outfile, ensure_ascii=False)
if (False == test_mode):
while True: while True:
print("Welcome to Fuzzy Game Reccomender!\nType in a game title and we will find the most similar games from our database") print("Welcome to Fuzzy Game Reccomender!\nType in a game title and we will find the most similar games from our database")
title = input("Enter the title or type 'exit' to leave: ") title = input("Enter the title or type 'exit' to leave: ")
@ -81,3 +137,9 @@ if __name__ == '__main__':
break break
else: else:
calculate_similarities(game_title=title, title_list=title_list, df=df) calculate_similarities(game_title=title, title_list=title_list, df=df)
if __name__ == '__main__':
main(sys.argv[1:])

1
results/result.json Normal file

File diff suppressed because one or more lines are too long

View File

@ -1,105 +0,0 @@
<head>
<script>
// Show or hide the label in front of each text input when its
// "<input id>_toggle" checkbox changes state.
// This script lives in <head>, so it must wait for the document to be
// parsed; run immediately, querySelectorAll() finds no checkboxes.
document.addEventListener('DOMContentLoaded', () => {
  const toggleCheckboxes = document.querySelectorAll('input[type="checkbox"]');
  toggleCheckboxes.forEach(checkbox => {
    checkbox.addEventListener('change', function() {
      // Strip the whole "_toggle" suffix: removing only "toggle" left a
      // trailing underscore ("first_game_") that matches no element.
      const target = document.getElementById(this.id.replace('_toggle', ''));
      if (!target || !target.previousElementSibling) {
        return;
      }
      const label = target.previousElementSibling;
      label.style.visibility = this.checked ? 'visible' : 'hidden';
    });
  });
});
</script>
<style>
h1 {
border: 2px #eee solid;
color: brown;
text-align: center;
padding: 10px;
}
html, body {
height: 100%;
margin: 0;
padding: 0;
background-color: #ADD8E6; /* Light blue color */
}
form {
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
height: 100%;
background-color: white;
padding: 20px;
border-radius: 10px;
box-shadow: 0 0 10px gray;
}
input[type="text"] {
width: 50%;
padding: 10px;
margin: 10px 0;
font-size: 16px;
background-color: lightgray;
border: none;
border-radius: 5px;
}
input[type="submit"] {
padding: 10px 20px;
font-size: 16px;
background-color: lightblue;
color: white;
border: none;
border-radius: 5px;
cursor: pointer;
}
.red-border {
border: 2px solid red;
display: inline-block;
padding: 5px;
border-radius: 5px;
visibility: hidden;
}
button.toggle-border {
padding: 5px 10px;
font-size: 14px;
background-color: lightgray;
border: none;
border-radius: 5px;
cursor: pointer;
margin-left: 10px;
}
</style>
</head>
<form action="" method="post">
<div>
<input type="checkbox" id="first_game_toggle">
<label for="first_game_toggle">I don't like this game</label>
<label for="first_game" class="red-border">First game:</label>
<input type="text" id="first_game" name="first_game">
</div>
<div>
<input type="checkbox" id="second_game_toggle">
<label for="second_game_toggle">I don't like this game</label>
<label for="second_game" class="red-border">Second game:</label>
<input type="text" id="second_game" name="second_game">
</div>
<div>
<input type="checkbox" id="third_game_toggle">
<label for="third_game_toggle">I don't like this game</label>
<label for="third_game" class="red-border">Third game:</label>
<input type="text" id="third_game" name="third_game">
</div>
<input type="submit" value="Submit">
</form>