Compare commits
7 Commits
Author | SHA1 | Date | |
---|---|---|---|
8c80734dda | |||
|
23fb8b8c46 | ||
|
5d1d385ea4 | ||
|
868bc82569 | ||
|
8f3a3f7bb1 | ||
|
9ce43cafad | ||
|
d4701cd745 |
2492
Fuzzy_presentation.ipynb
Normal file
2492
Fuzzy_presentation.ipynb
Normal file
File diff suppressed because one or more lines are too long
15
README.md
15
README.md
@ -5,8 +5,23 @@
|
|||||||
pip install -r requirements.txt
|
pip install -r requirements.txt
|
||||||
python main.py
|
python main.py
|
||||||
|
|
||||||
|
#### To run the project in presentation mode:
|
||||||
|
|
||||||
|
python main.py --pres
|
||||||
|
It will generate a .json file that can be presented by running all cells of `Fuzzy_presentation.ipynb`.
|
||||||
|
|
||||||
|
#### Random mode
|
||||||
|
|
||||||
|
python main.py --pres -r True
|
||||||
|
|
||||||
|
#### Evaluation mode
|
||||||
|
|
||||||
|
python main.py --pres --eval
|
||||||
|
Generates a result.json file with 10 random games and 10 recommendations for each game; the results can be evaluated with Jaccard similarity in the `Fuzzy_presentation.ipynb` file.
|
||||||
|
|
||||||
Processed dataset files are already provided, but can be created from the base ``games.csv`` file by running:
|
Processed dataset files are already provided, but can be created from the base ``games.csv`` file by running:
|
||||||
|
|
||||||
python process_dataset.py
|
python process_dataset.py
|
||||||
|
|
||||||
|
|
||||||
If no ``GoogleNews-vectors-negative300.bin`` file is present, only ``games_processed.csv`` will be created.
|
If no ``GoogleNews-vectors-negative300.bin`` file is present, only ``games_processed.csv`` will be created.
|
109
app.py
109
app.py
@ -1,109 +0,0 @@
|
|||||||
from flask import Flask, render_template, request
|
|
||||||
import pandas as pd
|
|
||||||
from fuzzy_controllers import fuzzy_controler_similiarity
|
|
||||||
from numpy import dot
|
|
||||||
from numpy.linalg import norm
|
|
||||||
import json
|
|
||||||
import multiprocessing
|
|
||||||
import tqdm
|
|
||||||
|
|
||||||
app = Flask(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def find_games_categorical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
|
|
||||||
game_1_categorical = set(game_1['all_categorical'].tolist()[0])
|
|
||||||
game_2_categorical = set(game_2['all_categorical'].tolist()[0])
|
|
||||||
return round(len(game_1_categorical & game_2_categorical) / len(game_1_categorical | game_2_categorical), 2)
|
|
||||||
|
|
||||||
|
|
||||||
def find_games_numerical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
|
|
||||||
game_1_popularity = float(game_1["fuzzy_popularity"].to_string(index=False))
|
|
||||||
game_2_popularity = float(game_2["fuzzy_popularity"].to_string(index=False))
|
|
||||||
return round(abs(game_1_popularity - game_2_popularity), 2)
|
|
||||||
|
|
||||||
|
|
||||||
def find_games_word_vector_distance(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
|
|
||||||
game_1_vector = game_1['all_categorical_vector'].tolist()[0]
|
|
||||||
game_2_vector = game_2['all_categorical_vector'].tolist()[0]
|
|
||||||
return round(dot(game_1_vector, game_2_vector) / (norm(game_1_vector) * norm(game_2_vector)), 2)
|
|
||||||
|
|
||||||
|
|
||||||
def calculate_similarities(game_title, title_list, df):
|
|
||||||
if game_title in title_list:
|
|
||||||
title_list.remove(game_title)
|
|
||||||
|
|
||||||
args_list = []
|
|
||||||
for compared_title in title_list:
|
|
||||||
args_list.append((game_title, compared_title, df))
|
|
||||||
|
|
||||||
similarities = []
|
|
||||||
# call the function for each item in parallel with multiprocessing
|
|
||||||
with multiprocessing.Pool() as pool:
|
|
||||||
for result in pool.starmap(compare_games, tqdm.tqdm(args_list, total=len(args_list), desc='Searching')):
|
|
||||||
similarities.append(result)
|
|
||||||
|
|
||||||
all_games = []
|
|
||||||
for title, similarity in zip(title_list, similarities):
|
|
||||||
all_games.append({
|
|
||||||
"title": title,
|
|
||||||
"similarity": similarity
|
|
||||||
})
|
|
||||||
|
|
||||||
sorted_games = sorted(all_games, key=lambda k: k['similarity'], reverse=True)
|
|
||||||
print("\n ==== Top 20 most similar games: ====")
|
|
||||||
for game in sorted_games[:20]:
|
|
||||||
print(f"- {game['title']}")
|
|
||||||
return sorted_games[:20]
|
|
||||||
# save_results(game_title=game_title, game_list=sorted_games)
|
|
||||||
|
|
||||||
def save_results(game_title, game_list):
|
|
||||||
print("The full list of similar games available in the /results directory\n")
|
|
||||||
with open(f"results/similarity_list_{game_title.lower().replace(' ', '_')}.txt", 'w+') as fp:
|
|
||||||
json.dump(game_list, fp)
|
|
||||||
|
|
||||||
def compare_games(title_1, title_2, df, show_graph=False):
|
|
||||||
game_1 = df.loc[df['name'] == title_1]
|
|
||||||
game_2 = df.loc[df['name'] == title_2]
|
|
||||||
|
|
||||||
categorical_similarity = find_games_categorical_similarity(game_1=game_1, game_2=game_2)
|
|
||||||
numerical_difference = find_games_numerical_similarity(game_1=game_1, game_2=game_2)
|
|
||||||
word_vector_distance = find_games_word_vector_distance(game_1=game_1, game_2=game_2)
|
|
||||||
similarity_score = fuzzy_controler_similiarity(categorical_data=categorical_similarity,
|
|
||||||
numerical_data=numerical_difference,
|
|
||||||
vector_distance=word_vector_distance, show_graph=show_graph)
|
|
||||||
return similarity_score
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
|
|
||||||
df = pd.read_pickle('data/games_processed_vectorized.csv')
|
|
||||||
title_list = df["name"].values.tolist()
|
|
||||||
while True:
|
|
||||||
print("Welcome to Fuzzy Game Reccomender!\nType in a game title and we will find the most similar games from our database")
|
|
||||||
title = input("Enter the title or type 'exit' to leave: ")
|
|
||||||
if title == "exit":
|
|
||||||
break
|
|
||||||
else:
|
|
||||||
calculate_similarities(game_title=title, title_list=title_list, df=df)
|
|
||||||
|
|
||||||
|
|
||||||
@app.route('/')
|
|
||||||
def index():
|
|
||||||
return render_template('index.html')
|
|
||||||
|
|
||||||
|
|
||||||
@app.route('/', methods=['POST'])
|
|
||||||
def form_post():
|
|
||||||
df = pd.read_pickle('data/games_processed_vectorized.csv')
|
|
||||||
first_game = request.form['first_game']
|
|
||||||
second_game = request.form['second_game']
|
|
||||||
third_game = request.form['third_game']
|
|
||||||
processed_text1 = first_game
|
|
||||||
processed_text2 = second_game
|
|
||||||
processed_text3 = third_game
|
|
||||||
|
|
||||||
title_list = df["name"].values.tolist()
|
|
||||||
similarities = calculate_similarities(game_title=processed_text1, title_list=title_list, df=df)
|
|
||||||
|
|
||||||
return similarities
|
|
81049
data/steam_data.csv
Normal file
81049
data/steam_data.csv
Normal file
File diff suppressed because it is too large
Load Diff
BIN
doc/project_doc.pdf
Normal file
BIN
doc/project_doc.pdf
Normal file
Binary file not shown.
@ -91,9 +91,9 @@ def fuzzy_controler_similiarity(categorical_data: str, numerical_data: str, vect
|
|||||||
FSS.set_crisp_output_value("big", 1)
|
FSS.set_crisp_output_value("big", 1)
|
||||||
|
|
||||||
# TODO: add Word_vector_distance to rules
|
# TODO: add Word_vector_distance to rules
|
||||||
R1 = "IF (Categorical_similarity IS average) OR (Numerical_difference IS average) THEN (Similarity IS average)"
|
R1 = "IF (Categorical_similarity IS average) AND (Numerical_difference IS average) THEN (Similarity IS average)"
|
||||||
R2 = "IF (Categorical_similarity IS small) OR (Numerical_difference IS big) THEN (Similarity IS small)"
|
R2 = "IF (Categorical_similarity IS small) AND (Numerical_difference IS big) THEN (Similarity IS small)"
|
||||||
R3 = "IF (Categorical_similarity IS big) OR (Numerical_difference IS small) THEN (Similarity IS big)"
|
R3 = "IF (Categorical_similarity IS big) AND (Numerical_difference IS small) THEN (Similarity IS big)"
|
||||||
|
|
||||||
FSS.add_rules([R1, R2, R3])
|
FSS.add_rules([R1, R2, R3])
|
||||||
|
|
||||||
|
72
main.py
72
main.py
@ -4,8 +4,11 @@ from numpy import dot
|
|||||||
from numpy.linalg import norm
|
from numpy.linalg import norm
|
||||||
import json
|
import json
|
||||||
import multiprocessing
|
import multiprocessing
|
||||||
import tqdm
|
from tqdm.auto import tqdm
|
||||||
|
from sys import argv
|
||||||
|
import sys, getopt
|
||||||
|
import argparse
|
||||||
|
import random
|
||||||
|
|
||||||
def find_games_categorical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
|
def find_games_categorical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
|
||||||
game_1_categorical = set(game_1['all_categorical'].tolist()[0])
|
game_1_categorical = set(game_1['all_categorical'].tolist()[0])
|
||||||
@ -25,7 +28,7 @@ def find_games_word_vector_distance(game_1: pd.DataFrame, game_2: pd.DataFrame)
|
|||||||
return round(dot(game_1_vector, game_2_vector) / (norm(game_1_vector) * norm(game_2_vector)), 2)
|
return round(dot(game_1_vector, game_2_vector) / (norm(game_1_vector) * norm(game_2_vector)), 2)
|
||||||
|
|
||||||
|
|
||||||
def calculate_similarities(game_title, title_list, df):
|
def calculate_similarities(game_title, title_list, df, test=False):
|
||||||
if game_title in title_list:
|
if game_title in title_list:
|
||||||
title_list.remove(game_title)
|
title_list.remove(game_title)
|
||||||
|
|
||||||
@ -36,7 +39,7 @@ def calculate_similarities(game_title, title_list, df):
|
|||||||
similarities = []
|
similarities = []
|
||||||
# call the function for each item in parallel with multiprocessing
|
# call the function for each item in parallel with multiprocessing
|
||||||
with multiprocessing.Pool() as pool:
|
with multiprocessing.Pool() as pool:
|
||||||
for result in pool.starmap(compare_games, tqdm.tqdm(args_list, total=len(args_list), desc='Searching')):
|
for result in pool.starmap(compare_games, tqdm(args_list, total=len(args_list), desc='Searching')):
|
||||||
similarities.append(result)
|
similarities.append(result)
|
||||||
|
|
||||||
all_games = []
|
all_games = []
|
||||||
@ -47,6 +50,7 @@ def calculate_similarities(game_title, title_list, df):
|
|||||||
})
|
})
|
||||||
|
|
||||||
sorted_games = sorted(all_games, key=lambda k: k['similarity'], reverse=True)
|
sorted_games = sorted(all_games, key=lambda k: k['similarity'], reverse=True)
|
||||||
|
if (test): return sorted_games[:20]
|
||||||
print("\n ==== Top 20 most similar games: ====")
|
print("\n ==== Top 20 most similar games: ====")
|
||||||
for game in sorted_games[:20]:
|
for game in sorted_games[:20]:
|
||||||
print(f"- {game['title']}")
|
print(f"- {game['title']}")
|
||||||
@ -69,11 +73,63 @@ def compare_games(title_1, title_2, df, show_graph=False):
|
|||||||
vector_distance=word_vector_distance, show_graph=show_graph)
|
vector_distance=word_vector_distance, show_graph=show_graph)
|
||||||
return similarity_score
|
return similarity_score
|
||||||
|
|
||||||
|
def get_game_info_from_df(data_games, game_title):
|
||||||
|
finded_game = data_games.loc[data_games["name"] == game_title]
|
||||||
|
# print(finded_game)
|
||||||
|
result_dict = {
|
||||||
|
"title" : finded_game["name"].values[0],
|
||||||
|
"price" : finded_game["price"].values[0],
|
||||||
|
"all_categorical" : finded_game["all_categorical"].values[0],
|
||||||
|
}
|
||||||
|
return result_dict
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
|
|
||||||
|
def get_game_info(data_game):
|
||||||
|
# finded_game = data_games.loc[data_games["name"] == game_title]
|
||||||
|
# print(finded_game)
|
||||||
|
result_dict = {
|
||||||
|
"title" : data_game["name"],
|
||||||
|
"price" : data_game["price"],
|
||||||
|
"all_categorical" : data_game["all_categorical"],
|
||||||
|
}
|
||||||
|
return result_dict
|
||||||
|
|
||||||
|
def main(argv):
|
||||||
df = pd.read_pickle('data/games_processed_vectorized.csv')
|
df = pd.read_pickle('data/games_processed_vectorized.csv')
|
||||||
title_list = df["name"].values.tolist()
|
title_list = df["name"].values.tolist()
|
||||||
|
|
||||||
|
test_mode = False
|
||||||
|
random_mode = False
|
||||||
|
eval_mode = False
|
||||||
|
eval_random_mode = False
|
||||||
|
|
||||||
|
opts, args = getopt.getopt(argv, "r:", ["pres", "eval", "evalrandom"])
|
||||||
|
for opt, arg in opts:
|
||||||
|
if "--pres" == opt:
|
||||||
|
test_mode = True
|
||||||
|
if "--eval" == opt:
|
||||||
|
eval_mode = True
|
||||||
|
if "--evalrandom" == opt:
|
||||||
|
eval_random_mode = True
|
||||||
|
if "-r" == opt:
|
||||||
|
random_mode = arg
|
||||||
|
if (True == test_mode):
|
||||||
|
game_list = ["Call of Duty®: Modern Warfare® 2", "Project CARS", "DayZ", "STAR WARS™ Jedi Knight - Mysteries of the Sith™", "Overcooked"]
|
||||||
|
if (random_mode): game_list = [random.choice(title_list)]
|
||||||
|
if (eval_mode or eval_random_mode): game_list = [random.choice(title_list) for i in range(10)]
|
||||||
|
result_dict = {"results": []}
|
||||||
|
for item in game_list:
|
||||||
|
if not eval_random_mode:
|
||||||
|
titles_results = calculate_similarities(game_title=item, title_list=title_list, df=df, test=test_mode)
|
||||||
|
if eval_random_mode:
|
||||||
|
titles_results = [{"title": random.choice(title_list)} for i in range(10)]
|
||||||
|
game_result = get_game_info_from_df(df, item)
|
||||||
|
game_result["fuzzy_similiar"] = [get_game_info_from_df(df, title_item["title"]) for title_item in titles_results[:10]]
|
||||||
|
result_dict["results"].append(game_result)
|
||||||
|
with open("results/result.json", "w", encoding="UTF-8") as outfile:
|
||||||
|
json.dump(result_dict, outfile, ensure_ascii=False)
|
||||||
|
|
||||||
|
if (False == test_mode):
|
||||||
while True:
|
while True:
|
||||||
print("Welcome to Fuzzy Game Reccomender!\nType in a game title and we will find the most similar games from our database")
|
print("Welcome to Fuzzy Game Reccomender!\nType in a game title and we will find the most similar games from our database")
|
||||||
title = input("Enter the title or type 'exit' to leave: ")
|
title = input("Enter the title or type 'exit' to leave: ")
|
||||||
@ -81,3 +137,9 @@ if __name__ == '__main__':
|
|||||||
break
|
break
|
||||||
else:
|
else:
|
||||||
calculate_similarities(game_title=title, title_list=title_list, df=df)
|
calculate_similarities(game_title=title, title_list=title_list, df=df)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main(sys.argv[1:])
|
||||||
|
|
||||||
|
|
1
results/result.json
Normal file
1
results/result.json
Normal file
File diff suppressed because one or more lines are too long
@ -1,105 +0,0 @@
|
|||||||
<head>
|
|
||||||
<script>
|
|
||||||
const toggleCheckboxes = document.querySelectorAll('input[type="checkbox"]');
|
|
||||||
|
|
||||||
toggleCheckboxes.forEach(checkbox => {
|
|
||||||
checkbox.addEventListener('change', function() {
|
|
||||||
const target = document.getElementById(this.id.replace('toggle', ''));
|
|
||||||
const label = target.previousElementSibling;
|
|
||||||
label.style.visibility = this.checked ? 'visible' : 'hidden';
|
|
||||||
});
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
|
|
||||||
<style>
|
|
||||||
h1 {
|
|
||||||
border: 2px #eee solid;
|
|
||||||
color: brown;
|
|
||||||
text-align: center;
|
|
||||||
padding: 10px;
|
|
||||||
}
|
|
||||||
|
|
||||||
html, body {
|
|
||||||
height: 100%;
|
|
||||||
margin: 0;
|
|
||||||
padding: 0;
|
|
||||||
background-color: #ADD8E6; /* Light blue color */
|
|
||||||
}
|
|
||||||
|
|
||||||
form {
|
|
||||||
display: flex;
|
|
||||||
flex-direction: column;
|
|
||||||
align-items: center;
|
|
||||||
justify-content: center;
|
|
||||||
height: 100%;
|
|
||||||
background-color: white;
|
|
||||||
padding: 20px;
|
|
||||||
border-radius: 10px;
|
|
||||||
box-shadow: 0 0 10px gray;
|
|
||||||
}
|
|
||||||
|
|
||||||
input[type="text"] {
|
|
||||||
width: 50%;
|
|
||||||
padding: 10px;
|
|
||||||
margin: 10px 0;
|
|
||||||
font-size: 16px;
|
|
||||||
background-color: lightgray;
|
|
||||||
border: none;
|
|
||||||
border-radius: 5px;
|
|
||||||
}
|
|
||||||
|
|
||||||
input[type="submit"] {
|
|
||||||
padding: 10px 20px;
|
|
||||||
font-size: 16px;
|
|
||||||
background-color: lightblue;
|
|
||||||
color: white;
|
|
||||||
border: none;
|
|
||||||
border-radius: 5px;
|
|
||||||
cursor: pointer;
|
|
||||||
}
|
|
||||||
|
|
||||||
.red-border {
|
|
||||||
border: 2px solid red;
|
|
||||||
display: inline-block;
|
|
||||||
padding: 5px;
|
|
||||||
border-radius: 5px;
|
|
||||||
visibility: hidden;
|
|
||||||
}
|
|
||||||
|
|
||||||
button.toggle-border {
|
|
||||||
padding: 5px 10px;
|
|
||||||
font-size: 14px;
|
|
||||||
background-color: lightgray;
|
|
||||||
border: none;
|
|
||||||
border-radius: 5px;
|
|
||||||
cursor: pointer;
|
|
||||||
margin-left: 10px;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
</style>
|
|
||||||
</head>
|
|
||||||
|
|
||||||
<form action="" method="post">
|
|
||||||
<div>
|
|
||||||
<input type="checkbox" id="first_game_toggle">
|
|
||||||
<label for="first_game_toggle">I don't like this game</label>
|
|
||||||
<label for="first_game" class="red-border">First game:</label>
|
|
||||||
<input type="text" id="first_game" name="first_game">
|
|
||||||
</div>
|
|
||||||
<div>
|
|
||||||
<input type="checkbox" id="second_game_toggle">
|
|
||||||
<label for="second_game_toggle">I don't like this game</label>
|
|
||||||
<label for="second_game" class="red-border">Second game:</label>
|
|
||||||
<input type="text" id="second_game" name="second_game">
|
|
||||||
</div>
|
|
||||||
<div>
|
|
||||||
<input type="checkbox" id="third_game_toggle">
|
|
||||||
<label for="third_game_toggle">I don't like this game</label>
|
|
||||||
<label for="third_game" class="red-border">Second game:</label>
|
|
||||||
<input type="text" id="third_game" name="third_game">
|
|
||||||
</div>
|
|
||||||
<input type="submit" value="Submit">
|
|
||||||
</form>
|
|
||||||
|
|
Loading…
Reference in New Issue
Block a user