Compare commits

..

1 Commits

Author SHA1 Message Date
Adrian Charkiewicz
793a61754e initial commit for checking 3 titles and setting them to like/don't like 2023-02-01 07:20:19 +01:00
9 changed files with 230 additions and 83635 deletions

File diff suppressed because one or more lines are too long

View File

@ -5,23 +5,8 @@
pip install -r requirements.txt pip install -r requirements.txt
python main.py python main.py
#### To run the project in presentation mode:
python main.py --pres
It will generate a .json file which can be presented by running all cells of `Fuzzy_presentation.ipynb`.
#### Random mode
python main.py --pres -r True
#### Evaluation mode
python main.py --pres --eval
Generates a result.json file with 10 random games and 10 recommendations for each game; the results can be evaluated in the `Fuzzy_presentation.ipynb` file using Jaccard similarity.
Processed dataset files are already provided, but can be created from the base ``games.csv`` file by running: Processed dataset files are already provided, but can be created from the base ``games.csv`` file by running:
python process_dataset.py python process_dataset.py
If no ``GoogleNews-vectors-negative300.bin`` file is present, only ``games_processed.csv`` will be created. If no ``GoogleNews-vectors-negative300.bin`` file is present, only ``games_processed.csv`` will be created.

109
app.py Normal file
View File

@ -0,0 +1,109 @@
from flask import Flask, render_template, request
import pandas as pd
from fuzzy_controllers import fuzzy_controler_similiarity
from numpy import dot
from numpy.linalg import norm
import json
import multiprocessing
import tqdm
app = Flask(__name__)
def find_games_categorical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
    """Return the Jaccard similarity of two games' categorical tags.

    Args:
        game_1: Frame whose first row's ``all_categorical`` cell holds an
            iterable of tags.
        game_2: Same layout as ``game_1``.

    Returns:
        ``|A & B| / |A | B|`` rounded to two decimals, or ``0.0`` when
        neither game has any tag.

    Raises:
        IndexError: If either frame has no rows.
    """
    tags_1 = set(game_1['all_categorical'].tolist()[0])
    tags_2 = set(game_2['all_categorical'].tolist()[0])
    all_tags = tags_1 | tags_2
    if not all_tags:
        # Two untagged games share nothing measurable; returning 0.0 avoids
        # the ZeroDivisionError an empty union would otherwise cause.
        return 0.0
    return round(len(tags_1 & tags_2) / len(all_tags), 2)
def find_games_numerical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
    """Return the absolute difference between two games' popularity scores.

    Args:
        game_1: Frame whose first row's ``fuzzy_popularity`` cell is numeric.
        game_2: Same layout as ``game_1``.

    Returns:
        ``|popularity_1 - popularity_2|`` rounded to two decimals.

    Raises:
        IndexError: If either frame has no rows.
    """
    # Read the first row directly instead of parsing Series.to_string():
    # that output is display formatting and is not parseable as a single
    # float when more than one row matches. The sibling similarity helpers
    # also use the first row.
    popularity_1 = float(game_1["fuzzy_popularity"].iloc[0])
    popularity_2 = float(game_2["fuzzy_popularity"].iloc[0])
    return round(abs(popularity_1 - popularity_2), 2)
def find_games_word_vector_distance(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
    """Return the cosine similarity of two games' category word vectors.

    Despite the name, the value is a similarity: ``dot(a, b)`` divided by
    the product of the two norms.

    Args:
        game_1: Frame whose first row's ``all_categorical_vector`` cell holds
            a numeric vector.
        game_2: Same layout as ``game_1``.

    Returns:
        The cosine similarity rounded to two decimals, or ``0.0`` when either
        vector has zero length.

    Raises:
        IndexError: If either frame has no rows.
    """
    vector_1 = game_1['all_categorical_vector'].tolist()[0]
    vector_2 = game_2['all_categorical_vector'].tolist()[0]
    scale = norm(vector_1) * norm(vector_2)
    if scale == 0:
        # A zero vector has no direction; dividing would yield NaN, which
        # would then be passed on as a similarity score.
        return 0.0
    return round(float(dot(vector_1, vector_2) / scale), 2)
def calculate_similarities(game_title, title_list, df):
    """Rank every other title by fuzzy similarity to ``game_title``.

    Args:
        game_title: Title of the reference game.
        title_list: Candidate titles. It is not modified.
        df: Game data frame handed to ``compare_games`` for each pair.

    Returns:
        Up to 20 ``{"title": ..., "similarity": ...}`` dicts, ordered from
        most to least similar. The same titles are printed to stdout.
    """
    # Build a filtered copy instead of calling title_list.remove(): callers
    # reuse one list across queries, and removing in place would drop every
    # queried title from all later recommendations.
    candidates = [title for title in title_list if title != game_title]
    args_list = [(game_title, candidate, df) for candidate in candidates]

    # Score all pairs in parallel; starmap returns results in input order,
    # so they stay aligned with ``candidates``.
    with multiprocessing.Pool() as pool:
        similarities = pool.starmap(
            compare_games,
            tqdm.tqdm(args_list, total=len(args_list), desc='Searching'),
        )

    all_games = [
        {"title": title, "similarity": similarity}
        for title, similarity in zip(candidates, similarities)
    ]
    top_games = sorted(all_games, key=lambda game: game['similarity'], reverse=True)[:20]

    print("\n ==== Top 20 most similar games: ====")
    for game in top_games:
        print(f"- {game['title']}")
    return top_games
def save_results(game_title, game_list):
    """Write a similarity list to ``results/`` as JSON.

    The file is named ``similarity_list_<title>.txt``, where ``<title>`` is
    ``game_title`` lower-cased with spaces replaced by underscores.

    Args:
        game_title: Title of the reference game.
        game_list: JSON-serializable list of similar games.
    """
    import os  # local import: the module header does not import os

    # Create the output directory on first use rather than failing with
    # FileNotFoundError when it is missing.
    os.makedirs("results", exist_ok=True)
    file_name = f"results/similarity_list_{game_title.lower().replace(' ', '_')}.txt"
    with open(file_name, 'w', encoding='utf-8') as fp:
        json.dump(game_list, fp)
    # Announce only after the file has actually been written.
    print("The full list of similar games available in the /results directory\n")
def compare_games(title_1, title_2, df, show_graph=False):
    """Score how similar two games are using the fuzzy controller.

    Args:
        title_1: Value of the ``name`` column identifying the first game.
        title_2: Value of the ``name`` column identifying the second game.
        df: Game data frame containing both titles.
        show_graph: Forwarded unchanged to ``fuzzy_controler_similiarity``.

    Returns:
        Whatever ``fuzzy_controler_similiarity`` returns for the three
        pairwise measures.
    """
    first = df.loc[df['name'] == title_1]
    second = df.loc[df['name'] == title_2]

    # The three crisp inputs of the fuzzy system, computed in a fixed order.
    controller_inputs = {
        "categorical_data": find_games_categorical_similarity(game_1=first, game_2=second),
        "numerical_data": find_games_numerical_similarity(game_1=first, game_2=second),
        "vector_distance": find_games_word_vector_distance(game_1=first, game_2=second),
    }
    return fuzzy_controler_similiarity(show_graph=show_graph, **controller_inputs)
if __name__ == '__main__':
    # Command-line mode: load the data once, then answer queries until the
    # user types 'exit'.
    # NOTE(review): the data file carries a .csv extension but is read as a
    # pickle - confirm this matches how it is produced.
    # NOTE(review): the Flask routes are defined below this block, so in
    # script mode they are registered only after this loop ends.
    df = pd.read_pickle('data/games_processed_vectorized.csv')
    title_list = df["name"].values.tolist()
    while True:
        print("Welcome to Fuzzy Game Reccomender!\nType in a game title and we will find the most similar games from our database")
        title = input("Enter the title or type 'exit' to leave: ")
        if title == "exit":
            break
        calculate_similarities(game_title=title, title_list=title_list, df=df)
@app.route('/')
def index():
    """Render the game entry form for requests to the site root."""
    page = render_template('index.html')
    return page
@app.route('/', methods=['POST'])
def form_post():
    """Handle the submitted form and return games similar to the first title.

    Reads ``first_game``, ``second_game`` and ``third_game`` from the posted
    form. Only ``first_game`` is used for the similarity search so far.

    Returns:
        The list produced by ``calculate_similarities`` on success, or an
        error object with status 404 when the first title is not in the
        data set.
    """
    first_game = request.form['first_game']
    # TODO: the second and third titles are not used in scoring yet. They
    # are still read so the set of required form fields stays the same.
    second_game = request.form['second_game']
    third_game = request.form['third_game']

    df = pd.read_pickle('data/games_processed_vectorized.csv')
    title_list = df["name"].values.tolist()

    # An unknown title would otherwise fail inside the worker pool when the
    # game's row is looked up, and surface as an opaque server error.
    if first_game not in title_list:
        return {"error": f"Unknown game title: {first_game}"}, 404

    return calculate_similarities(game_title=first_game, title_list=title_list, df=df)

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@ -91,9 +91,9 @@ def fuzzy_controler_similiarity(categorical_data: str, numerical_data: str, vect
FSS.set_crisp_output_value("big", 1) FSS.set_crisp_output_value("big", 1)
# TODO: add Word_vector_distance to rules # TODO: add Word_vector_distance to rules
R1 = "IF (Categorical_similarity IS average) AND (Numerical_difference IS average) THEN (Similarity IS average)" R1 = "IF (Categorical_similarity IS average) OR (Numerical_difference IS average) THEN (Similarity IS average)"
R2 = "IF (Categorical_similarity IS small) AND (Numerical_difference IS big) THEN (Similarity IS small)" R2 = "IF (Categorical_similarity IS small) OR (Numerical_difference IS big) THEN (Similarity IS small)"
R3 = "IF (Categorical_similarity IS big) AND (Numerical_difference IS small) THEN (Similarity IS big)" R3 = "IF (Categorical_similarity IS big) OR (Numerical_difference IS small) THEN (Similarity IS big)"
FSS.add_rules([R1, R2, R3]) FSS.add_rules([R1, R2, R3])

88
main.py
View File

@ -4,11 +4,8 @@ from numpy import dot
from numpy.linalg import norm from numpy.linalg import norm
import json import json
import multiprocessing import multiprocessing
from tqdm.auto import tqdm import tqdm
from sys import argv
import sys, getopt
import argparse
import random
def find_games_categorical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float: def find_games_categorical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
game_1_categorical = set(game_1['all_categorical'].tolist()[0]) game_1_categorical = set(game_1['all_categorical'].tolist()[0])
@ -28,7 +25,7 @@ def find_games_word_vector_distance(game_1: pd.DataFrame, game_2: pd.DataFrame)
return round(dot(game_1_vector, game_2_vector) / (norm(game_1_vector) * norm(game_2_vector)), 2) return round(dot(game_1_vector, game_2_vector) / (norm(game_1_vector) * norm(game_2_vector)), 2)
def calculate_similarities(game_title, title_list, df, test=False): def calculate_similarities(game_title, title_list, df):
if game_title in title_list: if game_title in title_list:
title_list.remove(game_title) title_list.remove(game_title)
@ -39,7 +36,7 @@ def calculate_similarities(game_title, title_list, df, test=False):
similarities = [] similarities = []
# call the function for each item in parallel with multiprocessing # call the function for each item in parallel with multiprocessing
with multiprocessing.Pool() as pool: with multiprocessing.Pool() as pool:
for result in pool.starmap(compare_games, tqdm(args_list, total=len(args_list), desc='Searching')): for result in pool.starmap(compare_games, tqdm.tqdm(args_list, total=len(args_list), desc='Searching')):
similarities.append(result) similarities.append(result)
all_games = [] all_games = []
@ -50,7 +47,6 @@ def calculate_similarities(game_title, title_list, df, test=False):
}) })
sorted_games = sorted(all_games, key=lambda k: k['similarity'], reverse=True) sorted_games = sorted(all_games, key=lambda k: k['similarity'], reverse=True)
if (test): return sorted_games[:20]
print("\n ==== Top 20 most similar games: ====") print("\n ==== Top 20 most similar games: ====")
for game in sorted_games[:20]: for game in sorted_games[:20]:
print(f"- {game['title']}") print(f"- {game['title']}")
@ -73,73 +69,15 @@ def compare_games(title_1, title_2, df, show_graph=False):
vector_distance=word_vector_distance, show_graph=show_graph) vector_distance=word_vector_distance, show_graph=show_graph)
return similarity_score return similarity_score
def get_game_info_from_df(data_games, game_title):
    """Look up a game by name and return its presentation fields.

    Args:
        data_games: Frame with ``name``, ``price`` and ``all_categorical``
            columns.
        game_title: Value of the ``name`` column to look up.

    Returns:
        A dict with keys ``title``, ``price`` and ``all_categorical``, taken
        from the first matching row.

    Raises:
        IndexError: If no row has the given name.
    """
    matches = data_games[data_games["name"] == game_title]
    # Output key -> source column, in the order the keys appear in the result.
    source_columns = (
        ("title", "name"),
        ("price", "price"),
        ("all_categorical", "all_categorical"),
    )
    return {key: matches[column].values[0] for key, column in source_columns}
def get_game_info(data_game):
    """Pick the presentation fields out of a single game record.

    Args:
        data_game: Mapping-like record indexable by ``name``, ``price`` and
            ``all_categorical``.

    Returns:
        A dict with keys ``title``, ``price`` and ``all_categorical``.
    """
    title = data_game["name"]
    price = data_game["price"]
    categories = data_game["all_categorical"]
    return dict(title=title, price=price, all_categorical=categories)
def main(argv):
    """Run the recommender interactively or in presentation mode.

    Options parsed from ``argv``:
        ``--pres``        presentation mode; writes ``results/result.json``.
        ``-r VALUE``      with ``--pres``: use one random title when VALUE is
                          a truthy word (``true``, ``1``, ``yes``, ``y``, ``on``).
        ``--eval``        with ``--pres``: use ten random titles.
        ``--evalrandom``  with ``--pres``: ten random titles, each paired with
                          ten random "recommendations" instead of computed ones.

    Without ``--pres`` the function prompts for titles until 'exit' is typed.

    Args:
        argv: Command-line arguments without the program name.
    """
    df = pd.read_pickle('data/games_processed_vectorized.csv')
    title_list = df["name"].values.tolist()

    test_mode = random_mode = eval_mode = eval_random_mode = False
    opts, _ = getopt.getopt(argv, "r:", ["pres", "eval", "evalrandom"])
    for opt, arg in opts:
        if opt == "--pres":
            test_mode = True
        elif opt == "--eval":
            eval_mode = True
        elif opt == "--evalrandom":
            eval_random_mode = True
        elif opt == "-r":
            # Parse the value: a raw string such as "False" is non-empty and
            # therefore truthy, which used to enable random mode.
            random_mode = arg.strip().lower() in ("1", "true", "yes", "y", "on")

    if not test_mode:
        while True:
            print("Welcome to Fuzzy Game Reccomender!\nType in a game title and we will find the most similar games from our database")
            title = input("Enter the title or type 'exit' to leave: ")
            if title == "exit":
                break
            calculate_similarities(game_title=title, title_list=title_list, df=df)
        return

    # Presentation mode: the evaluation flags take precedence over -r, which
    # takes precedence over the fixed demo list.
    if eval_mode or eval_random_mode:
        game_list = [random.choice(title_list) for _ in range(10)]
    elif random_mode:
        game_list = [random.choice(title_list)]
    else:
        game_list = ["Call of Duty®: Modern Warfare® 2", "Project CARS", "DayZ", "STAR WARS™ Jedi Knight - Mysteries of the Sith™", "Overcooked"]

    result_dict = {"results": []}
    for item in game_list:
        if eval_random_mode:
            # Random baseline: recommendations drawn without any scoring.
            titles_results = [{"title": random.choice(title_list)} for _ in range(10)]
        else:
            titles_results = calculate_similarities(game_title=item, title_list=title_list, df=df, test=test_mode)
        game_result = get_game_info_from_df(df, item)
        game_result["fuzzy_similiar"] = [
            get_game_info_from_df(df, title_item["title"])
            for title_item in titles_results[:10]
        ]
        result_dict["results"].append(game_result)

    with open("results/result.json", "w", encoding="UTF-8") as outfile:
        json.dump(result_dict, outfile, ensure_ascii=False)
if __name__ == '__main__': if __name__ == '__main__':
main(sys.argv[1:])
df = pd.read_pickle('data/games_processed_vectorized.csv')
title_list = df["name"].values.tolist()
while True:
print("Welcome to Fuzzy Game Reccomender!\nType in a game title and we will find the most similar games from our database")
title = input("Enter the title or type 'exit' to leave: ")
if title == "exit":
break
else:
calculate_similarities(game_title=title, title_list=title_list, df=df)

File diff suppressed because one or more lines are too long

105
templates/index.html Normal file
View File

@ -0,0 +1,105 @@
<head>
<script>
const toggleCheckboxes = document.querySelectorAll('input[type="checkbox"]');
toggleCheckboxes.forEach(checkbox => {
checkbox.addEventListener('change', function() {
const target = document.getElementById(this.id.replace('toggle', ''));
const label = target.previousElementSibling;
label.style.visibility = this.checked ? 'visible' : 'hidden';
});
});
</script>
<style>
h1 {
border: 2px #eee solid;
color: brown;
text-align: center;
padding: 10px;
}
html, body {
height: 100%;
margin: 0;
padding: 0;
background-color: #ADD8E6; /* Light blue color */
}
form {
display: flex;
flex-direction: column;
align-items: center;
justify-content: center;
height: 100%;
background-color: white;
padding: 20px;
border-radius: 10px;
box-shadow: 0 0 10px gray;
}
input[type="text"] {
width: 50%;
padding: 10px;
margin: 10px 0;
font-size: 16px;
background-color: lightgray;
border: none;
border-radius: 5px;
}
input[type="submit"] {
padding: 10px 20px;
font-size: 16px;
background-color: lightblue;
color: white;
border: none;
border-radius: 5px;
cursor: pointer;
}
.red-border {
border: 2px solid red;
display: inline-block;
padding: 5px;
border-radius: 5px;
visibility: hidden;
}
button.toggle-border {
padding: 5px 10px;
font-size: 14px;
background-color: lightgray;
border: none;
border-radius: 5px;
cursor: pointer;
margin-left: 10px;
}
</style>
</head>
<form action="" method="post">
<div>
<input type="checkbox" id="first_game_toggle">
<label for="first_game_toggle">I don't like this game</label>
<label for="first_game" class="red-border">First game:</label>
<input type="text" id="first_game" name="first_game">
</div>
<div>
<input type="checkbox" id="second_game_toggle">
<label for="second_game_toggle">I don't like this game</label>
<label for="second_game" class="red-border">Second game:</label>
<input type="text" id="second_game" name="second_game">
</div>
<div>
<input type="checkbox" id="third_game_toggle">
<label for="third_game_toggle">I don't like this game</label>
<label for="third_game" class="red-border">Third game:</label>
<input type="text" id="third_game" name="third_game">
</div>
<input type="submit" value="Submit">
</form>