compare_to_all_games #1

Merged
s444417 merged 9 commits from compare_to_all_games into master 2023-02-03 00:42:23 +01:00
5 changed files with 82758 additions and 9 deletions
Showing only changes of commit 9ce43cafad - Show all commits

1646
Fuzzy_presentation.ipynb Normal file

File diff suppressed because one or more lines are too long

View File

@ -5,8 +5,14 @@
pip install -r requirements.txt pip install -r requirements.txt
python main.py python main.py
#### To run the project in presentation mode:
python main.py --pres
It will generate a `.json` file, which can be presented by running all cells of `Fuzzy_presentation.ipynb`.
Processed dataset files are already provided, but can be created from the base ``games.csv`` file by running: Processed dataset files are already provided, but can be created from the base ``games.csv`` file by running:
python process_dataset.py python process_dataset.py
If no ``GoogleNews-vectors-negative300.bin`` file is present, only ``games_processed.csv`` will be created. If no ``GoogleNews-vectors-negative300.bin`` file is present, only ``games_processed.csv`` will be created.

81049
data/steam_data.csv Normal file

File diff suppressed because it is too large Load Diff

65
main.py
View File

@ -5,6 +5,9 @@ from numpy.linalg import norm
import json import json
import multiprocessing import multiprocessing
import tqdm import tqdm
from sys import argv
import sys, getopt
import argparse
def find_games_categorical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float: def find_games_categorical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
@ -25,7 +28,7 @@ def find_games_word_vector_distance(game_1: pd.DataFrame, game_2: pd.DataFrame)
return round(dot(game_1_vector, game_2_vector) / (norm(game_1_vector) * norm(game_2_vector)), 2) return round(dot(game_1_vector, game_2_vector) / (norm(game_1_vector) * norm(game_2_vector)), 2)
def calculate_similarities(game_title, title_list, df): def calculate_similarities(game_title, title_list, df, test=False):
if game_title in title_list: if game_title in title_list:
title_list.remove(game_title) title_list.remove(game_title)
@ -47,6 +50,7 @@ def calculate_similarities(game_title, title_list, df):
}) })
sorted_games = sorted(all_games, key=lambda k: k['similarity'], reverse=True) sorted_games = sorted(all_games, key=lambda k: k['similarity'], reverse=True)
if (test): return sorted_games[:20]
print("\n ==== Top 20 most similar games: ====") print("\n ==== Top 20 most similar games: ====")
for game in sorted_games[:20]: for game in sorted_games[:20]:
print(f"- {game['title']}") print(f"- {game['title']}")
@ -69,15 +73,58 @@ def compare_games(title_1, title_2, df, show_graph=False):
vector_distance=word_vector_distance, show_graph=show_graph) vector_distance=word_vector_distance, show_graph=show_graph)
return similarity_score return similarity_score
def get_game_info_from_df(data_games, game_title):
    """Return the title, price and categorical summary of one game.

    The game is looked up by exact equality between ``game_title`` and the
    ``name`` column; when several rows match, the first one is used.

    Args:
        data_games: DataFrame providing the ``name``, ``price`` and
            ``all_categorical`` columns.
        game_title: Exact title of the game to look up.

    Returns:
        A dict with the keys ``"title"``, ``"price"`` and
        ``"all_categorical"`` taken from the first matching row.

    Raises:
        IndexError: If no row has ``name == game_title``. The exception type
            is the one the bare ``.values[0]`` access used to raise, but the
            message now names the missing title.
    """
    matching_rows = data_games.loc[data_games["name"] == game_title]
    if matching_rows.empty:
        # Fail with a readable message instead of numpy's
        # "index 0 is out of bounds for axis 0 with size 0".
        raise IndexError(f"game {game_title!r} not found in the dataset")

    return {
        "title": matching_rows["name"].values[0],
        "price": matching_rows["price"].values[0],
        "all_categorical": matching_rows["all_categorical"].values[0],
    }
if __name__ == '__main__':
def get_game_info(data_game):
    """Build the summary dict for a single, already selected game record.

    Args:
        data_game: Any object indexable by the string keys ``"name"``,
            ``"price"`` and ``"all_categorical"``.

    Returns:
        A dict with the keys ``"title"``, ``"price"`` and
        ``"all_categorical"``; ``"title"`` is filled from ``"name"``.
    """
    # (output key, source key) pairs, in the order the fields are emitted.
    field_sources = (
        ("title", "name"),
        ("price", "price"),
        ("all_categorical", "all_categorical"),
    )
    return {out_key: data_game[src_key] for out_key, src_key in field_sources}
def main(argv):
    """Run the recommender interactively or in presentation mode.

    Args:
        argv: Command-line arguments without the program name. The only
            recognised option is ``--pres``, which selects presentation mode.

    In presentation mode, the similar games for a fixed list of titles are
    computed and written to ``results/result.json``. Otherwise the user is
    prompted for titles in a loop until ``exit`` is entered.

    Raises:
        getopt.GetoptError: If ``argv`` contains an unrecognised option.
    """
    import os  # local import: only needed to create the output directory

    # NOTE: despite the ``.csv`` suffix this file is loaded with
    # ``pd.read_pickle``; unpickling is only safe for trusted files.
    df = pd.read_pickle('data/games_processed_vectorized.csv')
    title_list = df["name"].values.tolist()

    opts, _ = getopt.getopt(argv, "", ["pres"])
    test_mode = any(opt == "--pres" for opt, _ in opts)

    if test_mode:
        game_list = ["Call of Duty®: Modern Warfare® 2", "Project CARS", "DayZ", "STAR WARS™ Jedi Knight - Mysteries of the Sith™", "Overcooked"]
        result_dict = {"results": []}
        for item in game_list:
            # calculate_similarities removes the queried title from the list
            # it is given, so pass a copy; otherwise every processed game
            # would vanish from the candidate pool of the following ones.
            titles_results = calculate_similarities(
                game_title=item, title_list=list(title_list), df=df, test=True)
            game_result = get_game_info_from_df(df, item)
            game_result["fuzzy_similiar"] = [
                get_game_info_from_df(df, title_item["title"])
                for title_item in titles_results[:10]
            ]
            result_dict["results"].append(game_result)

        # Make sure the output directory exists before writing the report.
        os.makedirs("results", exist_ok=True)
        with open("results/result.json", "w", encoding="UTF-8") as outfile:
            json.dump(result_dict, outfile, ensure_ascii=False)
        return

    while True:
        print("Welcome to Fuzzy Game Reccomender!\nType in a game title and we will find the most similar games from our database")
        title = input("Enter the title or type 'exit' to leave: ")
        if title == "exit":
            break
        # Same reason as above: keep the master title list intact so a title
        # can be queried again and stays a candidate for later queries.
        calculate_similarities(game_title=title, title_list=list(title_list), df=df)


if __name__ == '__main__':
    main(sys.argv[1:])

1
results/result.json Normal file

File diff suppressed because one or more lines are too long