add functionality to compare a game to the whole db
This commit is contained in:
parent
c953d55a3e
commit
da43071f7c
38
main.py
38
main.py
@ -2,7 +2,7 @@ import pandas as pd
|
|||||||
from fuzzy_controllers import fuzzy_controler_similiarity
|
from fuzzy_controllers import fuzzy_controler_similiarity
|
||||||
from numpy import dot
|
from numpy import dot
|
||||||
from numpy.linalg import norm
|
from numpy.linalg import norm
|
||||||
|
import json
|
||||||
|
|
||||||
def find_games_categorical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
|
def find_games_categorical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
|
||||||
game_1_categorical = set(game_1['all_categorical'].tolist()[0])
|
game_1_categorical = set(game_1['all_categorical'].tolist()[0])
|
||||||
@ -21,6 +21,24 @@ def find_games_word_vector_distance(game_1: pd.DataFrame, game_2: pd.DataFrame)
|
|||||||
game_2_vector = game_2['all_categorical_vector'].tolist()[0]
|
game_2_vector = game_2['all_categorical_vector'].tolist()[0]
|
||||||
return round(dot(game_1_vector, game_2_vector) / (norm(game_1_vector) * norm(game_2_vector)), 2)
|
return round(dot(game_1_vector, game_2_vector) / (norm(game_1_vector) * norm(game_2_vector)), 2)
|
||||||
|
|
||||||
|
def calculate_similarities(game_title, title_list, df):
    """Rank the titles in ``title_list`` by similarity to ``game_title``.

    Every title other than ``game_title`` itself is scored with
    ``compare_games`` (graph display disabled). The scored entries are
    ordered from highest to lowest similarity, the first 20 titles are
    printed, and the complete ordered list is handed to ``save_results``.

    Args:
        game_title: Title of the reference game.
        title_list: Iterable of candidate titles to compare against.
        df: DataFrame passed through unchanged to ``compare_games``.

    Returns:
        None. Results are printed and forwarded to ``save_results``.
    """
    scored = [
        {
            "title": other_title,
            "similarity": compare_games(title_1=game_title, title_2=other_title, df=df, show_graph=False),
        }
        for other_title in title_list
        if game_title != other_title  # never compare the game with itself
    ]

    # Stable in-place sort, best match first.
    scored.sort(key=lambda entry: entry['similarity'], reverse=True)

    print("\n ==== Top 20 most similar games: ====")
    for entry in scored[:20]:
        print(f"- {entry['title']}")

    save_results(game_title=game_title, game_list=scored)
|
||||||
|
|
||||||
|
def save_results(game_title, game_list):
    """Write the ranked similarity list for ``game_title`` to ``results/``.

    The list is serialized as JSON into
    ``results/similarity_list_<title>.txt``, where ``<title>`` is the
    lower-cased game title with spaces replaced by underscores. Path
    separators in the title are also replaced by underscores so that a
    title such as "Fate/EXTELLA" does not point into a non-existent
    sub-directory.

    Args:
        game_title: Title of the reference game; used to build the file name.
        game_list: JSON-serializable list of result entries to store.

    Raises:
        OSError: If the results directory or file cannot be created/written.
        TypeError: If ``game_list`` contains values ``json`` cannot serialize.
    """
    import os  # local import: keeps this block self-contained

    # The directory may be missing (e.g. fresh checkout without the .gitkeep).
    os.makedirs("results", exist_ok=True)

    file_stem = game_title.lower().replace(' ', '_')
    for separator in ('/', '\\'):
        file_stem = file_stem.replace(separator, '_')

    # Plain write mode is enough; the file is never read back here.
    with open(f"results/similarity_list_{file_stem}.txt", 'w', encoding='utf-8') as fp:
        json.dump(game_list, fp)

    # Announce the file only once it has actually been written.
    print("The full list of similar games available in the /results directory\n")
|
||||||
|
|
||||||
def compare_games(title_1: str, title_2: str, df: pd.DataFrame, show_graph: bool = False) -> float:
|
def compare_games(title_1: str, title_2: str, df: pd.DataFrame, show_graph: bool = False) -> float:
|
||||||
game_1 = df.loc[df['name'] == title_1]
|
game_1 = df.loc[df['name'] == title_1]
|
||||||
@ -29,9 +47,6 @@ def compare_games(title_1: str, title_2: str, df: pd.DataFrame, show_graph: bool
|
|||||||
categorical_similarity = find_games_categorical_similarity(game_1=game_1, game_2=game_2)
|
categorical_similarity = find_games_categorical_similarity(game_1=game_1, game_2=game_2)
|
||||||
numerical_difference = find_games_numerical_similarity(game_1=game_1, game_2=game_2)
|
numerical_difference = find_games_numerical_similarity(game_1=game_1, game_2=game_2)
|
||||||
word_vector_distance = find_games_word_vector_distance(game_1=game_1, game_2=game_2)
|
word_vector_distance = find_games_word_vector_distance(game_1=game_1, game_2=game_2)
|
||||||
print(f"Categorical similarity: {categorical_similarity}\nNumerical difference: {numerical_difference}\n"
|
|
||||||
f"Word vector distance: {word_vector_distance}")
|
|
||||||
|
|
||||||
similarity_score = fuzzy_controler_similiarity(categorical_data=categorical_similarity,
|
similarity_score = fuzzy_controler_similiarity(categorical_data=categorical_similarity,
|
||||||
numerical_data=numerical_difference,
|
numerical_data=numerical_difference,
|
||||||
vector_distance=word_vector_distance, show_graph=show_graph)
|
vector_distance=word_vector_distance, show_graph=show_graph)
|
||||||
@ -41,10 +56,13 @@ def compare_games(title_1: str, title_2: str, df: pd.DataFrame, show_graph: bool
|
|||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
df = pd.read_pickle('data/games_processed_vectorized.csv')
|
df = pd.read_pickle('data/games_processed_vectorized.csv')
|
||||||
|
title_list = df["name"].values.tolist()[:2000]
|
||||||
while True:
|
run_program = True
|
||||||
title_1 = input("Enter title 1: ")
|
while run_program:
|
||||||
title_2 = input("Enter title 2: ")
|
print("Welcome to Fuzzy Game Reccomender!\nType in a game title and we will find the most similar games from our database")
|
||||||
similarity_score = compare_games(title_1=title_1, title_2=title_2, df=df, show_graph=False)
|
title = input("Enter the title or type 'exit' to leave: ")
|
||||||
print(f'Similarity_score: {similarity_score}')
|
if title == "exit":
|
||||||
|
run_program = False
|
||||||
|
else:
|
||||||
|
calculate_similarities(game_title=title, title_list=title_list, df=df)
|
||||||
|
|
||||||
|
@ -35,7 +35,7 @@ def replace_with_vector(row, w2v):
|
|||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
df = pd.read_csv('data/games.csv')
|
df = pd.read_csv('data/games.csv')
|
||||||
|
df = df.drop_duplicates(subset=['name'])
|
||||||
df['positive_percentage'] = df.apply(
|
df['positive_percentage'] = df.apply(
|
||||||
lambda row: calculate_positive_percentage(row.positive_ratings, row.negative_ratings), axis=1)
|
lambda row: calculate_positive_percentage(row.positive_ratings, row.negative_ratings), axis=1)
|
||||||
df['owners'] = df.apply(lambda row: owners_average_max_min(row.owners), axis=1)
|
df['owners'] = df.apply(lambda row: owners_average_max_min(row.owners), axis=1)
|
||||||
|
0
results/.gitkeep
Normal file
0
results/.gitkeep
Normal file
Loading…
Reference in New Issue
Block a user