51 lines
2.4 KiB
Python
51 lines
2.4 KiB
Python
import pandas as pd
|
|
from fuzzy_controllers import fuzzy_controler_similiarity
|
|
from numpy import dot
|
|
from numpy.linalg import norm
|
|
|
|
|
|
def find_games_categorical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
    """Return the Jaccard similarity of two games' categorical values.

    Only the first row of each frame is used. The ``all_categorical`` cell
    of that row is converted to a set, so it must be an iterable of
    hashable values.

    Args:
        game_1: Frame holding the first game in its first row.
        game_2: Frame holding the second game in its first row.

    Returns:
        ``len(A & B) / len(A | B)`` rounded to two decimals, or ``0.0`` when
        both games have no categorical values at all.

    Raises:
        IndexError: If either frame has no rows.
    """
    game_1_categorical = set(game_1['all_categorical'].tolist()[0])
    game_2_categorical = set(game_2['all_categorical'].tolist()[0])

    union = game_1_categorical | game_2_categorical
    if not union:
        # Both sets are empty: there is nothing to compare, and dividing by
        # len(union) would raise ZeroDivisionError.
        return 0.0

    return round(len(game_1_categorical & game_2_categorical) / len(union), 2)
|
|
|
|
|
|
def find_games_numerical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
    """Return the absolute difference of two games' ``fuzzy_popularity``.

    Only the first row of each frame is used. Despite the function name,
    the result is a difference: 0 means the two values are equal.

    Args:
        game_1: Frame holding the first game in its first row.
        game_2: Frame holding the second game in its first row.

    Returns:
        ``abs(popularity_1 - popularity_2)`` rounded to two decimals.

    Raises:
        IndexError: If either frame has no rows.
    """
    # Read the scalar positionally instead of parsing Series.to_string():
    # the string form goes through display formatting and becomes multi-line
    # (unparseable) as soon as more than one row matches.
    game_1_popularity = float(game_1["fuzzy_popularity"].iloc[0])
    game_2_popularity = float(game_2["fuzzy_popularity"].iloc[0])

    return round(abs(game_1_popularity - game_2_popularity), 2)
|
|
|
|
|
|
def find_games_word_vector_distance(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
    """Return the cosine similarity of two games' categorical word vectors.

    Only the first row of each frame is used; its ``all_categorical_vector``
    cell is passed to ``numpy.dot`` and ``numpy.linalg.norm``. Despite the
    "distance" in the name, the value computed is
    ``dot(a, b) / (norm(a) * norm(b))``, i.e. larger means more similar.

    Args:
        game_1: Frame holding the first game in its first row.
        game_2: Frame holding the second game in its first row.

    Returns:
        The cosine similarity rounded to two decimals, or ``0.0`` when
        either vector has zero norm.

    Raises:
        IndexError: If either frame has no rows.
    """
    game_1_vector = game_1['all_categorical_vector'].tolist()[0]
    game_2_vector = game_2['all_categorical_vector'].tolist()[0]

    norm_product = norm(game_1_vector) * norm(game_2_vector)
    if norm_product == 0:
        # A zero vector has no direction; dividing would produce NaN and a
        # NumPy RuntimeWarning instead of a usable score.
        return 0.0

    return round(float(dot(game_1_vector, game_2_vector) / norm_product), 2)
|
|
|
|
|
|
def compare_games(title_1: str, title_2: str, df: pd.DataFrame, show_graph: bool = False) -> float:
    """Compare two games by title and return their fuzzy similarity score.

    The rows whose ``name`` equals each title are selected from ``df``, three
    partial measures are computed from them (categorical similarity,
    numerical difference, word-vector measure), printed to stdout, and then
    passed to ``fuzzy_controler_similiarity``.

    Args:
        title_1: Exact value of the ``name`` column for the first game.
        title_2: Exact value of the ``name`` column for the second game.
        df: Frame containing the ``name`` column plus the columns read by the
            three ``find_games_*`` helpers.
        show_graph: Forwarded unchanged to ``fuzzy_controler_similiarity``.

    Returns:
        Whatever ``fuzzy_controler_similiarity`` returns (annotated as float).

    Raises:
        IndexError: If either title matches no row in ``df``.
    """
    game_1 = df.loc[df['name'] == title_1]
    game_2 = df.loc[df['name'] == title_2]

    # Fail early and name the missing title. Without this check the helpers
    # raise a bare "list index out of range" on the empty selection, which
    # hides the real cause. IndexError is kept so existing handlers still work.
    for title, game in ((title_1, game_1), (title_2, game_2)):
        if game.empty:
            raise IndexError(f"No game named {title!r} found in the data frame")

    categorical_similarity = find_games_categorical_similarity(game_1=game_1, game_2=game_2)
    numerical_difference = find_games_numerical_similarity(game_1=game_1, game_2=game_2)
    word_vector_distance = find_games_word_vector_distance(game_1=game_1, game_2=game_2)

    print(f"Categorical similarity: {categorical_similarity}\nNumerical difference: {numerical_difference}\n"
          f"Word vector distance: {word_vector_distance}")

    similarity_score = fuzzy_controler_similiarity(categorical_data=categorical_similarity,
                                                   numerical_data=numerical_difference,
                                                   vector_distance=word_vector_distance,
                                                   show_graph=show_graph)
    return similarity_score
|
|
|
|
|
|
if __name__ == '__main__':
    # Interactive loop: repeatedly ask for two titles and print their score.
    #
    # NOTE(review): the path ends in .csv but is loaded with read_pickle, so
    # the file is presumably a pickle with a misleading extension - confirm.
    # Unpickling can execute arbitrary code; only load files you trust.
    df = pd.read_pickle('data/games_processed_vectorized.csv')

    while True:
        try:
            title_1 = input("Enter title 1: ")
            title_2 = input("Enter title 2: ")
        except (EOFError, KeyboardInterrupt):
            # End of input or Ctrl-C: leave the loop without a traceback.
            print()
            break

        try:
            similarity_score = compare_games(title_1=title_1, title_2=title_2, df=df, show_graph=False)
        except IndexError as error:
            # A title that matches no row surfaces as IndexError; report it
            # and keep prompting instead of terminating the session.
            print(f"Could not compare games: {error}")
            continue

        print(f'Similarity_score: {similarity_score}')
|
|
|