# Reconstructed from the collapsed diff:
# diff --git a/app.py b/app.py new file mode 100644 index 0000000..a811850 --- /dev/null +++ b/app.py @@ -0,0 +1,109 @@
"""Fuzzy game recommender.

Exposes the recommender both as an interactive command-line loop (when run
as a script) and as a small Flask application with a single form at ``/``.
Two games are compared on three measures (categorical overlap, popularity
difference, word-vector cosine) which are combined into one score by
``fuzzy_controler_similiarity``.
"""
import json
import multiprocessing
import os

from flask import Flask, jsonify, render_template, request
from numpy import dot
from numpy.linalg import norm
import pandas as pd
import tqdm

from fuzzy_controllers import fuzzy_controler_similiarity

# NOTE: despite the ``.csv`` extension this file is loaded with
# ``pd.read_pickle``. Pickles execute code on load, so it must only ever be a
# trusted, locally generated file.
DATA_PATH = 'data/games_processed_vectorized.csv'

# Number of recommendations printed and returned per query.
TOP_N = 20

app = Flask(__name__)


def find_games_categorical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
    """Return the Jaccard index of the two games' ``all_categorical`` values.

    Each argument is a frame whose first row holds the game; the result is
    ``|A & B| / |A | B|`` rounded to two decimals, or ``0.0`` when both games
    have no categorical values at all (avoids a division by zero).
    """
    game_1_categorical = set(game_1['all_categorical'].tolist()[0])
    game_2_categorical = set(game_2['all_categorical'].tolist()[0])
    union = game_1_categorical | game_2_categorical
    if not union:
        return 0.0
    return round(len(game_1_categorical & game_2_categorical) / len(union), 2)


def find_games_numerical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
    """Return the absolute difference of the games' ``fuzzy_popularity``.

    Despite the name, the value is a *difference*: ``0`` means identical
    popularity. The first matching row of each frame is used, consistent
    with the other ``find_games_*`` helpers, and the result is rounded to
    two decimals.
    """
    # Read the scalar directly; round-tripping through ``to_string`` loses
    # precision and breaks when more than one row matches.
    game_1_popularity = float(game_1["fuzzy_popularity"].iloc[0])
    game_2_popularity = float(game_2["fuzzy_popularity"].iloc[0])
    return round(abs(game_1_popularity - game_2_popularity), 2)


def find_games_word_vector_distance(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
    """Return the cosine of the games' ``all_categorical_vector`` values.

    The result is ``dot(a, b) / (|a| * |b|)`` rounded to two decimals, or
    ``0.0`` when either vector has zero norm (the cosine is undefined there).
    """
    game_1_vector = game_1['all_categorical_vector'].tolist()[0]
    game_2_vector = game_2['all_categorical_vector'].tolist()[0]
    denominator = norm(game_1_vector) * norm(game_2_vector)
    if denominator == 0:
        return 0.0
    return round(dot(game_1_vector, game_2_vector) / denominator, 2)


def compare_games(title_1, title_2, df, show_graph=False):
    """Return the fuzzy similarity score of two games looked up by name.

    Args:
        title_1: Value of the ``name`` column identifying the first game.
        title_2: Value of the ``name`` column identifying the second game.
        df: Frame holding the games; must contain both titles.
        show_graph: Forwarded unchanged to ``fuzzy_controler_similiarity``.

    Returns:
        Whatever ``fuzzy_controler_similiarity`` returns for the three
        pairwise measures.

    Raises:
        IndexError: If either title has no row in ``df``.
    """
    game_1 = df.loc[df['name'] == title_1]
    game_2 = df.loc[df['name'] == title_2]

    categorical_similarity = find_games_categorical_similarity(game_1=game_1, game_2=game_2)
    numerical_difference = find_games_numerical_similarity(game_1=game_1, game_2=game_2)
    word_vector_distance = find_games_word_vector_distance(game_1=game_1, game_2=game_2)
    return fuzzy_controler_similiarity(categorical_data=categorical_similarity,
                                       numerical_data=numerical_difference,
                                       vector_distance=word_vector_distance,
                                       show_graph=show_graph)


def calculate_similarities(game_title, title_list, df):
    """Rank every other game by similarity to ``game_title``.

    Args:
        game_title: Name of the reference game.
        title_list: Names of the candidate games. It is not modified.
        df: Frame holding all games, keyed by its ``name`` column.

    Returns:
        Up to ``TOP_N`` dicts ``{"title": ..., "similarity": ...}`` sorted by
        descending similarity. An empty list is returned (after printing a
        message) when ``game_title`` is not present in ``df``.
    """
    if not (df['name'] == game_title).any():
        # Without this guard every worker would fail indexing an empty frame.
        print(f"'{game_title}' was not found in the database.")
        return []

    # Build a new list instead of calling ``title_list.remove``: callers reuse
    # the same list across queries, and mutating it would silently drop every
    # previously searched game from later recommendations.
    candidates = [title for title in title_list if title != game_title]
    args_list = [(game_title, compared_title, df) for compared_title in candidates]

    # Score all candidates in parallel. tqdm wraps the argument iterable, so
    # the bar advances as arguments are consumed by the pool.
    with multiprocessing.Pool() as pool:
        similarities = pool.starmap(
            compare_games,
            tqdm.tqdm(args_list, total=len(args_list), desc='Searching'),
        )

    all_games = [
        {"title": title, "similarity": similarity}
        for title, similarity in zip(candidates, similarities)
    ]
    top_games = sorted(all_games, key=lambda k: k['similarity'], reverse=True)[:TOP_N]

    print(f"\n ==== Top {TOP_N} most similar games: ====")
    for game in top_games:
        print(f"- {game['title']}")
    return top_games


def save_results(game_title, game_list):
    """Dump ``game_list`` as JSON under ``results/`` for ``game_title``.

    The file name is ``similarity_list_<title>.txt`` with the title
    lower-cased and spaces replaced by underscores. The ``results``
    directory is created if it does not exist yet.
    """
    os.makedirs("results", exist_ok=True)
    with open(f"results/similarity_list_{game_title.lower().replace(' ', '_')}.txt", 'w+') as fp:
        json.dump(game_list, fp)
    print("The full list of similar games available in the /results directory\n")


@app.route('/')
def index():
    """Serve the search form."""
    return render_template('index.html')


@app.route('/', methods=['POST'])
def form_post():
    """Return the games most similar to the submitted ``first_game`` as JSON.

    TODO: the original handler also read ``second_game`` and ``third_game``
    from the form but never used them; only the first title drives the
    search.
    """
    df = pd.read_pickle(DATA_PATH)
    first_game = request.form['first_game']

    title_list = df["name"].values.tolist()
    similarities = calculate_similarities(game_title=first_game, title_list=title_list, df=df)

    # jsonify keeps the response valid on Flask versions that do not accept a
    # bare list as a view return value.
    return jsonify(similarities)


def main():
    """Run the interactive command-line recommender until the user exits."""
    df = pd.read_pickle(DATA_PATH)
    title_list = df["name"].values.tolist()
    while True:
        print("Welcome to Fuzzy Game Reccomender!\nType in a game title and we will find the most similar games from our database")
        title = input("Enter the title or type 'exit' to leave: ")
        if title == "exit":
            break
        calculate_similarities(game_title=title, title_list=title_list, df=df)


# Kept at the bottom so the whole module (including the routes) is defined
# before the blocking CLI loop starts; the guard also stops multiprocessing
# workers from re-running the loop when they import this module.
if __name__ == '__main__':
    main()

# --- end of app.py ---
# Diff header that followed in the original paste:
# diff --git a/templates/index.html b/templates/index.html new file mode 100644 index 0000000..33e1d27 --- /dev/null +++ b/templates/index.html @@ -0,0 +1,105 @@
+
+ + + + +
+
+ + + + +
+
+ + + + +
+ +
+