compare_to_all_games #1

Merged
s444417 merged 9 commits from compare_to_all_games into master 2023-02-03 00:42:23 +01:00
3 changed files with 536 additions and 21 deletions
Showing only changes of commit 8f3a3f7bb1 - Show all commits

File diff suppressed because one or more lines are too long

View File

@ -10,6 +10,10 @@
python main.py --pres
It will generate a .json file, which can be presented by running all cells of `Fuzzy_presentation.ipynb`.
#### Random mode
python main.py --pres -r True
Processed dataset files are already provided, but they can be regenerated from the base ``games.csv`` file by running:
python process_dataset.py

13
main.py
View File

@ -4,11 +4,11 @@ from numpy import dot
from numpy.linalg import norm
import json
import multiprocessing
import tqdm
from tqdm.auto import tqdm
from sys import argv
import sys, getopt
import argparse
import random
def find_games_categorical_similarity(game_1: pd.DataFrame, game_2: pd.DataFrame) -> float:
game_1_categorical = set(game_1['all_categorical'].tolist()[0])
@ -39,7 +39,7 @@ def calculate_similarities(game_title, title_list, df, test=False):
similarities = []
# call the function for each item in parallel with multiprocessing
with multiprocessing.Pool() as pool:
for result in pool.starmap(compare_games, tqdm.tqdm(args_list, total=len(args_list), desc='Searching')):
for result in pool.starmap(compare_games, tqdm(args_list, total=len(args_list), desc='Searching')):
similarities.append(result)
all_games = []
@ -99,12 +99,17 @@ def main(argv):
title_list = df["name"].values.tolist()
test_mode = False
opts, args = getopt.getopt(argv, "", ["pres"])
random_mode = False
opts, args = getopt.getopt(argv, "r:", ["pres"])
for opt, arg in opts:
if "--pres" == opt:
test_mode = True
if "-r" == opt:
random_mode = arg
if (True == test_mode):
game_list = ["Call of Duty®: Modern Warfare® 2", "Project CARS", "DayZ", "STAR WARS™ Jedi Knight - Mysteries of the Sith™", "Overcooked"]
if (random_mode): game_list = [random.choice(title_list)]
result_dict = {"results": []}
for item in game_list:
titles_results = calculate_similarities(game_title=item, title_list=title_list, df=df, test=test_mode)