diff --git a/search-engine.py b/search-engine.py index 4b69879..6872f26 100644 --- a/search-engine.py +++ b/search-engine.py @@ -39,7 +39,7 @@ def okapi_bm25(query, document_vectors, vectorizer: TfidfVectorizer): def parse_owners(data): data = str(data) if data == 'nan': - return 1.0 + return 1000.0 return float(data.split('-')[1]) @@ -67,9 +67,11 @@ while True: vectorizer.inverse_transform similarities = okapi_bm25(query_vector, document_vectors, vectorizer) if enable_popularity: - popularities = steam_data.join(steam_data_names, on='appid', lsuffix='name')['owners'].map(parse_owners).values + popularities = steam_data.join(steam_data_names, on='appid', lsuffix='name', how='left')['owners'].map(parse_owners).values popularities_normalized = popularities / np.linalg.norm(popularities) similarities = np.multiply(similarities, popularities_normalized) + + similarities = similarities / np.linalg.norm(similarities) exec_time = time.time() - start_time results_count = len([x for x in similarities if x > 0])