Better normalization
This commit is contained in:
parent
f14696c41b
commit
c84af9ba00
@ -39,7 +39,7 @@ def okapi_bm25(query, document_vectors, vectorizer: TfidfVectorizer):
|
||||
def parse_owners(data) -> float:
    """Convert one ``owners`` entry into a single numeric value.

    The entry is first turned into its string form. Two cases are handled:

    * If the string form is exactly ``'nan'`` (which is what ``str()`` yields
      for a float NaN), a fixed fallback of ``1000.0`` is returned.
    * Otherwise the string is split on ``'-'`` and the second piece is
      converted to ``float`` (e.g. ``'10000-20000'`` gives ``20000.0``).
      Presumably the entry is a ``low-high`` range and this picks the upper
      bound; confirm against the data set.

    Args:
        data: The raw entry; any object whose ``str()`` form is either
            ``'nan'`` or contains a ``'-'`` followed by a number.

    Returns:
        The parsed value as a ``float``, or ``1000.0`` for a ``'nan'`` entry.

    Raises:
        IndexError: If the string form contains no ``'-'``.
        ValueError: If the piece after the first ``'-'`` is not a valid float.
    """
    text = str(data)

    if text == 'nan':
        # NOTE(review): the flattened diff listed both 1.0 and 1000.0 for this
        # branch; the later value is kept as the effective fallback so that a
        # second, unreachable return is not left behind. Confirm that 1000.0
        # is the intended value.
        return 1000.0

    # Only the piece right after the first hyphen is used.
    return float(text.split('-')[1])
|
||||
|
||||
@ -67,9 +67,11 @@ while True:
|
||||
vectorizer.inverse_transform
|
||||
similarities = okapi_bm25(query_vector, document_vectors, vectorizer)
|
||||
if enable_popularity:
|
||||
popularities = steam_data.join(steam_data_names, on='appid', lsuffix='name')['owners'].map(parse_owners).values
|
||||
popularities = steam_data.join(steam_data_names, on='appid', lsuffix='name', how='left')['owners'].map(parse_owners).values
|
||||
popularities_normalized = popularities / np.linalg.norm(popularities)
|
||||
similarities = np.multiply(similarities, popularities_normalized)
|
||||
|
||||
similarities = similarities / np.linalg.norm(similarities)
|
||||
exec_time = time.time() - start_time
|
||||
|
||||
results_count = len([x for x in similarities if x > 0])
|
||||
|
Loading…
Reference in New Issue
Block a user