Better normalization
This commit is contained in:
parent
f14696c41b
commit
c84af9ba00
@ -39,7 +39,7 @@ def okapi_bm25(query, document_vectors, vectorizer: TfidfVectorizer):
|
|||||||
def parse_owners(data):
|
def parse_owners(data):
|
||||||
data = str(data)
|
data = str(data)
|
||||||
if data == 'nan':
|
if data == 'nan':
|
||||||
return 1.0
|
return 1000.0
|
||||||
|
|
||||||
return float(data.split('-')[1])
|
return float(data.split('-')[1])
|
||||||
|
|
||||||
@ -67,9 +67,11 @@ while True:
|
|||||||
vectorizer.inverse_transform
|
vectorizer.inverse_transform
|
||||||
similarities = okapi_bm25(query_vector, document_vectors, vectorizer)
|
similarities = okapi_bm25(query_vector, document_vectors, vectorizer)
|
||||||
if enable_popularity:
|
if enable_popularity:
|
||||||
popularities = steam_data.join(steam_data_names, on='appid', lsuffix='name')['owners'].map(parse_owners).values
|
popularities = steam_data.join(steam_data_names, on='appid', lsuffix='name', how='left')['owners'].map(parse_owners).values
|
||||||
popularities_normalized = popularities / np.linalg.norm(popularities)
|
popularities_normalized = popularities / np.linalg.norm(popularities)
|
||||||
similarities = np.multiply(similarities, popularities_normalized)
|
similarities = np.multiply(similarities, popularities_normalized)
|
||||||
|
|
||||||
|
similarities = similarities / np.linalg.norm(similarities)
|
||||||
exec_time = time.time() - start_time
|
exec_time = time.time() - start_time
|
||||||
|
|
||||||
results_count = len([x for x in similarities if x > 0])
|
results_count = len([x for x in similarities if x > 0])
|
||||||
|
Loading…
Reference in New Issue
Block a user