549 KiB
549 KiB
import helpers
import pandas as pd
import numpy as np
import scipy.sparse as sparse
from collections import defaultdict
from itertools import chain
import random
import time
import matplotlib.pyplot as plt
import implicit
import evaluation_measures as ev
# Load the train/test splits (tab-separated files without a header row).
train_read=pd.read_csv('./Datasets/ml-100k/train.csv', sep='\t', header=None)
test_read=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None)
# Convert both splits to sparse matrices. The helper also returns four lookup
# tables; judging by how they are used below, `user_code_id` / `item_code_id`
# translate matrix row/column indices back to the ids written to the output
# files (TODO confirm against helpers.data_to_csr).
train_ui, test_ui, user_code_id, user_id_code, item_code_id, item_id_code = helpers.data_to_csr(train_read, test_read)
# Scale every stored training value by `alpha` in place before fitting.
alpha = 30
train_ui*=alpha
# The model is fitted on the transposed (item x user) matrix.
train_iu=train_ui.transpose().tocsr()
model = implicit.als.AlternatingLeastSquares(factors=200, regularization=0.1, iterations=10)
model.fit(train_iu)
HBox(children=(FloatProgress(value=0.0, max=10.0), HTML(value='')))
def top_k_recommendations(model, user_code_id, item_code_id, topK=10, user_items=None):
    """Build a table with the top-K recommended items for every user.

    Args:
        model: fitted recommender exposing
            ``recommend(user, user_items, N=..., filter_already_liked_items=...)``
            that returns an iterable of pairs (item code first).
        user_code_id: lookup from a matrix row index to the user id that is
            written to the first column of the result.
        item_code_id: lookup from an item code returned by the model to the
            item id that is written to the result.
        topK: number of recommendations requested per user.
        user_items: user x item matrix handed to ``model.recommend``; its first
            dimension defines the users that are iterated.  When omitted, the
            module-level ``train_ui`` is used, as in the original notebook.

    Returns:
        pandas.DataFrame with one row per user laid out as
        ``[user_id, item_id, value, item_id, value, ...]`` (the values are
        whatever the model returns next to each item - presumably scores).
    """
    if user_items is None:
        # Backward-compatible default: fall back to the notebook-level matrix.
        user_items = train_ui

    recommendations = []
    for u in range(user_items.shape[0]):
        # Honour the requested list length instead of a hard-coded 10.
        u_recommended_items = model.recommend(
            u, user_items, N=topK, filter_already_liked_items=True
        )
        # Flatten the pairs so that every user becomes a single flat row.
        recommendations.append([user_code_id[u]] + list(chain(*u_recommended_items)))

    reco = pd.DataFrame(recommendations)
    # Odd columns hold item codes; translate them to item ids.  A per-column
    # Series.map is used because DataFrame.applymap is deprecated in pandas.
    reco.iloc[:, 1::2] = reco.iloc[:, 1::2].apply(
        lambda column: column.map(lambda code: item_code_id[code])
    )
    return reco
def estimate(model, user_code_id, item_code_id, test_ui):
    """Score every (user, item) pair stored in ``test_ui`` with the model.

    For each non-zero entry of ``test_ui`` the model is asked to rank that
    single item for that user (against the module-level ``train_ui``), and the
    second element of the first returned pair is kept as the estimate.

    Returns:
        list of ``[user_id, item_id, estimate]`` rows, with ids translated
        through ``user_code_id`` and ``item_code_id``.
    """
    user_codes, item_codes = test_ui.nonzero()
    return [
        [
            user_code_id[user],
            item_code_id[item],
            model.rank_items(userid=user, user_items=train_ui, selected_items=[item])[0][1],
        ]
        for user, item in zip(user_codes, item_codes)
    ]
# Produce the top-10 recommendation table for every user and store it as a
# header-less CSV.
reco=top_k_recommendations(model, user_code_id, item_code_id, topK=10)
reco.to_csv('Recommendations generated/ml-100k/Ready_ImplicitALS_reco.csv', index=False, header=False)
# Produce model estimates for the (user, item) pairs present in the test matrix
# and store them next to the recommendations.
estimations_df=pd.DataFrame(estimate(model, user_code_id, item_code_id, test_ui))
estimations_df.to_csv('Recommendations generated/ml-100k/Ready_ImplicitALS_estimations.csv', index=False, header=False)
import importlib

import evaluation_measures as ev

# Reload the module so that edits made to evaluation_measures.py are picked up
# without restarting the kernel.  `importlib.reload` replaces the deprecated
# `imp.reload` (the `imp` module no longer exists in Python 3.12+).
importlib.reload(ev)

# Evaluate from the files saved above rather than from in-memory objects.
estimations_df = pd.read_csv('Recommendations generated/ml-100k/Ready_ImplicitALS_estimations.csv', header=None)
reco = np.loadtxt('Recommendations generated/ml-100k/Ready_ImplicitALS_reco.csv', delimiter=',')

# NOTE(review): the original note here said a shorter call relying on default
# arguments is also possible; the defaults are not visible in this file, so
# confirm them in evaluation_measures before using the short form.
ev.evaluate(
    test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None),
    estimations_df=estimations_df,
    reco=reco,
    super_reactions=[4, 5],
)
943it [00:00, 7273.15it/s]
RMSE | MAE | precision | recall | F_1 | F_05 | precision_super | recall_super | NDCG | mAP | MRR | LAUC | HR | Reco in test | Test coverage | Shannon | Gini | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 3.266101 | 3.065824 | 0.255037 | 0.188653 | 0.176852 | 0.201189 | 0.166631 | 0.214925 | 0.305908 | 0.172546 | 0.523871 | 0.591709 | 0.889714 | 1.0 | 0.502886 | 5.722957 | 0.827507 |
Hyperparameter tuning
Number of latent factors
from tqdm import tqdm

# Everything below up to the loop is identical for every tested value, so it
# is computed once instead of once per iteration.
train_read = pd.read_csv('./Datasets/ml-100k/train.csv', sep='\t', header=None)
test_read = pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None)
train_ui, test_ui, user_code_id, user_id_code, item_code_id, item_id_code = helpers.data_to_csr(train_read, test_read)
train_ui *= 100  # scaling applied to the training values throughout this sweep
train_iu = train_ui.transpose().tocsr()
# Separate, untouched copy of the test split used only for evaluation.
test_for_evaluation = pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None)

# Sweep the number of latent factors and collect one row of metrics per value.
result = []
for factors in tqdm(list(np.arange(25, 400, 25))):
    model = implicit.als.AlternatingLeastSquares(factors=factors, regularization=0.1, iterations=10)
    model.fit(train_iu, show_progress=False)

    reco = top_k_recommendations(model, user_code_id, item_code_id, topK=10)
    estimations_df = pd.DataFrame(estimate(model, user_code_id, item_code_id, test_ui))

    # Hand evaluation its own copy so no iteration can affect the next one.
    to_append = ev.evaluate(
        test=test_for_evaluation.copy(),
        estimations_df=estimations_df,
        reco=np.array(reco),
        super_reactions=[4, 5],
    )
    to_append.insert(0, "Factors", factors)
    result.append(to_append)

result = pd.concat(result)
result
0%| | 0/15 [00:00<?, ?it/s] 0it [00:00, ?it/s][A 943it [00:00, 7627.67it/s][A 7%|▋ | 1/15 [00:01<00:24, 1.72s/it] 0it [00:00, ?it/s][A 943it [00:00, 7209.44it/s][A 13%|█▎ | 2/15 [00:03<00:22, 1.73s/it] 0it [00:00, ?it/s][A 943it [00:00, 7676.76it/s][A 20%|██ | 3/15 [00:05<00:20, 1.73s/it] 0it [00:00, ?it/s][A 943it [00:00, 6846.90it/s][A 27%|██▋ | 4/15 [00:06<00:19, 1.74s/it] 0it [00:00, ?it/s][A 943it [00:00, 7293.55it/s][A 33%|███▎ | 5/15 [00:08<00:17, 1.78s/it] 0it [00:00, ?it/s][A 943it [00:00, 7443.26it/s][A 40%|████ | 6/15 [00:10<00:16, 1.81s/it] 0it [00:00, ?it/s][A 943it [00:00, 6977.23it/s][A 47%|████▋ | 7/15 [00:12<00:14, 1.85s/it] 0it [00:00, ?it/s][A 943it [00:00, 7493.35it/s][A 53%|█████▎ | 8/15 [00:14<00:13, 1.86s/it] 0it [00:00, ?it/s][A 943it [00:00, 7549.26it/s][A 60%|██████ | 9/15 [00:16<00:11, 1.90s/it] 0it [00:00, ?it/s][A 943it [00:00, 7077.91it/s][A 67%|██████▋ | 10/15 [00:18<00:09, 1.98s/it] 0it [00:00, ?it/s][A 943it [00:00, 7225.27it/s][A 73%|███████▎ | 11/15 [00:20<00:08, 2.05s/it] 0it [00:00, ?it/s][A 943it [00:00, 7431.09it/s][A 80%|████████ | 12/15 [00:23<00:06, 2.12s/it] 0it [00:00, ?it/s][A 943it [00:00, 7203.73it/s][A 87%|████████▋ | 13/15 [00:25<00:04, 2.17s/it] 0it [00:00, ?it/s][A 943it [00:00, 7066.09it/s][A 93%|█████████▎| 14/15 [00:27<00:02, 2.24s/it] 0it [00:00, ?it/s][A 943it [00:00, 7179.03it/s][A 100%|██████████| 15/15 [00:30<00:00, 2.03s/it]
Factors | RMSE | MAE | precision | recall | F_1 | F_05 | precision_super | recall_super | NDCG | mAP | MRR | LAUC | HR | Reco in test | Test coverage | Shannon | Gini | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 25 | 2.846560 | 2.620794 | 0.105726 | 0.108047 | 0.090518 | 0.093488 | 0.061481 | 0.123853 | 0.116915 | 0.048083 | 0.203663 | 0.550716 | 0.624602 | 0.996819 | 0.715007 | 6.465360 | 0.644456 |
0 | 50 | 2.898666 | 2.677181 | 0.143690 | 0.138828 | 0.119410 | 0.125049 | 0.089700 | 0.154579 | 0.164112 | 0.074820 | 0.284589 | 0.566289 | 0.750795 | 0.999258 | 0.664502 | 6.275680 | 0.699156 |
0 | 75 | 2.944450 | 2.726341 | 0.171898 | 0.154471 | 0.135551 | 0.145599 | 0.104936 | 0.173039 | 0.195845 | 0.092338 | 0.337621 | 0.574228 | 0.802757 | 0.999894 | 0.598846 | 6.057144 | 0.757071 |
0 | 100 | 2.985948 | 2.769517 | 0.183775 | 0.155463 | 0.140189 | 0.152755 | 0.110193 | 0.167837 | 0.215259 | 0.106274 | 0.383384 | 0.574794 | 0.813362 | 1.000000 | 0.558442 | 5.920294 | 0.788368 |
0 | 125 | 3.017333 | 2.802986 | 0.198940 | 0.165227 | 0.149710 | 0.164644 | 0.127575 | 0.183066 | 0.234839 | 0.119023 | 0.416181 | 0.579735 | 0.830329 | 0.999788 | 0.532468 | 5.813474 | 0.811387 |
0 | 150 | 3.042801 | 2.830367 | 0.214528 | 0.168468 | 0.155574 | 0.173686 | 0.139378 | 0.189273 | 0.247685 | 0.126369 | 0.424573 | 0.581421 | 0.856840 | 1.000000 | 0.507937 | 5.727541 | 0.825294 |
0 | 175 | 3.063186 | 2.851980 | 0.218664 | 0.170133 | 0.157283 | 0.176077 | 0.141202 | 0.197503 | 0.254178 | 0.132146 | 0.435676 | 0.582275 | 0.854719 | 1.000000 | 0.497835 | 5.681921 | 0.833476 |
0 | 200 | 3.089628 | 2.878716 | 0.220042 | 0.168505 | 0.156406 | 0.176143 | 0.142597 | 0.188596 | 0.257188 | 0.134919 | 0.443163 | 0.581471 | 0.845175 | 0.999894 | 0.490620 | 5.645500 | 0.838308 |
0 | 225 | 3.102684 | 2.892941 | 0.221209 | 0.167369 | 0.155187 | 0.175486 | 0.140773 | 0.185437 | 0.256898 | 0.134811 | 0.441676 | 0.580896 | 0.855779 | 1.000000 | 0.474026 | 5.628248 | 0.842938 |
0 | 250 | 3.110198 | 2.901659 | 0.221421 | 0.166139 | 0.154255 | 0.175268 | 0.145064 | 0.193650 | 0.257955 | 0.136844 | 0.436043 | 0.580283 | 0.851538 | 1.000000 | 0.482684 | 5.552392 | 0.850104 |
0 | 275 | 3.123178 | 2.915596 | 0.234146 | 0.163006 | 0.158760 | 0.183134 | 0.152146 | 0.183701 | 0.270489 | 0.144723 | 0.472533 | 0.578769 | 0.853659 | 1.000000 | 0.460317 | 5.597279 | 0.847138 |
0 | 300 | 3.130343 | 2.923515 | 0.237116 | 0.167547 | 0.160936 | 0.185078 | 0.154292 | 0.192522 | 0.272852 | 0.149091 | 0.460155 | 0.581044 | 0.841994 | 1.000000 | 0.457431 | 5.534284 | 0.855633 |
0 | 325 | 3.141945 | 2.934988 | 0.232025 | 0.165168 | 0.157315 | 0.181076 | 0.147639 | 0.179435 | 0.272440 | 0.150512 | 0.462026 | 0.579861 | 0.840933 | 1.000000 | 0.451659 | 5.500822 | 0.860569 |
0 | 350 | 3.149635 | 2.943592 | 0.232662 | 0.167366 | 0.158349 | 0.181597 | 0.151717 | 0.186675 | 0.270835 | 0.146983 | 0.463859 | 0.580944 | 0.853659 | 1.000000 | 0.458153 | 5.490997 | 0.860762 |
0 | 375 | 3.161007 | 2.955454 | 0.234571 | 0.167177 | 0.159795 | 0.183107 | 0.150536 | 0.180057 | 0.269211 | 0.147040 | 0.451392 | 0.580851 | 0.851538 | 0.999894 | 0.460317 | 5.478842 | 0.860159 |
### import matplotlib.pyplot as plt
import itertools

# Every column except the swept hyperparameter is a metric to chart.
metrics = [column for column in result.columns if column != 'Factors']

# Grid of axes: `charts_per_row` is passed as the number of rows and
# `charts_per_column` as the number of columns.
charts_per_row = 6
charts_per_column = 3
fig, axes = plt.subplots(
    nrows=charts_per_row,
    ncols=charts_per_column,
    figsize=(18, 7 * charts_per_row),
)

# (row, column) positions of the grid in row-major order.
to_iter = list(itertools.product(range(charts_per_row), range(charts_per_column)))

# One chart per metric: hyperparameter value on x, metric value on y.
for position, metric in enumerate(metrics):
    result[['Factors', metric]].plot(ax=axes[to_iter[position]], title=metric, x=0, y=1)
Alpha
from tqdm import tqdm

# Number of latent factors held fixed while alpha is swept.  The original cell
# silently reused whatever `factors` was left behind by the previous sweep (its
# last value, 375) and failed with NameError when run on its own; it is now
# pinned to the value used for the baseline model.
factors = 200

# Loading and converting the data does not depend on alpha, so do it once.
train_read = pd.read_csv('./Datasets/ml-100k/train.csv', sep='\t', header=None)
test_read = pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None)
unscaled_train_ui, test_ui, user_code_id, user_id_code, item_code_id, item_id_code = helpers.data_to_csr(train_read, test_read)
# Separate, untouched copy of the test split used only for evaluation.
test_for_evaluation = pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None)

# Sweep alpha and collect one row of metrics per value.
result = []
for alpha in tqdm([1, 3, 7] + list(np.arange(10, 200, 20))):
    # Start from the unscaled matrix each time so scalings do not compound.
    # `train_ui` must stay a module-level name: top_k_recommendations and
    # estimate read it.
    train_ui = unscaled_train_ui.copy()
    train_ui *= alpha
    train_iu = train_ui.transpose().tocsr()

    model = implicit.als.AlternatingLeastSquares(factors=factors, regularization=0.1, iterations=10)
    model.fit(train_iu, show_progress=False)

    reco = top_k_recommendations(model, user_code_id, item_code_id, topK=10)
    estimations_df = pd.DataFrame(estimate(model, user_code_id, item_code_id, test_ui))

    # Hand evaluation its own copy so no iteration can affect the next one.
    to_append = ev.evaluate(
        test=test_for_evaluation.copy(),
        estimations_df=estimations_df,
        reco=np.array(reco),
        super_reactions=[4, 5],
    )
    to_append.insert(0, "Alpha", alpha)
    result.append(to_append)

result = pd.concat(result)
result
0%| | 0/13 [00:00<?, ?it/s] 0it [00:00, ?it/s][A 943it [00:00, 7477.64it/s][A 8%|▊ | 1/13 [00:02<00:31, 2.61s/it] 0it [00:00, ?it/s][A 943it [00:00, 7366.79it/s][A 15%|█▌ | 2/13 [00:05<00:28, 2.58s/it] 0it [00:00, ?it/s][A 943it [00:00, 7482.23it/s][A 23%|██▎ | 3/13 [00:07<00:25, 2.58s/it] 0it [00:00, ?it/s][A 943it [00:00, 7401.22it/s][A 31%|███ | 4/13 [00:10<00:23, 2.58s/it] 0it [00:00, ?it/s][A 943it [00:00, 6881.15it/s][A 38%|███▊ | 5/13 [00:12<00:20, 2.59s/it] 0it [00:00, ?it/s][A 943it [00:00, 6983.24it/s][A 46%|████▌ | 6/13 [00:15<00:17, 2.56s/it] 0it [00:00, ?it/s][A 943it [00:00, 6942.52it/s][A 54%|█████▍ | 7/13 [00:17<00:15, 2.55s/it] 0it [00:00, ?it/s][A 943it [00:00, 7235.29it/s][A 62%|██████▏ | 8/13 [00:20<00:12, 2.53s/it] 0it [00:00, ?it/s][A 943it [00:00, 7101.42it/s][A 69%|██████▉ | 9/13 [00:22<00:10, 2.53s/it] 0it [00:00, ?it/s][A 943it [00:00, 7151.07it/s][A 77%|███████▋ | 10/13 [00:25<00:07, 2.51s/it] 0it [00:00, ?it/s][A 943it [00:00, 7058.24it/s][A 85%|████████▍ | 11/13 [00:27<00:04, 2.49s/it] 0it [00:00, ?it/s][A 943it [00:00, 7171.05it/s][A 92%|█████████▏| 12/13 [00:30<00:02, 2.48s/it] 0it [00:00, ?it/s][A 943it [00:00, 6831.82it/s][A 100%|██████████| 13/13 [00:32<00:00, 2.52s/it]
Alpha | RMSE | MAE | precision | recall | F_1 | F_05 | precision_super | recall_super | NDCG | mAP | MRR | LAUC | HR | Reco in test | Test coverage | Shannon | Gini | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 3.667401 | 3.489285 | 0.097349 | 0.072948 | 0.068621 | 0.077305 | 0.054614 | 0.071923 | 0.115946 | 0.050679 | 0.253435 | 0.533195 | 0.577943 | 0.994804 | 0.730880 | 6.699624 | 0.535431 |
0 | 3 | 3.628888 | 3.449626 | 0.142418 | 0.099087 | 0.095250 | 0.109958 | 0.086159 | 0.105848 | 0.170625 | 0.082786 | 0.347226 | 0.546454 | 0.698834 | 0.994910 | 0.712121 | 6.639279 | 0.568322 |
0 | 7 | 3.557600 | 3.376851 | 0.219618 | 0.145557 | 0.142994 | 0.167132 | 0.142489 | 0.165904 | 0.259872 | 0.143493 | 0.465442 | 0.570010 | 0.820785 | 0.997137 | 0.629870 | 6.325220 | 0.691428 |
0 | 10 | 3.507947 | 3.325719 | 0.255143 | 0.174045 | 0.168845 | 0.196237 | 0.170386 | 0.197982 | 0.307968 | 0.179416 | 0.542711 | 0.584408 | 0.863203 | 0.998197 | 0.579365 | 6.075866 | 0.756129 |
0 | 30 | 3.359788 | 3.168804 | 0.271262 | 0.191732 | 0.184405 | 0.211858 | 0.182833 | 0.228368 | 0.323901 | 0.187954 | 0.549506 | 0.593310 | 0.889714 | 0.999470 | 0.494949 | 5.612128 | 0.840695 |
0 | 50 | 3.273254 | 3.075697 | 0.265217 | 0.188703 | 0.179673 | 0.206738 | 0.174571 | 0.214519 | 0.315700 | 0.182200 | 0.532506 | 0.591772 | 0.872747 | 1.000000 | 0.466089 | 5.493297 | 0.856776 |
0 | 70 | 3.214601 | 3.012592 | 0.249629 | 0.175516 | 0.169005 | 0.194526 | 0.162661 | 0.202930 | 0.292262 | 0.163398 | 0.501654 | 0.585098 | 0.862142 | 0.999788 | 0.450938 | 5.482008 | 0.860232 |
0 | 90 | 3.174570 | 2.970676 | 0.244751 | 0.171145 | 0.164571 | 0.190198 | 0.162876 | 0.199225 | 0.283543 | 0.156381 | 0.479877 | 0.582885 | 0.849417 | 0.999894 | 0.450216 | 5.491347 | 0.861497 |
0 | 110 | 3.138148 | 2.931536 | 0.229692 | 0.164445 | 0.156457 | 0.179431 | 0.151931 | 0.191769 | 0.267423 | 0.144461 | 0.460625 | 0.579477 | 0.845175 | 1.000000 | 0.458153 | 5.474450 | 0.861871 |
0 | 130 | 3.114034 | 2.904438 | 0.225133 | 0.158999 | 0.152533 | 0.175715 | 0.149571 | 0.183970 | 0.258418 | 0.138422 | 0.438959 | 0.576722 | 0.826087 | 1.000000 | 0.449495 | 5.474419 | 0.862658 |
0 | 150 | 3.089790 | 2.880051 | 0.219300 | 0.155447 | 0.149104 | 0.171284 | 0.144635 | 0.176639 | 0.250976 | 0.130455 | 0.445470 | 0.574904 | 0.851538 | 1.000000 | 0.461760 | 5.473903 | 0.864300 |
0 | 170 | 3.070975 | 2.858780 | 0.216331 | 0.158103 | 0.149290 | 0.170456 | 0.141416 | 0.181804 | 0.247910 | 0.129594 | 0.427431 | 0.576235 | 0.831389 | 1.000000 | 0.463925 | 5.500412 | 0.857992 |
0 | 190 | 3.055852 | 2.843086 | 0.207317 | 0.149367 | 0.142287 | 0.162816 | 0.132833 | 0.163011 | 0.234566 | 0.119859 | 0.405354 | 0.571824 | 0.831389 | 1.000000 | 0.467532 | 5.523357 | 0.857033 |
### import matplotlib.pyplot as plt
import itertools

# Every column except the swept hyperparameter is a metric to chart.
metrics = [column for column in result.columns if column != 'Alpha']

# Grid of axes: `charts_per_row` is passed as the number of rows and
# `charts_per_column` as the number of columns.
charts_per_row = 6
charts_per_column = 3
fig, axes = plt.subplots(
    nrows=charts_per_row,
    ncols=charts_per_column,
    figsize=(18, 7 * charts_per_row),
)

# (row, column) positions of the grid in row-major order.
to_iter = list(itertools.product(range(charts_per_row), range(charts_per_column)))

# One chart per metric: hyperparameter value on x, metric value on y.
for position, metric in enumerate(metrics):
    result[['Alpha', metric]].plot(ax=axes[to_iter[position]], title=metric, x=0, y=1)
import evaluation_measures as ev
# Folder holding the saved recommendation/estimation files (the ImplicitALS
# files written earlier in this notebook are stored here).
dir_path="Recommendations generated/ml-100k/"
# Ratings passed as `super_reactions`; the same [4,5] is used in the
# single-model evaluations above.
super_reactions=[4,5]
test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\t', header=None)
# Build the comparison table; judging by the output, it has one row per model
# found in `dir_path` (TODO confirm in evaluation_measures.evaluate_all).
ev.evaluate_all(test, dir_path, super_reactions)
943it [00:00, 7384.16it/s] 943it [00:00, 7881.42it/s] 943it [00:00, 8618.41it/s] 943it [00:00, 7851.41it/s] 943it [00:00, 8173.68it/s] 943it [00:00, 7964.65it/s] 943it [00:00, 6842.04it/s] 943it [00:00, 6556.41it/s] 943it [00:00, 8900.33it/s]
Model | RMSE | MAE | precision | recall | F_1 | F_05 | precision_super | recall_super | NDCG | mAP | MRR | LAUC | HR | Reco in test | Test coverage | Shannon | Gini | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Ready_LightFMpureMF | 7.953192 | 7.462008 | 0.334464 | 0.219997 | 0.217225 | 0.254981 | 0.233798 | 0.266952 | 0.398778 | 0.263058 | 0.629129 | 0.607709 | 0.913043 | 1.000000 | 0.275613 | 5.085818 | 0.913665 |
0 | Ready_LightFM | 162.707436 | 160.855483 | 0.340827 | 0.217682 | 0.217990 | 0.258010 | 0.243884 | 0.260663 | 0.403850 | 0.268266 | 0.637590 | 0.606568 | 0.898197 | 1.000000 | 0.351371 | 5.366291 | 0.885046 |
0 | Ready_ImplicitALS | 3.266101 | 3.065824 | 0.255037 | 0.188653 | 0.176852 | 0.201189 | 0.166631 | 0.214925 | 0.305908 | 0.172546 | 0.523871 | 0.591709 | 0.889714 | 1.000000 | 0.502886 | 5.722957 | 0.827507 |
0 | Self_TopPop | 2.508258 | 2.217909 | 0.188865 | 0.116919 | 0.118732 | 0.141584 | 0.130472 | 0.137473 | 0.214651 | 0.111707 | 0.400939 | 0.555546 | 0.765642 | 1.000000 | 0.038961 | 3.159079 | 0.987317 |
0 | Ready_LightFMcontent | 182.471340 | 180.405210 | 0.160339 | 0.101224 | 0.102198 | 0.121074 | 0.102682 | 0.112455 | 0.180079 | 0.087429 | 0.337825 | 0.547572 | 0.704136 | 0.974973 | 0.264791 | 4.909893 | 0.926201 |
0 | Ready_Baseline | 0.949459 | 0.752487 | 0.091410 | 0.037652 | 0.046030 | 0.061286 | 0.079614 | 0.056463 | 0.095957 | 0.043178 | 0.198193 | 0.515501 | 0.437964 | 1.000000 | 0.033911 | 2.836513 | 0.991139 |
0 | Self_GlobalAvg | 1.125760 | 0.943534 | 0.061188 | 0.025968 | 0.031383 | 0.041343 | 0.040558 | 0.032107 | 0.067695 | 0.027470 | 0.171187 | 0.509546 | 0.384942 | 1.000000 | 0.025974 | 2.711772 | 0.992003 |
0 | Ready_Random | 1.514355 | 1.216383 | 0.049735 | 0.022300 | 0.025782 | 0.033598 | 0.028219 | 0.021751 | 0.054383 | 0.021119 | 0.133978 | 0.507680 | 0.339343 | 0.986957 | 0.177489 | 5.088670 | 0.907676 |
0 | Self_BaselineUI | 0.967585 | 0.762740 | 0.000954 | 0.000170 | 0.000278 | 0.000463 | 0.000644 | 0.000189 | 0.000752 | 0.000168 | 0.001677 | 0.496424 | 0.009544 | 0.600530 | 0.005051 | 1.803126 | 0.996380 |