{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Self made SVD" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "import helpers\n", "import pandas as pd\n", "import numpy as np\n", "import scipy.sparse as sparse\n", "from collections import defaultdict\n", "from itertools import chain\n", "import random\n", "\n", "train_read=pd.read_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None)\n", "test_read=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n", "train_ui, test_ui, user_code_id, user_id_code, item_code_id, item_id_code = helpers.data_to_csr(train_read, test_read)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "# Done similarly to https://github.com/albertauyeung/matrix-factorization-in-python\n", "from tqdm import tqdm\n", "\n", "class SVD():\n", " \n", " def __init__(self, train_ui, learning_rate, regularization, nb_factors, iterations):\n", " self.train_ui=train_ui\n", " self.uir=list(zip(*[train_ui.nonzero()[0],train_ui.nonzero()[1], train_ui.data]))\n", " \n", " self.learning_rate=learning_rate\n", " self.regularization=regularization\n", " self.iterations=iterations\n", " self.nb_users, self.nb_items=train_ui.shape\n", " self.nb_ratings=train_ui.nnz\n", " self.nb_factors=nb_factors\n", " \n", " self.Pu=np.random.normal(loc=0, scale=1./self.nb_factors, size=(self.nb_users, self.nb_factors))\n", " self.Qi=np.random.normal(loc=0, scale=1./self.nb_factors, size=(self.nb_items, self.nb_factors))\n", "\n", " def train(self, test_ui=None):\n", " if test_ui!=None:\n", " self.test_uir=list(zip(*[test_ui.nonzero()[0],test_ui.nonzero()[1], test_ui.data]))\n", " \n", " self.learning_process=[]\n", " pbar = tqdm(range(self.iterations))\n", " for i in pbar:\n", " pbar.set_description(f'Epoch {i} RMSE: {self.learning_process[-1][1] if i>0 else 0}. Training epoch {i+1}...')\n", " np.random.shuffle(self.uir)\n", " self.sgd(self.uir)\n", " if test_ui==None:\n", " self.learning_process.append([i+1, self.RMSE_total(self.uir)])\n", " else:\n", " self.learning_process.append([i+1, self.RMSE_total(self.uir), self.RMSE_total(self.test_uir)])\n", " \n", " def sgd(self, uir):\n", " \n", " for u, i, score in uir:\n", " # Computer prediction and error\n", " prediction = self.get_rating(u,i)\n", " e = (score - prediction)\n", " \n", " # Update user and item latent feature matrices\n", " Pu_update=self.learning_rate * (e * self.Qi[i] - self.regularization * self.Pu[u])\n", " Qi_update=self.learning_rate * (e * self.Pu[u] - self.regularization * self.Qi[i])\n", " \n", " self.Pu[u] += Pu_update\n", " self.Qi[i] += Qi_update\n", " \n", " def get_rating(self, u, i):\n", " prediction = self.Pu[u].dot(self.Qi[i].T)\n", " return prediction\n", " \n", " def RMSE_total(self, uir):\n", " RMSE=0\n", " for u,i, score in uir:\n", " prediction = self.get_rating(u,i)\n", " RMSE+=(score - prediction)**2\n", " return np.sqrt(RMSE/len(uir))\n", " \n", " def estimations(self):\n", " self.estimations=\\\n", " np.dot(self.Pu,self.Qi.T)\n", "\n", " def recommend(self, user_code_id, item_code_id, topK=10):\n", " \n", " top_k = defaultdict(list)\n", " for nb_user, user in enumerate(self.estimations):\n", " \n", " user_rated=self.train_ui.indices[self.train_ui.indptr[nb_user]:self.train_ui.indptr[nb_user+1]]\n", " for item, score in enumerate(user):\n", " if item not in user_rated and not np.isnan(score):\n", " top_k[user_code_id[nb_user]].append((item_code_id[item], score))\n", " result=[]\n", " # Let's choose k best items in the format: (user, item1, score1, item2, score2, ...)\n", " for uid, item_scores in top_k.items():\n", " item_scores.sort(key=lambda x: x[1], reverse=True)\n", " result.append([uid]+list(chain(*item_scores[:topK])))\n", " return result\n", " \n", " def estimate(self, user_code_id, item_code_id, test_ui):\n", " result=[]\n", " for user, item in zip(*test_ui.nonzero()):\n", " result.append([user_code_id[user], item_code_id[item], \n", " self.estimations[user,item] if not np.isnan(self.estimations[user,item]) else 1])\n", " return result" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Epoch 39 RMSE: 0.7471068280984748. Training epoch 40...: 100%|█████████████████████████| 40/40 [02:29<00:00, 3.74s/it] df=pd.DataFrame(model.learning_process).iloc[:,:2]
df.columns=['epoch', 'train_RMSE']
plt.plot('epoch', 'train_RMSE', data=df, color='blue')
plt.legend() \u001b[0mdata\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdf\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcolor\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m'blue'\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n", "\u001b[1;31mModuleNotFoundError\u001b[0m: No module named 'matplotlib'" ] } ], "source": [ "import matplotlib.pyplot as plt\n", "\n", "df=pd.DataFrame(model.learning_process).iloc[:,:2]\n", "df.columns=['epoch', 'train_RMSE']\n", "plt.plot('epoch', 'train_RMSE', data=df, color='blue')\n", "plt.legend()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "import matplotlib.pyplot as plt\n", "\n", "df=pd.DataFrame(model.learning_process[10:], columns=['epoch', 'train_RMSE', 'test_RMSE'])\n", "plt.plot('epoch', 'train_RMSE', data=df, color='blue')\n", "plt.plot('epoch', 'test_RMSE', data=df, color='yellow', linestyle='dashed')\n", "plt.legend()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Saving and evaluating recommendations" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "model.estimations()\n", "\n", "top_n=pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))\n", "\n", "top_n.to_csv('Recommendations generated/ml-100k/Self_SVD_reco.csv', index=False, header=False)\n", "\n", "estimations=pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))\n", "estimations.to_csv('Recommendations generated/ml-100k/Self_SVD_estimations.csv', index=False, header=False)" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "943it [00:00, 4261.36it/s]\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RMSEMAEprecisionrecallF_1F_05precision_superrecall_superNDCGmAPMRRLAUCHRHR2Reco in testTest coverageShannonGini
\n", "
" ], "text/plain": [ " RMSE MAE precision recall F_1 F_05 \\\n", "0 0.914521 0.71768 0.102757 0.043043 0.052432 0.069515 \n", "\n", " precision_super recall_super NDCG mAP MRR LAUC \\\n", "0 0.094528 0.075122 0.106751 0.051431 0.198701 0.518248 \n", "\n", " HR HR2 Reco in test Test coverage Shannon Gini \n", "0 0.462354 0.255567 0.854931 0.147186 3.888926 0.972044 " ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import evaluation_measures as ev\n", "\n", "estimations_df=pd.read_csv('Recommendations generated/ml-100k/Self_SVD_estimations.csv', header=None)\n", "reco=np.loadtxt('Recommendations generated/ml-100k/Self_SVD_reco.csv', delimiter=',')\n", "\n", "ev.evaluate(test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None),\n", " estimations_df=estimations_df, \n", " reco=reco,\n", " super_reactions=[4,5])" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "943it [00:00, 5504.80it/s]\n", "943it [00:00, 4588.89it/s]\n", "943it [00:00, 3546.71it/s]\n", "943it [00:00, 3802.69it/s]\n", "943it [00:00, 3533.79it/s]\n", "943it [00:00, 3587.29it/s]\n", "943it [00:00, 3825.53it/s]\n", "943it [00:00, 3495.58it/s]\n", "943it [00:00, 3725.91it/s]\n", "943it [00:00, 3820.07it/s]\n", "943it [00:00, 3632.69it/s]\n", "943it [00:00, 3564.35it/s]\n", "943it [00:00, 3651.79it/s]\n", "943it [00:00, 3835.91it/s]\n", "943it [00:00, 4391.98it/s]\n", "943it [00:00, 3026.85it/s]\n", "943it [00:00, 2492.44it/s]\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ModelRMSEMAEprecisionrecallF_1F_05precision_superrecall_superNDCGmAPMRRLAUCHRHR2Reco in testTest coverageShannonGini
\n", "
" ], "text/plain": [ " Model RMSE MAE precision recall F_1 \\\n", "0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n", "0 Self_SVDBaseline 3.642710 3.477031 0.137858 0.083447 0.084155 \n", "0 Self_SVD 0.914521 0.717680 0.102757 0.043043 0.052432 \n", "0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n", "0 Self_GlobalAvg 1.125760 0.943534 0.061188 0.025968 0.031383 \n", "0 Ready_Random 1.517593 1.220181 0.046023 0.019038 0.023118 \n", "0 Ready_I-KNN 1.030386 0.813067 0.026087 0.006908 0.010593 \n", "0 Ready_I-KNNWithMeans 0.955921 0.754037 0.004984 0.003225 0.003406 \n", "0 Ready_I-KNNWithZScore 0.957701 0.752387 0.003712 0.001994 0.002380 \n", "0 Self_I-KNNBaseline39 0.935520 0.737631 0.002757 0.000856 0.001230 \n", "0 Self_I-KNNBaseline38 0.935685 0.737828 0.002651 0.000837 0.001197 \n", "0 Ready_I-KNNBaseline 0.935327 0.737424 0.002545 0.000755 0.001105 \n", "0 Ready_U-KNN 1.023495 0.807913 0.000742 0.000205 0.000305 \n", "0 Self_TopRated 2.508258 2.217909 0.000954 0.000188 0.000298 \n", "0 Self_BaselineIU 0.958136 0.754051 0.000954 0.000188 0.000298 \n", "0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n", "0 Self_IKNN 1.018363 0.808793 0.000318 0.000108 0.000140 \n", "\n", " F_05 precision_super recall_super NDCG mAP MRR \\\n", "0 0.141584 0.130472 0.137473 0.214651 0.111707 0.400939 \n", "0 0.101113 0.108476 0.109680 0.164872 0.083459 0.338033 \n", "0 0.069515 0.094528 0.075122 0.106751 0.051431 0.198701 \n", "0 0.061286 0.079614 0.056463 0.095957 0.043178 0.198193 \n", "0 0.041343 0.040558 0.032107 0.067695 0.027470 0.171187 \n", "0 0.030734 0.029292 0.021639 0.050818 0.019958 0.126646 \n", "0 0.016046 0.021137 0.009522 0.024214 0.008958 0.048068 \n", "0 0.003956 0.004506 0.003861 0.006815 0.002906 0.020332 \n", "0 0.002919 0.003433 0.002401 0.005137 0.002158 0.016458 \n", "0 0.001758 0.002468 0.001048 0.003899 0.001620 0.013296 \n", "0 0.001702 0.002361 0.001020 0.003635 0.001443 0.012589 \n", "0 0.001602 0.002253 0.000930 0.003444 0.001362 0.011760 \n", "0 0.000449 0.000536 0.000198 0.000845 0.000274 0.002744 \n", "0 0.000481 0.000644 0.000223 0.001043 0.000335 0.003348 \n", "0 0.000481 0.000644 0.000223 0.001043 0.000335 0.003348 \n", "0 0.000463 0.000644 0.000189 0.000752 0.000168 0.001677 \n", "0 0.000189 0.000000 0.000000 0.000214 0.000037 0.000368 \n", "\n", " LAUC HR HR2 Reco in test Test coverage Shannon \\\n", "0 0.555546 0.765642 0.492047 1.000000 0.038961 3.159079 \n", "0 0.538614 0.634146 0.359491 0.999788 0.275613 5.134751 \n", "0 0.518248 0.462354 0.255567 0.854931 0.147186 3.888926 \n", "0 0.515501 0.437964 0.239661 1.000000 0.033911 2.836513 \n", "0 0.509546 0.384942 0.142100 1.000000 0.025974 2.711772 \n", "0 0.506031 0.305408 0.111347 0.988547 0.174603 5.082383 \n", "0 0.499885 0.154825 0.072110 0.402333 0.434343 5.133650 \n", "0 0.497969 0.039236 0.007423 0.587699 0.071429 2.699278 \n", "0 0.497349 0.027572 0.007423 0.389926 0.067821 2.475747 \n", "0 0.496775 0.022269 0.005302 0.483351 0.059885 2.235102 \n", "0 0.496765 0.022269 0.004242 0.483245 0.059163 2.235851 \n", "0 0.496724 0.021209 0.004242 0.482821 0.059885 2.232578 \n", "0 0.496441 0.007423 0.000000 0.602121 0.010823 2.089186 \n", "0 0.496433 0.009544 0.000000 0.699046 0.005051 1.945910 \n", "0 0.496433 0.009544 0.000000 0.699046 0.005051 1.945910 \n", "0 0.496424 0.009544 0.000000 0.600530 0.005051 1.803126 \n", "0 0.496391 0.003181 0.000000 0.392153 0.115440 4.174741 \n", "\n", " Gini \n", "0 0.987317 \n", "0 0.909655 \n", "0 0.972044 \n", "0 0.991139 \n", "0 0.992003 \n", "0 0.908434 \n", "0 0.877999 \n", "0 0.991353 \n", "0 0.992793 \n", "0 0.994479 \n", "0 0.994507 \n", "0 0.994487 \n", "0 0.995706 \n", "0 0.995669 \n", "0 0.995669 \n", "0 0.996380 \n", "0 0.965327 " ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import imp\n", "imp.reload(ev)\n", "\n", "import evaluation_measures as ev\n", "dir_path=\"Recommendations generated/ml-100k/\"\n", "super_reactions=[4,5]\n", "test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n", "\n", "ev.evaluate_all(test, dir_path, super_reactions)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Embeddings" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1, 2],\n", " [3, 4]])" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "array([[0.4472136 , 0.89442719],\n", " [0.6 , 0.8 ]])" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "x=np.array([[1,2],[3,4]])\n", "display(x)\n", "x/np.linalg.norm(x, axis=1)[:,None]" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
010511.00000010521052Dracula: Dead and Loving It (1995)Comedy, Horror
111770.95130311781178Major Payne (1994)Comedy
212900.95048912911291Celtic Pride (1996)Comedy
313750.94986413761376Meet Wally Sparks (1997)Comedy
414890.94737514901490Fausto (1993)Comedy
514950.94736814961496Carpool (1996)Comedy, Crime
614970.94734714981498Farmer & Chase (1995)Comedy
714900.94682914911491Tough and Deadly (1995)Action, Drama, Thriller
813200.94615213211321Open Season (1996)Comedy
914870.94542514881488Germinal (1993)Drama
\n", "
" ], "text/plain": [ " code score item_id id title \\\n", "0 1051 1.000000 1052 1052 Dracula: Dead and Loving It (1995) \n", "1 1177 0.951303 1178 1178 Major Payne (1994) \n", "2 1290 0.950489 1291 1291 Celtic Pride (1996) \n", "3 1375 0.949864 1376 1376 Meet Wally Sparks (1997) \n", "4 1489 0.947375 1490 1490 Fausto (1993) \n", "5 1495 0.947368 1496 1496 Carpool (1996) \n", "6 1497 0.947347 1498 1498 Farmer & Chase (1995) \n", "7 1490 0.946829 1491 1491 Tough and Deadly (1995) \n", "8 1320 0.946152 1321 1321 Open Season (1996) \n", "9 1487 0.945425 1488 1488 Germinal (1993) \n", "\n", " genres \n", "0 Comedy, Horror \n", "1 Comedy \n", "2 Comedy \n", "3 Comedy \n", "4 Comedy \n", "5 Comedy, Crime \n", "6 Comedy \n", "7 Action, Drama, Thriller \n", "8 Comedy \n", "9 Drama " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "item=random.choice(list(set(train_ui.indices)))\n", "\n", "embeddings_norm=model.Qi/np.linalg.norm(model.Qi, axis=1)[:,None] # we do not mean-center here\n", "# omitting normalization also makes sense, but items with a greater magnitude will be recommended more often\n", "\n", "similarity_scores=np.dot(embeddings_norm,embeddings_norm[item].T)\n", "top_similar_items=pd.DataFrame(enumerate(similarity_scores), columns=['code', 'score'])\\\n", ".sort_values(by=['score'], ascending=[False])[:10]\n", "\n", "top_similar_items['item_id']=top_similar_items['code'].apply(lambda x: item_code_id[x])\n", "\n", "items=pd.read_csv('./Datasets/ml-100k/movies.csv')\n", "\n", "result=pd.merge(top_similar_items, items, left_on='item_id', right_on='id')\n", "\n", "result" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# project task 5: implement SVD on top baseline (as it is in Surprise library)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "# making changes to our implementation by considering additional parameters in the gradient descent procedure \n", "# seems to be the fastest option\n", "# please save the output in 'Recommendations generated/ml-100k/Self_SVDBaseline_reco.csv' and\n", "# 'Recommendations generated/ml-100k/Self_SVDBaseline_estimations.csv'" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "# Done similarly to https://github.com/albertauyeung/matrix-factorization-in-python\n", "from tqdm import tqdm\n", "\n", "class SVDBaseline():\n", " \n", " def __init__(self, train_ui, learning_rate, regularization, nb_factors, iterations):\n", " self.train_ui=train_ui\n", " self.uir=list(zip(*[train_ui.nonzero()[0],train_ui.nonzero()[1], train_ui.data]))\n", " \n", " self.learning_rate=learning_rate\n", " self.regularization=regularization\n", " self.iterations=iterations\n", " self.nb_users, self.nb_items=train_ui.shape\n", " self.nb_ratings=train_ui.nnz\n", " self.nb_factors=nb_factors\n", " \n", " self.Pu=np.random.normal(loc=0, scale=1./self.nb_factors, size=(self.nb_users, self.nb_factors))\n", " self.Qi=np.random.normal(loc=0, scale=1./self.nb_factors, size=(self.nb_items, self.nb_factors))\n", "\n", " self.b_u = np.zeros(self.nb_users)\n", " self.b_i = np.zeros(self.nb_items)\n", " \n", " def train(self, test_ui=None):\n", " if test_ui!=None:\n", " self.test_uir=list(zip(*[test_ui.nonzero()[0],test_ui.nonzero()[1], test_ui.data]))\n", " \n", " self.learning_process=[]\n", " pbar = tqdm(range(self.iterations))\n", " for i in pbar:\n", " pbar.set_description(f'Epoch {i} RMSE: {self.learning_process[-1][1] if i>0 else 0}. Training epoch {i+1}...')\n", " np.random.shuffle(self.uir)\n", " self.sgd(self.uir)\n", " if test_ui==None:\n", " self.learning_process.append([i+1, self.RMSE_total(self.uir)])\n", " else:\n", " self.learning_process.append([i+1, self.RMSE_total(self.uir), self.RMSE_total(self.test_uir)])\n", " \n", " def sgd(self, uir):\n", " \n", " for u, i, score in uir:\n", " # Computer prediction and error\n", " prediction = self.get_rating(u,i)\n", " e = (score - prediction)\n", " \n", " \n", " b_u_update = self.learning_rate * (e - self.regularization * self.b_u[u])\n", " b_i_update = self.learning_rate * (e - self.regularization * self.b_i[i])\n", " \n", " self.b_u[u] += b_u_update\n", " self.b_i[i] += b_i_update\n", " # Update user and item latent feature matrices\n", " Pu_update=self.learning_rate * (e * self.Qi[i] - self.regularization * self.Pu[u])\n", " Qi_update=self.learning_rate * (e * self.Pu[u] - self.regularization * self.Qi[i])\n", " \n", " self.Pu[u] += Pu_update\n", " self.Qi[i] += Qi_update\n", " \n", " def get_rating(self, u, i):\n", " prediction = self.b_u[u] + self.b_i[i] + self.Pu[u].dot(self.Qi[i].T)\n", " return prediction\n", " \n", " def RMSE_total(self, uir):\n", " RMSE=0\n", " for u,i, score in uir:\n", " prediction = self.get_rating(u,i)\n", " RMSE+=(score - prediction)**2\n", " return np.sqrt(RMSE/len(uir))\n", " \n", " def estimations(self):\n", " self.estimations=\\\n", " self.b_u[:,np.newaxis] + self.b_i[np.newaxis:,] + np.dot(self.Pu,self.Qi.T)\n", "\n", " def recommend(self, user_code_id, item_code_id, topK=10):\n", " \n", " top_k = defaultdict(list)\n", " for nb_user, user in enumerate(self.estimations):\n", " \n", " user_rated=self.train_ui.indices[self.train_ui.indptr[nb_user]:self.train_ui.indptr[nb_user+1]]\n", " for item, score in enumerate(user):\n", " if item not in user_rated and not np.isnan(score):\n", " top_k[user_code_id[nb_user]].append((item_code_id[item], score))\n", " result=[]\n", " # Let's choose k best items in the format: (user, item1, score1, item2, score2, ...)\n", " for uid, item_scores in top_k.items():\n", " item_scores.sort(key=lambda x: x[1], reverse=True)\n", " result.append([uid]+list(chain(*item_scores[:topK])))\n", " return result\n", " \n", " def estimate(self, user_code_id, item_code_id, test_ui):\n", " result=[]\n", " for user, item in zip(*test_ui.nonzero()):\n", " result.append([user_code_id[user], item_code_id[item], \n", " self.estimations[user,item] if not np.isnan(self.estimations[user,item]) else 1])\n", " return result" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Epoch 39 RMSE: 0.7820631219900416. Training epoch 40...: 100%|█████████████████████████| 40/40 [03:33<00:00, 5.34s/it]\n" ] } ], "source": [ "model=SVDBaseline(train_ui, learning_rate=0.005, regularization=0.02, nb_factors=100, iterations=40)\n", "model.train(test_ui)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "model.estimations()\n", "\n", "top_n=pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))\n", "\n", "top_n.to_csv('Recommendations generated/ml-100k/Self_SVDBaseline_reco.csv', index=False, header=False)\n", "\n", "estimations=pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))\n", "estimations.to_csv('Recommendations generated/ml-100k/Self_SVDBaseline_estimations.csv', index=False, header=False)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "943it [00:00, 3891.04it/s]\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RMSEMAEprecisionrecallF_1F_05precision_superrecall_superNDCGmAPMRRLAUCHRHR2Reco in testTest coverageShannonGini
\n", "
" ], "text/plain": [ " RMSE MAE precision recall F_1 F_05 \\\n", "0 0.913253 0.719475 0.10509 0.043952 0.053454 0.070803 \n", "\n", " precision_super recall_super NDCG mAP MRR LAUC \\\n", "0 0.095279 0.073469 0.118152 0.058739 0.244096 0.518714 \n", "\n", " HR HR2 Reco in test Test coverage Shannon Gini \n", "0 0.471898 0.279958 0.999682 0.111111 3.572421 0.980655 " ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import evaluation_measures as ev\n", "\n", "estimations_df=pd.read_csv('Recommendations generated/ml-100k/Self_SVDBaseline_estimations.csv', header=None)\n", "reco=np.loadtxt('Recommendations generated/ml-100k/Self_SVDBaseline_reco.csv', delimiter=',')\n", "\n", "ev.evaluate(test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None),\n", " estimations_df=estimations_df, \n", " reco=reco,\n", " super_reactions=[4,5])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Ready-made SVD - Surprise implementation" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### SVD" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Generating predictions...\n", "Generating top N recommendations...\n", "Generating predictions...\n" ] } ], "source": [ "import helpers\n", "import surprise as sp\n", "import imp\n", "imp.reload(helpers)\n", "\n", "algo = sp.SVD(biased=False) # to use unbiased version\n", "\n", "helpers.ready_made(algo, reco_path='Recommendations generated/ml-100k/Ready_SVD_reco.csv',\n", " estimations_path='Recommendations generated/ml-100k/Ready_SVD_estimations.csv')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### SVD biased - on top baseline" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Generating predictions...\n", "Generating top N recommendations...\n", "Generating predictions...\n" ] } ], "source": [ "import helpers\n", "import surprise as sp\n", "import imp\n", "imp.reload(helpers)\n", "\n", "algo = sp.SVD() # default is biased=True\n", "\n", "helpers.ready_made(algo, reco_path='Recommendations generated/ml-100k/Ready_SVDBiased_reco.csv',\n", " estimations_path='Recommendations generated/ml-100k/Ready_SVDBiased_estimations.csv')" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "943it [00:00, 3972.80it/s]\n", "943it [00:00, 3608.86it/s]\n", "943it [00:00, 3514.94it/s]\n", "943it [00:00, 3447.85it/s]\n", "943it [00:00, 3615.55it/s]\n", "943it [00:00, 3364.78it/s]\n", "943it [00:00, 3508.24it/s]\n", "943it [00:00, 3394.08it/s]\n", "943it [00:00, 3294.51it/s]\n", "943it [00:00, 3636.65it/s]\n", "943it [00:00, 3356.18it/s]\n", "943it [00:00, 3364.83it/s]\n", "943it [00:00, 3438.26it/s]\n", "943it [00:00, 3642.63it/s]\n", "943it [00:00, 3294.49it/s]\n", "943it [00:00, 3205.15it/s]\n", "943it [00:00, 3737.24it/s]\n", "943it [00:00, 3456.46it/s]\n", "943it [00:00, 3528.07it/s]\n", "943it [00:00, 3495.27it/s]\n", "943it [00:00, 3321.11it/s]\n", "943it [00:00, 2405.91it/s]\n", "943it [00:00, 2676.16it/s]\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ModelRMSEMAEprecisionrecallF_1F_05precision_superrecall_superNDCGmAPMRRLAUCHRHR2Reco in testTest coverageShannonGini
\n", "
" ], "text/plain": [ " Model RMSE MAE precision recall F_1 \\\n", "0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n", "0 Self_SVDBaseline 0.913253 0.719475 0.105090 0.043952 0.053454 \n", "0 Self_SVD 0.914521 0.717680 0.102757 0.043043 0.052432 \n", "0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n", "0 Self_GlobalAvg 1.125760 0.943534 0.061188 0.025968 0.031383 \n", "0 Ready_Random 1.517593 1.220181 0.046023 0.019038 0.023118 \n", "0 Ready_I-KNN 1.030386 0.813067 0.026087 0.006908 0.010593 \n", "0 Ready_I-KNNWithMeans 0.955921 0.754037 0.004984 0.003225 0.003406 \n", "0 Ready_I-KNNWithZScore 0.957701 0.752387 0.003712 0.001994 0.002380 \n", "0 Self_I-KNNBaseline45 0.935268 0.737543 0.003075 0.001044 0.001450 \n", "0 Self_I-KNNBaseline42 0.935028 0.737210 0.002969 0.000980 0.001374 \n", "0 Self_I-KNNBaseline43 0.935241 0.737463 0.002863 0.000952 0.001331 \n", "0 Self_I-KNNBaseline44 0.935259 0.737530 0.002969 0.000902 0.001305 \n", "0 Self_I-KNNBaseline39 0.935520 0.737631 0.002757 0.000856 0.001230 \n", "0 Self_I-KNNBaseline38 0.935685 0.737828 0.002651 0.000837 0.001197 \n", "0 Self_I-KNNBaseline41 0.935205 0.737439 0.002651 0.000774 0.001138 \n", "0 Ready_I-KNNBaseline 0.935327 0.737424 0.002545 0.000755 0.001105 \n", "0 Self_I-KNNBaseline40 0.935327 0.737424 0.002545 0.000755 0.001105 \n", "0 Ready_U-KNN 1.023495 0.807913 0.000742 0.000205 0.000305 \n", "0 Self_TopRated 2.508258 2.217909 0.000954 0.000188 0.000298 \n", "0 Self_BaselineIU 0.958136 0.754051 0.000954 0.000188 0.000298 \n", "0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n", "0 Self_IKNN 1.018363 0.808793 0.000318 0.000108 0.000140 \n", "\n", " F_05 precision_super recall_super NDCG mAP MRR \\\n", "0 0.141584 0.130472 0.137473 0.214651 0.111707 0.400939 \n", "0 0.070803 0.095279 0.073469 0.118152 0.058739 0.244096 \n", "0 0.069515 0.094528 0.075122 0.106751 0.051431 0.198701 \n", "0 0.061286 0.079614 0.056463 0.095957 0.043178 0.198193 \n", "0 0.041343 0.040558 0.032107 0.067695 0.027470 0.171187 \n", "0 0.030734 0.029292 0.021639 0.050818 0.019958 0.126646 \n", "0 0.016046 0.021137 0.009522 0.024214 0.008958 0.048068 \n", "0 0.003956 0.004506 0.003861 0.006815 0.002906 0.020332 \n", "0 0.002919 0.003433 0.002401 0.005137 0.002158 0.016458 \n", "0 0.002016 0.002790 0.001317 0.004287 0.001812 0.014189 \n", "0 0.001929 0.002682 0.001217 0.004069 0.001677 0.013349 \n", "0 0.001862 0.002575 0.001186 0.004014 0.001663 0.013467 \n", "0 0.001880 0.002682 0.001129 0.004215 0.001823 0.013977 \n", "0 0.001758 0.002468 0.001048 0.003899 0.001620 0.013296 \n", "0 0.001702 0.002361 0.001020 0.003635 0.001443 0.012589 \n", "0 0.001658 0.002361 0.000959 0.003537 0.001435 0.011494 \n", "0 0.001602 0.002253 0.000930 0.003444 0.001362 0.011760 \n", "0 0.001602 0.002253 0.000930 0.003444 0.001362 0.011760 \n", "0 0.000449 0.000536 0.000198 0.000845 0.000274 0.002744 \n", "0 0.000481 0.000644 0.000223 0.001043 0.000335 0.003348 \n", "0 0.000481 0.000644 0.000223 0.001043 0.000335 0.003348 \n", "0 0.000463 0.000644 0.000189 0.000752 0.000168 0.001677 \n", "0 0.000189 0.000000 0.000000 0.000214 0.000037 0.000368 \n", "\n", " LAUC HR HR2 Reco in test Test coverage Shannon \\\n", "0 0.555546 0.765642 0.492047 1.000000 0.038961 3.159079 \n", "0 0.518714 0.471898 0.279958 0.999682 0.111111 3.572421 \n", "0 0.518248 0.462354 0.255567 0.854931 0.147186 3.888926 \n", "0 0.515501 0.437964 0.239661 1.000000 0.033911 2.836513 \n", "0 0.509546 0.384942 0.142100 1.000000 0.025974 2.711772 \n", "0 0.506031 0.305408 0.111347 0.988547 0.174603 5.082383 \n", "0 0.499885 0.154825 0.072110 0.402333 0.434343 5.133650 \n", "0 0.497969 0.039236 0.007423 0.587699 0.071429 2.699278 \n", "0 0.497349 0.027572 0.007423 0.389926 0.067821 2.475747 \n", "0 0.496871 0.024390 0.005302 0.482609 0.058442 2.225340 \n", "0 0.496838 0.023330 0.006363 0.481972 0.059163 2.227849 \n", "0 0.496824 0.023330 0.005302 0.482609 0.055556 2.225996 \n", "0 0.496799 0.023330 0.005302 0.482397 0.057720 2.225495 \n", "0 0.496775 0.022269 0.005302 0.483351 0.059885 2.235102 \n", "0 0.496765 0.022269 0.004242 0.483245 0.059163 2.235851 \n", "0 0.496734 0.021209 0.005302 0.482503 0.057720 2.228123 \n", "0 0.496724 0.021209 0.004242 0.482821 0.059885 2.232578 \n", "0 0.496724 0.021209 0.004242 0.482821 0.059885 2.232578 \n", "0 0.496441 0.007423 0.000000 0.602121 0.010823 2.089186 \n", "0 0.496433 0.009544 0.000000 0.699046 0.005051 1.945910 \n", "0 0.496433 0.009544 0.000000 0.699046 0.005051 1.945910 \n", "0 0.496424 0.009544 0.000000 0.600530 0.005051 1.803126 \n", "0 0.496391 0.003181 0.000000 0.392153 0.115440 4.174741 \n", "\n", " Gini \n", "0 0.987317 \n", "0 0.980655 \n", "0 0.972044 \n", "0 0.991139 \n", "0 0.992003 \n", "0 0.908434 \n", "0 0.877999 \n", "0 0.991353 \n", "0 0.992793 \n", "0 0.994599 \n", "0 0.994531 \n", "0 0.994623 \n", "0 0.994598 \n", "0 0.994479 \n", "0 0.994507 \n", "0 0.994555 \n", "0 0.994487 \n", "0 0.994487 \n", "0 0.995706 \n", "0 0.995669 \n", "0 0.995669 \n", "0 0.996380 \n", "0 0.965327 " ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import imp\n", "imp.reload(ev)\n", "\n", "import evaluation_measures as ev\n", "dir_path=\"Recommendations generated/ml-100k/\"\n", "super_reactions=[4,5]\n", "test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n", "\n", "ev.evaluate_all(test, dir_path, super_reactions)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.5" } }, "nbformat": 4, "nbformat_minor": 4 }