{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import helpers\n", "import pandas as pd\n", "import numpy as np\n", "import scipy.sparse as sparse\n", "from collections import defaultdict\n", "from itertools import chain\n", "import random\n", "import time\n", "import matplotlib.pyplot as plt\n", "\n", "train_read=pd.read_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None)\n", "test_read=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n", "train_ui, test_ui, user_code_id, user_id_code, item_code_id, item_id_code = helpers.data_to_csr(train_read, test_read)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "<943x1682 sparse matrix of type ''\n", "\twith 80000 stored elements in Compressed Sparse Row format>" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_ui" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Let's prepare user and item features" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Item features" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789...14151617181920212223
01Toy Story (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?Toy%20Story%2...00011...0000000000
12GoldenEye (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?GoldenEye%20(...01100...0000000100
23Four Rooms (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?Four%20Rooms%...00000...0000000100
\n", "

3 rows × 24 columns

\n", "
" ], "text/plain": [ " 0 1 2 3 \\\n", "0 1 Toy Story (1995) 01-Jan-1995 NaN \n", "1 2 GoldenEye (1995) 01-Jan-1995 NaN \n", "2 3 Four Rooms (1995) 01-Jan-1995 NaN \n", "\n", " 4 5 6 7 8 9 ... \\\n", "0 http://us.imdb.com/M/title-exact?Toy%20Story%2... 0 0 0 1 1 ... \n", "1 http://us.imdb.com/M/title-exact?GoldenEye%20(... 0 1 1 0 0 ... \n", "2 http://us.imdb.com/M/title-exact?Four%20Rooms%... 0 0 0 0 0 ... \n", "\n", " 14 15 16 17 18 19 20 21 22 23 \n", "0 0 0 0 0 0 0 0 0 0 0 \n", "1 0 0 0 0 0 0 0 1 0 0 \n", "2 0 0 0 0 0 0 0 1 0 0 \n", "\n", "[3 rows x 24 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies = pd.read_csv('./Datasets/ml-100k/u.item', sep='|', encoding='latin-1', header=None)\n", "movies[:3]" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
id_1id_2id_3id_4id_5id_6id_7id_8id_9id_10...date_30-Mar-1996date_30-May-1997date_30-Nov-1996date_30-Oct-1995date_30-Oct-1996date_31-Dec-1997date_31-Jan-1997date_31-Jul-1996date_31-May-1996date_4-Feb-1971
01000000000...0000000000
10100000000...0000000000
20010000000...0000000000
\n", "

3 rows × 1922 columns

\n", "
" ], "text/plain": [ " id_1 id_2 id_3 id_4 id_5 id_6 id_7 id_8 id_9 id_10 ... \\\n", "0 1 0 0 0 0 0 0 0 0 0 ... \n", "1 0 1 0 0 0 0 0 0 0 0 ... \n", "2 0 0 1 0 0 0 0 0 0 0 ... \n", "\n", " date_30-Mar-1996 date_30-May-1997 date_30-Nov-1996 date_30-Oct-1995 \\\n", "0 0 0 0 0 \n", "1 0 0 0 0 \n", "2 0 0 0 0 \n", "\n", " date_30-Oct-1996 date_31-Dec-1997 date_31-Jan-1997 date_31-Jul-1996 \\\n", "0 0 0 0 0 \n", "1 0 0 0 0 \n", "2 0 0 0 0 \n", "\n", " date_31-May-1996 date_4-Feb-1971 \n", "0 0 0 \n", "1 0 0 \n", "2 0 0 \n", "\n", "[3 rows x 1922 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "movies=movies.astype(object)\n", "id_date=pd.get_dummies(movies[[0,2]], ['id', 'date'])\n", "id_date[:3]" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01
0unknown0
1Action1
2Adventure2
\n", "
" ], "text/plain": [ " 0 1\n", "0 unknown 0\n", "1 Action 1\n", "2 Adventure 2" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "genres = pd.read_csv('./Datasets/ml-100k/u.genre', sep='|', header=None,\n", " encoding='latin-1')\n", "genres[:3]" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "item_genres=movies[np.arange(5,24)]\n", "item_genres.columns=list(genres[0])" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
id_1id_2id_3id_4id_5id_6id_7id_8id_9id_10...FantasyFilm-NoirHorrorMusicalMysteryRomanceSci-FiThrillerWarWestern
01000000000...0000000000
10100000000...0000000100
20010000000...0000000100
30001000000...0000000000
40000100000...0000000100
..................................................................
16770000000000...0000000000
16780000000000...0000010100
16790000000000...0000010000
16800000000000...0000000000
16810000000000...0000000000
\n", "

1682 rows × 1941 columns

\n", "
" ], "text/plain": [ " id_1 id_2 id_3 id_4 id_5 id_6 id_7 id_8 id_9 id_10 ... \\\n", "0 1 0 0 0 0 0 0 0 0 0 ... \n", "1 0 1 0 0 0 0 0 0 0 0 ... \n", "2 0 0 1 0 0 0 0 0 0 0 ... \n", "3 0 0 0 1 0 0 0 0 0 0 ... \n", "4 0 0 0 0 1 0 0 0 0 0 ... \n", "... ... ... ... ... ... ... ... ... ... ... ... \n", "1677 0 0 0 0 0 0 0 0 0 0 ... \n", "1678 0 0 0 0 0 0 0 0 0 0 ... \n", "1679 0 0 0 0 0 0 0 0 0 0 ... \n", "1680 0 0 0 0 0 0 0 0 0 0 ... \n", "1681 0 0 0 0 0 0 0 0 0 0 ... \n", "\n", " Fantasy Film-Noir Horror Musical Mystery Romance Sci-Fi Thriller \\\n", "0 0 0 0 0 0 0 0 0 \n", "1 0 0 0 0 0 0 0 1 \n", "2 0 0 0 0 0 0 0 1 \n", "3 0 0 0 0 0 0 0 0 \n", "4 0 0 0 0 0 0 0 1 \n", "... ... ... ... ... ... ... ... ... \n", "1677 0 0 0 0 0 0 0 0 \n", "1678 0 0 0 0 0 1 0 1 \n", "1679 0 0 0 0 0 1 0 0 \n", "1680 0 0 0 0 0 0 0 0 \n", "1681 0 0 0 0 0 0 0 0 \n", "\n", " War Western \n", "0 0 0 \n", "1 0 0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "... ... ... \n", "1677 0 0 \n", "1678 0 0 \n", "1679 0 0 \n", "1680 0 0 \n", "1681 0 0 \n", "\n", "[1682 rows x 1941 columns]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "item_features_df=pd.concat([id_date, item_genres], axis=1).astype(int)\n", "item_features_df" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "<1682x1941 sparse matrix of type ''\n", "\twith 6256 stored elements in Compressed Sparse Row format>" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "item_features=sparse.csr_matrix(item_features_df.values)\n", "item_features" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### User features" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01234
0124Mtechnician85711
1253Fother94043
2323Mwriter32067
\n", "
" ], "text/plain": [ " 0 1 2 3 4\n", "0 1 24 M technician 85711\n", "1 2 53 F other 94043\n", "2 3 23 M writer 32067" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "users = pd.read_csv('./Datasets/ml-100k/u.user', sep='|', encoding='latin-1', header=None)\n", "users[:3]" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
id_1id_2id_3id_4id_5id_6id_7id_8id_9id_10...FantasyFilm-NoirHorrorMusicalMysteryRomanceSci-FiThrillerWarWestern
01000000000...0000000000
10100000000...0000000100
20010000000...0000000100
\n", "

3 rows × 1941 columns

\n", "
" ], "text/plain": [ " id_1 id_2 id_3 id_4 id_5 id_6 id_7 id_8 id_9 id_10 ... Fantasy \\\n", "0 1 0 0 0 0 0 0 0 0 0 ... 0 \n", "1 0 1 0 0 0 0 0 0 0 0 ... 0 \n", "2 0 0 1 0 0 0 0 0 0 0 ... 0 \n", "\n", " Film-Noir Horror Musical Mystery Romance Sci-Fi Thriller War \\\n", "0 0 0 0 0 0 0 0 0 \n", "1 0 0 0 0 0 0 1 0 \n", "2 0 0 0 0 0 0 1 0 \n", "\n", " Western \n", "0 0 \n", "1 0 \n", "2 0 \n", "\n", "[3 rows x 1941 columns]" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [
"# Cast every column to object so that get_dummies one-hot encodes all of them (ids and ages included)\n",
"users=users.astype(object)\n",
"user_features_df=pd.get_dummies(users, prefix=['id', 'age', 'sex','profesion','zip_code'])\n",
"# Preview the user features built above (not the item features)\n",
"user_features_df[:3]" ] },
{ "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "<943x1682 sparse matrix of type ''\n", "\twith 80000 stored elements in Compressed Sparse Row format>" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_ui" ] },
{ "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "<943x1822 sparse matrix of type ''\n", "\twith 4715 stored elements in Compressed Sparse Row format>" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "user_features=sparse.csr_matrix(user_features_df.values)\n", "user_features" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "# Model" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "#### LightFM with user and item features" ] },
{ "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "logistic\n", "Train precision: 0.09\n", "Test precision: 0.03\n", "bpr\n", "Train precision: 0.57\n", "Test precision: 0.24\n", "warp\n", "Train precision: 0.63\n", "Test precision: 0.34\n" ] } ], "source": [ "from lightfm import LightFM\n", "from lightfm.evaluation import precision_at_k\n", "\n", "for loss in ['logistic', 
'bpr', 'warp']:\n",
"\n",
"    model = LightFM(no_components=10, loss=loss)\n",
"    model.fit(train_ui, user_features=user_features, item_features=item_features, epochs=30, num_threads=4)\n",
"\n",
"    print(loss)\n",
"    print(\"Train precision: %.2f\" % precision_at_k(model, test_interactions=train_ui, \n",
"        user_features=user_features, item_features=item_features, k=10).mean())\n",
"    print(\"Test precision: %.2f\" % precision_at_k(model, test_interactions=test_ui, train_interactions=train_ui,\n",
"        user_features=user_features, item_features=item_features, k=10).mean())" ] },
{ "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [
"def top_k_recommendations(model, user_features, item_features, user_code_id, item_code_id, topK=10):\n",
"    \"\"\"Build one row per user: [user_id, item_id_1, score_1, ..., item_id_topK, score_topK].\n",
"\n",
"    Reads the notebook-level train_ui / test_ui matrices: test_ui fixes the number of users,\n",
"    train_ui supplies the item count and the items each user has already interacted with.\n",
"    user_code_id / item_code_id translate matrix row / column codes back to original ids.\n",
"    \"\"\"\n",
"    all_items=np.arange(train_ui.shape[1]) # loop-invariant, so build it once\n",
"    result=[]\n",
"    for user_code in range(test_ui.shape[0]):\n",
"        user_rated=train_ui.indices[train_ui.indptr[user_code]:train_ui.indptr[user_code+1]]\n",
"        scores = model.predict(user_code, all_items, user_features=user_features, item_features=item_features)\n",
"\n",
"        scores[user_rated]=-np.inf # to put rated items at the end of the list\n",
"\n",
"        top_codes=np.argsort(-scores)[:topK] # rank once, reuse for both the ids and the scores\n",
"        top_items=[item_code_id[item] for item in top_codes]\n",
"        result.append([user_code_id[user_code]]+list(chain(*zip(top_items, scores[top_codes]))))\n",
"    return result\n",
"\n",
"def estimate(model, user_features, item_features, user_code_id, item_code_id, test_ui):\n",
"    \"\"\"Return [user_id, item_id, predicted_score] for every non-zero entry of test_ui.\n",
"\n",
"    The values are LightFM ranking scores rather than ratings on the dataset's scale, so\n",
"    error metrics (RMSE / MAE) computed from them are not comparable with rating predictors.\n",
"    \"\"\"\n",
"    users, items = test_ui.nonzero()\n",
"    if len(users)==0:\n",
"        return []\n",
"    # predict() scores aligned (user, item) pairs, so one batched call replaces one call per pair\n",
"    scores = model.predict(users, items, user_features=user_features, item_features=item_features)\n",
"    return [[user_code_id[user], item_code_id[item], score]\n",
"            for user, item, score in zip(users, items, scores)]" ] },
{ "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [
"# `model` is the model trained last by the loss loop above (loss='warp')\n",
"top_n=pd.DataFrame(top_k_recommendations(model=model, user_features=user_features, item_features=item_features, user_code_id=user_code_id, item_code_id=item_code_id, topK=10))\n",
"top_n.to_csv('Recommendations generated/ml-100k/Ready_LightFM_reco.csv', index=False, header=False)\n",
"\n",
"estimations=pd.DataFrame(estimate(model=model, user_features=user_features, item_features=item_features, user_code_id=user_code_id, item_code_id=item_code_id, test_ui=test_ui))\n",
"estimations.to_csv('Recommendations generated/ml-100k/Ready_LightFM_estimations.csv', index=False, header=False)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "#### Pure MF with LightFM" ] },
{ "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [
"# Keep only the one-hot id columns, matched by prefix so that an attribute column that\n",
"# merely contains 'id_' somewhere in its name can never be picked up by accident.\n",
"item_features_interactions=sparse.csr_matrix(item_features_df[[item_feature for item_feature in item_features_df.columns\n",
"                                                                if item_feature.startswith('id_')]].values)\n",
"user_features_interactions=sparse.csr_matrix(user_features_df[[user_feature for user_feature in user_features_df.columns\n",
"                                                                if user_feature.startswith('id_')]].values)" ] },
{ "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Train precision: 0.63\n", "Test precision: 0.33\n" ] } ], "source": [
"from lightfm import LightFM\n",
"from lightfm.evaluation import precision_at_k\n",
"\n",
"model = LightFM(loss='warp')\n",
"model.fit(train_ui, user_features=user_features_interactions, item_features=item_features_interactions, epochs=30, num_threads=4)\n",
"\n",
"# Evaluate with the same feature matrices that were passed to fit(), as the other sections do\n",
"print(\"Train precision: %.2f\" % precision_at_k(model, test_interactions=train_ui,\n",
"        user_features=user_features_interactions, item_features=item_features_interactions, k=10).mean())\n",
"print(\"Test precision: %.2f\" % precision_at_k(model, test_interactions=test_ui, train_interactions=train_ui,\n",
"        user_features=user_features_interactions, item_features=item_features_interactions, k=10).mean())" ] },
{ "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [
"top_n=pd.DataFrame(top_k_recommendations(model=model, user_features=user_features_interactions, item_features=item_features_interactions, user_code_id=user_code_id, item_code_id=item_code_id, topK=10))\n",
"top_n.to_csv('Recommendations generated/ml-100k/Ready_LightFMpureMF_reco.csv', index=False, 
header=False)\n",
"\n",
"estimations=pd.DataFrame(estimate(model=model, user_features=user_features_interactions, item_features=item_features_interactions, user_code_id=user_code_id, item_code_id=item_code_id, test_ui=test_ui))\n",
"estimations.to_csv('Recommendations generated/ml-100k/Ready_LightFMpureMF_estimations.csv', index=False, header=False)" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "#### LightFM with user/item attributes only (without treating id as a feature)" ] },
{ "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [
"# Drop the one-hot id columns (matched by prefix) so only user/item attributes remain;\n",
"# a prefix test keeps any attribute column that merely contains 'id_' inside its name.\n",
"item_features_only=sparse.csr_matrix(item_features_df[[item_feature for item_feature in item_features_df.columns\n",
"                                                        if not item_feature.startswith('id_')]].values)\n",
"user_features_only=sparse.csr_matrix(user_features_df[[user_feature for user_feature in user_features_df.columns\n",
"                                                        if not user_feature.startswith('id_')]].values)" ] },
{ "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Train precision: 0.40\n", "Test precision: 0.16\n" ] } ], "source": [
"from lightfm import LightFM\n",
"from lightfm.evaluation import precision_at_k\n",
"\n",
"model = LightFM(loss='warp')\n",
"model.fit(train_ui, user_features=user_features_only, item_features=item_features_only, epochs=30, num_threads=4)\n",
"\n",
"print(\"Train precision: %.2f\" % precision_at_k(model, test_interactions=train_ui, \n",
"        user_features=user_features_only, item_features=item_features_only, k=10).mean())\n",
"print(\"Test precision: %.2f\" % precision_at_k(model, test_interactions=test_ui, train_interactions=train_ui,\n",
"        user_features=user_features_only, item_features=item_features_only, k=10).mean())" ] },
{ "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [
"top_n=pd.DataFrame(top_k_recommendations(model=model, user_features=user_features_only, item_features=item_features_only, user_code_id=user_code_id, 
item_code_id=item_code_id, topK=10))\n", "top_n.to_csv('Recommendations generated/ml-100k/Ready_LightFMcontent_reco.csv', index=False, header=False)\n", "\n", "estimations=pd.DataFrame(estimate(model=model, user_features=user_features_only, item_features=item_features_only, user_code_id=user_code_id, item_code_id=item_code_id, test_ui=test_ui))\n", "estimations.to_csv('Recommendations generated/ml-100k/Ready_LightFMcontent_estimations.csv', index=False, header=False)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "943it [00:00, 8294.93it/s]\n", "943it [00:00, 7480.38it/s]\n", "943it [00:00, 8182.78it/s]\n", "943it [00:00, 7942.50it/s]\n", "943it [00:00, 7571.16it/s]\n", "943it [00:00, 7715.40it/s]\n", "943it [00:00, 8094.16it/s]\n", "943it [00:00, 9015.90it/s]\n", "943it [00:00, 7848.42it/s]\n", "943it [00:00, 7401.02it/s]\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ModelRMSEMAEprecisionrecallF_1F_05precision_superrecall_super
0Ready_LightFMpureMF7.9531927.4620080.3344640.2199970.2172250.2549810.2337980.266952
0Ready_LightFM162.707436160.8554830.3408270.2176820.2179900.2580100.2438840.260663
0Self_P33.7024463.5272730.2821850.1920920.1867490.2169800.2041850.240096
0Ready_ImplicitALS3.2661013.0658240.2550370.1886530.1768520.2011890.1666310.214925
0Self_TopPop2.5082582.2179090.1888650.1169190.1187320.1415840.1304720.137473
0Ready_LightFMcontent182.471340180.4052100.1603390.1012240.1021980.1210740.1026820.112455
0Ready_Baseline0.9494590.7524870.0914100.0376520.0460300.0612860.0796140.056463
0Self_GlobalAvg1.1257600.9435340.0611880.0259680.0313830.0413430.0405580.032107
0Ready_Random1.5143551.2163830.0497350.0223000.0257820.0335980.0282190.021751
0Self_BaselineUI0.9675850.7627400.0009540.0001700.0002780.0004630.0006440.000189
\n", "
" ], "text/plain": [ " Model RMSE MAE precision recall \\\n", "0 Ready_LightFMpureMF 7.953192 7.462008 0.334464 0.219997 \n", "0 Ready_LightFM 162.707436 160.855483 0.340827 0.217682 \n", "0 Self_P3 3.702446 3.527273 0.282185 0.192092 \n", "0 Ready_ImplicitALS 3.266101 3.065824 0.255037 0.188653 \n", "0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 \n", "0 Ready_LightFMcontent 182.471340 180.405210 0.160339 0.101224 \n", "0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 \n", "0 Self_GlobalAvg 1.125760 0.943534 0.061188 0.025968 \n", "0 Ready_Random 1.514355 1.216383 0.049735 0.022300 \n", "0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 \n", "\n", " F_1 F_05 precision_super recall_super \n", "0 0.217225 0.254981 0.233798 0.266952 \n", "0 0.217990 0.258010 0.243884 0.260663 \n", "0 0.186749 0.216980 0.204185 0.240096 \n", "0 0.176852 0.201189 0.166631 0.214925 \n", "0 0.118732 0.141584 0.130472 0.137473 \n", "0 0.102198 0.121074 0.102682 0.112455 \n", "0 0.046030 0.061286 0.079614 0.056463 \n", "0 0.031383 0.041343 0.040558 0.032107 \n", "0 0.025782 0.033598 0.028219 0.021751 \n", "0 0.000278 0.000463 0.000644 0.000189 " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ModelNDCGmAPMRRLAUCHRReco in testTest coverageShannonGini
0Ready_LightFMpureMF0.3987780.2630580.6291290.6077090.9130431.0000000.2756135.0858180.913665
0Ready_LightFM0.4038500.2682660.6375900.6065680.8981971.0000000.3513715.3662910.885046
0Self_P30.3391140.2049050.5721570.5935440.8759281.0000000.0772013.8758920.974947
0Ready_ImplicitALS0.3059080.1725460.5238710.5917090.8897141.0000000.5028865.7229570.827507
0Self_TopPop0.2146510.1117070.4009390.5555460.7656421.0000000.0389613.1590790.987317
0Ready_LightFMcontent0.1800790.0874290.3378250.5475720.7041360.9749730.2647914.9098930.926201
0Ready_Baseline0.0959570.0431780.1981930.5155010.4379641.0000000.0339112.8365130.991139
0Self_GlobalAvg0.0676950.0274700.1711870.5095460.3849421.0000000.0259742.7117720.992003
0Ready_Random0.0543830.0211190.1339780.5076800.3393430.9869570.1774895.0886700.907676
0Self_BaselineUI0.0007520.0001680.0016770.4964240.0095440.6005300.0050511.8031260.996380
\n", "
" ], "text/plain": [ " Model NDCG mAP MRR LAUC HR \\\n", "0 Ready_LightFMpureMF 0.398778 0.263058 0.629129 0.607709 0.913043 \n", "0 Ready_LightFM 0.403850 0.268266 0.637590 0.606568 0.898197 \n", "0 Self_P3 0.339114 0.204905 0.572157 0.593544 0.875928 \n", "0 Ready_ImplicitALS 0.305908 0.172546 0.523871 0.591709 0.889714 \n", "0 Self_TopPop 0.214651 0.111707 0.400939 0.555546 0.765642 \n", "0 Ready_LightFMcontent 0.180079 0.087429 0.337825 0.547572 0.704136 \n", "0 Ready_Baseline 0.095957 0.043178 0.198193 0.515501 0.437964 \n", "0 Self_GlobalAvg 0.067695 0.027470 0.171187 0.509546 0.384942 \n", "0 Ready_Random 0.054383 0.021119 0.133978 0.507680 0.339343 \n", "0 Self_BaselineUI 0.000752 0.000168 0.001677 0.496424 0.009544 \n", "\n", " Reco in test Test coverage Shannon Gini \n", "0 1.000000 0.275613 5.085818 0.913665 \n", "0 1.000000 0.351371 5.366291 0.885046 \n", "0 1.000000 0.077201 3.875892 0.974947 \n", "0 1.000000 0.502886 5.722957 0.827507 \n", "0 1.000000 0.038961 3.159079 0.987317 \n", "0 0.974973 0.264791 4.909893 0.926201 \n", "0 1.000000 0.033911 2.836513 0.991139 \n", "0 1.000000 0.025974 2.711772 0.992003 \n", "0 0.986957 0.177489 5.088670 0.907676 \n", "0 0.600530 0.005051 1.803126 0.996380 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import evaluation_measures as ev\n", "\n", "dir_path=\"Recommendations generated/ml-100k/\"\n", "super_reactions=[4,5]\n", "test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n", "\n", "df=ev.evaluate_all(test, dir_path, super_reactions)\n", "display(df.iloc[:,:9])\n", "display(df.iloc[:,np.append(0,np.arange(9, df.shape[1]))])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", 
"nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.9" } }, "nbformat": 4, "nbformat_minor": 4 }