{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import random\n", "import time\n", "from collections import defaultdict\n", "from itertools import chain\n", "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import scipy.sparse as sparse\n", "from lightfm import LightFM\n", "from lightfm.evaluation import precision_at_k\n", "\n", "import helpers\n", "\n", "train_read=pd.read_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None)\n", "test_read=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n", "train_ui, test_ui, user_code_id, user_id_code, item_code_id, item_id_code = helpers.data_to_csr(train_read, test_read)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "<943x1682 sparse matrix of type ''\n", "\twith 80000 stored elements in Compressed Sparse Row format>" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_ui" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Let's prepare user and item features" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Item features" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789...14151617181920212223
01Toy Story (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?Toy%20Story%2...00011...0000000000
12GoldenEye (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?GoldenEye%20(...01100...0000000100
23Four Rooms (1995)01-Jan-1995NaNhttp://us.imdb.com/M/title-exact?Four%20Rooms%...00000...0000000100
\n", "

3 rows × 24 columns

\n", "
" ], "text/plain": [ " 0 1 2 3 \\\n", "0 1 Toy Story (1995) 01-Jan-1995 NaN \n", "1 2 GoldenEye (1995) 01-Jan-1995 NaN \n", "2 3 Four Rooms (1995) 01-Jan-1995 NaN \n", "\n", " 4 5 6 7 8 9 ... \\\n", "0 http://us.imdb.com/M/title-exact?Toy%20Story%2... 0 0 0 1 1 ... \n", "1 http://us.imdb.com/M/title-exact?GoldenEye%20(... 0 1 1 0 0 ... \n", "2 http://us.imdb.com/M/title-exact?Four%20Rooms%... 0 0 0 0 0 ... \n", "\n", " 14 15 16 17 18 19 20 21 22 23 \n", "0 0 0 0 0 0 0 0 0 0 0 \n", "1 0 0 0 0 0 0 0 1 0 0 \n", "2 0 0 0 0 0 0 0 1 0 0 \n", "\n", "[3 rows x 24 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# u.item is read as pipe-separated, latin-1 encoded text without a header row.\n", "movies = pd.read_csv(\n", "    './Datasets/ml-100k/u.item',\n", "    sep='|',\n", "    header=None,\n", "    encoding='latin-1',\n", ")\n", "movies.head(3)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
id_1id_2id_3id_4id_5id_6id_7id_8id_9id_10...date_30-Mar-1996date_30-May-1997date_30-Nov-1996date_30-Oct-1995date_30-Oct-1996date_31-Dec-1997date_31-Jan-1997date_31-Jul-1996date_31-May-1996date_4-Feb-1971
01000000000...0000000000
10100000000...0000000000
20010000000...0000000000
\n", "

3 rows × 1922 columns

\n", "
" ], "text/plain": [ " id_1 id_2 id_3 id_4 id_5 id_6 id_7 id_8 id_9 id_10 ... \\\n", "0 1 0 0 0 0 0 0 0 0 0 ... \n", "1 0 1 0 0 0 0 0 0 0 0 ... \n", "2 0 0 1 0 0 0 0 0 0 0 ... \n", "\n", " date_30-Mar-1996 date_30-May-1997 date_30-Nov-1996 date_30-Oct-1995 \\\n", "0 0 0 0 0 \n", "1 0 0 0 0 \n", "2 0 0 0 0 \n", "\n", " date_30-Oct-1996 date_31-Dec-1997 date_31-Jan-1997 date_31-Jul-1996 \\\n", "0 0 0 0 0 \n", "1 0 0 0 0 \n", "2 0 0 0 0 \n", "\n", " date_31-May-1996 date_4-Feb-1971 \n", "0 0 0 \n", "1 0 0 \n", "2 0 0 \n", "\n", "[3 rows x 1922 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Cast everything to object first; presumably so that get_dummies also one-hot encodes the numeric id column.\n", "movies = movies.astype(object)\n", "# Column 0 -> 'id_*' indicators, column 2 -> 'date_*' indicators.\n", "id_date = pd.get_dummies(movies[[0, 2]], prefix=['id', 'date'])\n", "id_date.head(3)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01
0unknown0
1Action1
2Adventure2
\n", "
" ], "text/plain": [ " 0 1\n", "0 unknown 0\n", "1 Action 1\n", "2 Adventure 2" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# u.genre is read as pipe-separated text without a header row (genre name | genre index).\n", "genres = pd.read_csv(\n", "    './Datasets/ml-100k/u.genre',\n", "    sep='|',\n", "    header=None,\n", "    encoding='latin-1',\n", ")\n", "genres.head(3)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# Columns 5..23 of u.item are relabelled with the genre names read from u.genre.\n", "item_genres = movies[list(range(5, 24))]\n", "item_genres.columns = genres[0].tolist()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
id_1id_2id_3id_4id_5id_6id_7id_8id_9id_10...FantasyFilm-NoirHorrorMusicalMysteryRomanceSci-FiThrillerWarWestern
01000000000...0000000000
10100000000...0000000100
20010000000...0000000100
30001000000...0000000000
40000100000...0000000100
..................................................................
16770000000000...0000000000
16780000000000...0000010100
16790000000000...0000010000
16800000000000...0000000000
16810000000000...0000000000
\n", "

1682 rows × 1941 columns

\n", "
" ], "text/plain": [ " id_1 id_2 id_3 id_4 id_5 id_6 id_7 id_8 id_9 id_10 ... \\\n", "0 1 0 0 0 0 0 0 0 0 0 ... \n", "1 0 1 0 0 0 0 0 0 0 0 ... \n", "2 0 0 1 0 0 0 0 0 0 0 ... \n", "3 0 0 0 1 0 0 0 0 0 0 ... \n", "4 0 0 0 0 1 0 0 0 0 0 ... \n", "... ... ... ... ... ... ... ... ... ... ... ... \n", "1677 0 0 0 0 0 0 0 0 0 0 ... \n", "1678 0 0 0 0 0 0 0 0 0 0 ... \n", "1679 0 0 0 0 0 0 0 0 0 0 ... \n", "1680 0 0 0 0 0 0 0 0 0 0 ... \n", "1681 0 0 0 0 0 0 0 0 0 0 ... \n", "\n", " Fantasy Film-Noir Horror Musical Mystery Romance Sci-Fi Thriller \\\n", "0 0 0 0 0 0 0 0 0 \n", "1 0 0 0 0 0 0 0 1 \n", "2 0 0 0 0 0 0 0 1 \n", "3 0 0 0 0 0 0 0 0 \n", "4 0 0 0 0 0 0 0 1 \n", "... ... ... ... ... ... ... ... ... \n", "1677 0 0 0 0 0 0 0 0 \n", "1678 0 0 0 0 0 1 0 1 \n", "1679 0 0 0 0 0 1 0 0 \n", "1680 0 0 0 0 0 0 0 0 \n", "1681 0 0 0 0 0 0 0 0 \n", "\n", " War Western \n", "0 0 0 \n", "1 0 0 \n", "2 0 0 \n", "3 0 0 \n", "4 0 0 \n", "... ... ... \n", "1677 0 0 \n", "1678 0 0 \n", "1679 0 0 \n", "1680 0 0 \n", "1681 0 0 \n", "\n", "[1682 rows x 1941 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Side-by-side join of the id/date indicators and the genre flags, cast to int.\n", "item_features_df = pd.concat([id_date, item_genres], axis='columns').astype(int)\n", "item_features_df" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "<1682x1941 sparse matrix of type ''\n", "\twith 6256 stored elements in Compressed Sparse Row format>" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# CSR copy of the feature table; this is what gets passed to LightFM as item_features.\n", "item_features = sparse.csr_matrix(item_features_df.to_numpy())\n", "item_features" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### User features" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01234
0124Mtechnician85711
1253Fother94043
2323Mwriter32067
\n", "
" ], "text/plain": [ " 0 1 2 3 4\n", "0 1 24 M technician 85711\n", "1 2 53 F other 94043\n", "2 3 23 M writer 32067" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "users = pd.read_csv('./Datasets/ml-100k/u.user', sep='|', encoding='latin-1', header=None)\n", "users[:3]" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"users=users.astype(object)\n",
"# NOTE: the 'profesion' prefix is misspelled; it is kept as-is so the generated column names do not change.\n",
"user_features_df=pd.get_dummies(users, ['id', 'age', 'sex','profesion','zip_code'])\n",
"# Show the frame that was just built (this cell used to display item_features_df by mistake).\n",
"user_features_df[:3]" ] },
{ "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "<943x1682 sparse matrix of type ''\n", "\twith 80000 stored elements in Compressed Sparse Row format>" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_ui" ] },
{ "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "<943x1822 sparse matrix of type ''\n", "\twith 4715 stored elements in Compressed Sparse Row format>" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "user_features=sparse.csr_matrix(user_features_df.values)\n", "user_features" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "# Model" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "#### LightFM with user and item features" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"# LightFM and precision_at_k are imported in the first cell of the notebook.\n",
"# NOTE(review): assumes the rows of user_features / item_features follow the user / item codes of train_ui -\n",
"# confirm against helpers.data_to_csr.\n",
"for loss in ['logistic', 'bpr', 'warp']:\n",
"\n",
"    model = LightFM(no_components=10, loss=loss)\n",
"    model.fit(train_ui, user_features=user_features, item_features=item_features, epochs=30, num_threads=4)\n",
"\n",
"    print(loss)\n",
"    print(\"Train precision: %.2f\" % precision_at_k(model, test_interactions=train_ui,\n",
"                                                    user_features=user_features, item_features=item_features, k=10).mean())\n",
"    print(\"Test precision: %.2f\" % precision_at_k(model, test_interactions=test_ui, train_interactions=train_ui,\n",
"                                                   user_features=user_features, item_features=item_features, k=10).mean())" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"def top_k_recommendations(model, user_features, item_features, user_code_id, item_code_id, topK=10):\n",
"    \"\"\"Return the topK best-scored items for every user.\n",
"\n",
"    Each row is [user_id, item_id_1, score_1, ..., item_id_topK, score_topK], where the ids are\n",
"    looked up in user_code_id / item_code_id. Items with a stored entry in the user's train_ui\n",
"    row get a score of -inf, which moves them to the end of the ranking.\n",
"\n",
"    NOTE: reads the notebook-level train_ui and test_ui matrices.\n",
"    \"\"\"\n",
"    all_items = np.arange(train_ui.shape[1])  # same for every user, so built once\n",
"    result = []\n",
"    for user_code in range(test_ui.shape[0]):\n",
"        user_rated = train_ui.indices[train_ui.indptr[user_code]:train_ui.indptr[user_code+1]]\n",
"        scores = model.predict(user_code, all_items, user_features=user_features, item_features=item_features)\n",
"\n",
"        scores[user_rated] = -np.inf  # to put rated items at the end of the list\n",
"\n",
"        best = np.argsort(-scores)[:topK]  # rank once, reuse for both the ids and the scores\n",
"        top_items = [item_code_id[item] for item in best]\n",
"        result.append([user_code_id[user_code]] + list(chain(*zip(top_items, scores[best]))))\n",
"    return result\n",
"\n",
"\n",
"def estimate(model, user_features, item_features, user_code_id, item_code_id, test_ui):\n",
"    \"\"\"Score every non-zero (user, item) entry of test_ui.\n",
"\n",
"    Returns a list of [user_id, item_id, predicted_score] rows, with the ids looked up in\n",
"    user_code_id / item_code_id.\n",
"    \"\"\"\n",
"    user_codes, item_codes = test_ui.nonzero()\n",
"    if len(user_codes) == 0:  # nothing to score\n",
"        return []\n",
"    # One batched predict() call over all pairs instead of one call per pair.\n",
"    scores = model.predict(user_codes, item_codes, user_features=user_features, item_features=item_features)\n",
"    return [[user_code_id[user], item_code_id[item], score]\n",
"            for user, item, score in zip(user_codes, item_codes, scores)]\n",
"\n",
"\n",
"def export_predictions(model, user_features, item_features, file_prefix, topK=10):\n",
"    \"\"\"Write file_prefix + '_reco.csv' and file_prefix + '_estimations.csv' for a fitted model.\n",
"\n",
"    Uses the notebook-level user_code_id, item_code_id and test_ui.\n",
"    Returns the two DataFrames that were written, as (top_n, estimations).\n",
"    \"\"\"\n",
"    top_n = pd.DataFrame(top_k_recommendations(model=model, user_features=user_features, item_features=item_features,\n",
"                                               user_code_id=user_code_id, item_code_id=item_code_id, topK=topK))\n",
"    top_n.to_csv(file_prefix + '_reco.csv', index=False, header=False)\n",
"\n",
"    estimations = pd.DataFrame(estimate(model=model, user_features=user_features, item_features=item_features,\n",
"                                        user_code_id=user_code_id, item_code_id=item_code_id, test_ui=test_ui))\n",
"    estimations.to_csv(file_prefix + '_estimations.csv', index=False, header=False)\n",
"    return top_n, estimations" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"# `model` is the last one fitted in the loop above (loss='warp').\n",
"top_n, estimations = export_predictions(model, user_features, item_features,\n",
"                                        'Recommendations generated/ml-100k/Ready_LightFM')" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "#### Pure MF with LightFM" ] },
{ "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [
"# Keep only the one-hot id columns (no side information).\n",
"item_features_interactions = sparse.csr_matrix(\n",
"    item_features_df[[column for column in item_features_df.columns if column.startswith('id_')]].values)\n",
"user_features_interactions = sparse.csr_matrix(\n",
"    user_features_df[[column for column in user_features_df.columns if column.startswith('id_')]].values)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"model = LightFM(loss='warp')\n",
"model.fit(train_ui, user_features=user_features_interactions, item_features=item_features_interactions, epochs=30, num_threads=4)\n",
"\n",
"# Evaluate with the same feature matrices the model was fitted with.\n",
"print(\"Train precision: %.2f\" % precision_at_k(model, test_interactions=train_ui,\n",
"      user_features=user_features_interactions, item_features=item_features_interactions, k=10).mean())\n",
"print(\"Test precision: %.2f\" % precision_at_k(model, test_interactions=test_ui, train_interactions=train_ui,\n",
"      user_features=user_features_interactions, item_features=item_features_interactions, k=10).mean())" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"top_n, estimations = export_predictions(model, user_features_interactions, item_features_interactions,\n",
"                                        'Recommendations generated/ml-100k/Ready_LightFMpureMF')" ] },
{ "cell_type": "markdown", "metadata": {}, "source": [ "#### LightFM with user/item attributes only (without treating id as a feature)" ] },
{ "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [
"# Drop the one-hot id columns and keep only the attribute columns.\n",
"item_features_only = sparse.csr_matrix(\n",
"    item_features_df[[column for column in item_features_df.columns if not column.startswith('id_')]].values)\n",
"user_features_only = sparse.csr_matrix(\n",
"    user_features_df[[column for column in user_features_df.columns if not column.startswith('id_')]].values)" ] },
{ "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Train precision: 0.39\n", "Test precision: 0.16\n" ] } ], "source": [
"model = LightFM(loss='warp')\n",
"model.fit(train_ui, user_features=user_features_only, item_features=item_features_only, epochs=30, num_threads=4)\n",
"\n",
"print(\"Train precision: %.2f\" % precision_at_k(model, test_interactions=train_ui,\n",
"      user_features=user_features_only, item_features=item_features_only, k=10).mean())\n",
"print(\"Test precision: %.2f\" % precision_at_k(model, test_interactions=test_ui, train_interactions=train_ui,\n",
"      user_features=user_features_only, item_features=item_features_only, k=10).mean())" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
"top_n, estimations = export_predictions(model, user_features_only, item_features_only,\n",
"                                        'Recommendations generated/ml-100k/Ready_LightFMcontent')" ] },
{ "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "943it [00:00, 10622.43it/s]\n", "943it [00:00, 9747.58it/s]\n", "943it [00:00, 10554.65it/s]\n", "943it [00:00, 9450.92it/s]\n", "943it [00:00, 10058.79it/s]\n", "943it [00:00, 10744.58it/s]\n", "943it [00:00, 10390.37it/s]\n", "943it [00:00, 10578.65it/s]\n", "943it [00:00, 
11388.05it/s]\n", "943it [00:00, 11256.24it/s]\n", "943it [00:00, 10166.93it/s]\n", "943it [00:00, 10388.40it/s]\n", "943it [00:00, 10058.69it/s]\n", "943it [00:00, 10740.81it/s]\n", "943it [00:00, 9636.75it/s]\n", "943it [00:00, 10511.93it/s]\n", "943it [00:00, 10864.37it/s]\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ModelRMSEMAEprecisionrecallF_1F_05precision_superrecall_super
0Self_RP3Beta3.7045893.5293970.2867440.1965240.1911170.2213750.2139480.251263
0Self_P33.7024463.5272730.2821850.1920920.1867490.2169800.2041850.240096
0Self_TopPop2.5082582.2179090.1888650.1169190.1187320.1415840.1304720.137473
0Ready_SVD0.9491650.7466670.0939550.0449690.0511970.0654740.0839060.073996
0Self_SVD0.9163300.7201530.1033930.0444550.0531770.0700730.0938840.079366
0Ready_Baseline0.9494590.7524870.0914100.0376520.0460300.0612860.0796140.056463
0Ready_SVDBiased0.9381460.7399170.0865320.0370670.0448320.0588770.0780040.057865
0Self_GlobalAvg1.1257600.9435340.0611880.0259680.0313830.0413430.0405580.032107
0Ready_Random1.5100301.2118480.0500530.0223670.0259840.0337270.0306870.023255
0Ready_I-KNN1.0303860.8130670.0260870.0069080.0105930.0160460.0211370.009522
0Ready_I-KNNWithZScore0.9577010.7523870.0037120.0019940.0023800.0029190.0034330.002401
0Ready_I-KNNBaseline0.9353270.7374240.0025450.0007550.0011050.0016020.0022530.000930
0Ready_I-KNNWithMeans0.9353270.7374240.0025450.0007550.0011050.0016020.0022530.000930
0Ready_U-KNN1.0234950.8079130.0007420.0002050.0003050.0004490.0005360.000198
0Self_TopRated2.5082582.2179090.0009540.0001880.0002980.0004810.0006440.000223
0Self_BaselineUI0.9675850.7627400.0009540.0001700.0002780.0004630.0006440.000189
0Self_IKNN1.0183630.8087930.0003180.0001080.0001400.0001890.0000000.000000
\n", "
" ], "text/plain": [ " Model RMSE MAE precision recall F_1 \\\n", "0 Self_RP3Beta 3.704589 3.529397 0.286744 0.196524 0.191117 \n", "0 Self_P3 3.702446 3.527273 0.282185 0.192092 0.186749 \n", "0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n", "0 Ready_SVD 0.949165 0.746667 0.093955 0.044969 0.051197 \n", "0 Self_SVD 0.916330 0.720153 0.103393 0.044455 0.053177 \n", "0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n", "0 Ready_SVDBiased 0.938146 0.739917 0.086532 0.037067 0.044832 \n", "0 Self_GlobalAvg 1.125760 0.943534 0.061188 0.025968 0.031383 \n", "0 Ready_Random 1.510030 1.211848 0.050053 0.022367 0.025984 \n", "0 Ready_I-KNN 1.030386 0.813067 0.026087 0.006908 0.010593 \n", "0 Ready_I-KNNWithZScore 0.957701 0.752387 0.003712 0.001994 0.002380 \n", "0 Ready_I-KNNBaseline 0.935327 0.737424 0.002545 0.000755 0.001105 \n", "0 Ready_I-KNNWithMeans 0.935327 0.737424 0.002545 0.000755 0.001105 \n", "0 Ready_U-KNN 1.023495 0.807913 0.000742 0.000205 0.000305 \n", "0 Self_TopRated 2.508258 2.217909 0.000954 0.000188 0.000298 \n", "0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n", "0 Self_IKNN 1.018363 0.808793 0.000318 0.000108 0.000140 \n", "\n", " F_05 precision_super recall_super \n", "0 0.221375 0.213948 0.251263 \n", "0 0.216980 0.204185 0.240096 \n", "0 0.141584 0.130472 0.137473 \n", "0 0.065474 0.083906 0.073996 \n", "0 0.070073 0.093884 0.079366 \n", "0 0.061286 0.079614 0.056463 \n", "0 0.058877 0.078004 0.057865 \n", "0 0.041343 0.040558 0.032107 \n", "0 0.033727 0.030687 0.023255 \n", "0 0.016046 0.021137 0.009522 \n", "0 0.002919 0.003433 0.002401 \n", "0 0.001602 0.002253 0.000930 \n", "0 0.001602 0.002253 0.000930 \n", "0 0.000449 0.000536 0.000198 \n", "0 0.000481 0.000644 0.000223 \n", "0 0.000463 0.000644 0.000189 \n", "0 0.000189 0.000000 0.000000 " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ModelNDCGmAPMRRLAUCHRReco in testTest coverageShannonGini
0Self_RP3Beta0.3445980.2078360.5879530.5957700.8854720.9981970.1933624.2918210.960775
0Self_P30.3391140.2049050.5721570.5935440.8759281.0000000.0772013.8758920.974947
0Self_TopPop0.2146510.1117070.4009390.5555460.7656421.0000000.0389613.1590790.987317
0Ready_SVD0.1046720.0482110.2207570.5191870.4835630.9979850.2049064.4089130.954288
0Self_SVD0.1077920.0512810.2002100.5189570.4750800.8530220.1471863.9113560.971196
0Ready_Baseline0.0959570.0431780.1981930.5155010.4379641.0000000.0339112.8365130.991139
0Ready_SVDBiased0.0945830.0430130.2023910.5152020.4337220.9960760.1666674.1683540.964092
0Self_GlobalAvg0.0676950.0274700.1711870.5095460.3849421.0000000.0259742.7117720.992003
0Ready_Random0.0553920.0216020.1376900.5077130.3382820.9879110.1875905.1118780.906685
0Ready_I-KNN0.0242140.0089580.0480680.4998850.1548250.4023330.4343435.1336500.877999
0Ready_I-KNNWithZScore0.0051370.0021580.0164580.4973490.0275720.3899260.0678212.4757470.992793
0Ready_I-KNNBaseline0.0034440.0013620.0117600.4967240.0212090.4828210.0598852.2325780.994487
0Ready_I-KNNWithMeans0.0034440.0013620.0117600.4967240.0212090.4828210.0598852.2325780.994487
0Ready_U-KNN0.0008450.0002740.0027440.4964410.0074230.6021210.0108232.0891860.995706
0Self_TopRated0.0010430.0003350.0033480.4964330.0095440.6990460.0050511.9459100.995669
0Self_BaselineUI0.0007520.0001680.0016770.4964240.0095440.6005300.0050511.8031260.996380
0Self_IKNN0.0002140.0000370.0003680.4963910.0031810.3921530.1154404.1747410.965327
\n", "
" ], "text/plain": [ " Model NDCG mAP MRR LAUC HR \\\n", "0 Self_RP3Beta 0.344598 0.207836 0.587953 0.595770 0.885472 \n", "0 Self_P3 0.339114 0.204905 0.572157 0.593544 0.875928 \n", "0 Self_TopPop 0.214651 0.111707 0.400939 0.555546 0.765642 \n", "0 Ready_SVD 0.104672 0.048211 0.220757 0.519187 0.483563 \n", "0 Self_SVD 0.107792 0.051281 0.200210 0.518957 0.475080 \n", "0 Ready_Baseline 0.095957 0.043178 0.198193 0.515501 0.437964 \n", "0 Ready_SVDBiased 0.094583 0.043013 0.202391 0.515202 0.433722 \n", "0 Self_GlobalAvg 0.067695 0.027470 0.171187 0.509546 0.384942 \n", "0 Ready_Random 0.055392 0.021602 0.137690 0.507713 0.338282 \n", "0 Ready_I-KNN 0.024214 0.008958 0.048068 0.499885 0.154825 \n", "0 Ready_I-KNNWithZScore 0.005137 0.002158 0.016458 0.497349 0.027572 \n", "0 Ready_I-KNNBaseline 0.003444 0.001362 0.011760 0.496724 0.021209 \n", "0 Ready_I-KNNWithMeans 0.003444 0.001362 0.011760 0.496724 0.021209 \n", "0 Ready_U-KNN 0.000845 0.000274 0.002744 0.496441 0.007423 \n", "0 Self_TopRated 0.001043 0.000335 0.003348 0.496433 0.009544 \n", "0 Self_BaselineUI 0.000752 0.000168 0.001677 0.496424 0.009544 \n", "0 Self_IKNN 0.000214 0.000037 0.000368 0.496391 0.003181 \n", "\n", " Reco in test Test coverage Shannon Gini \n", "0 0.998197 0.193362 4.291821 0.960775 \n", "0 1.000000 0.077201 3.875892 0.974947 \n", "0 1.000000 0.038961 3.159079 0.987317 \n", "0 0.997985 0.204906 4.408913 0.954288 \n", "0 0.853022 0.147186 3.911356 0.971196 \n", "0 1.000000 0.033911 2.836513 0.991139 \n", "0 0.996076 0.166667 4.168354 0.964092 \n", "0 1.000000 0.025974 2.711772 0.992003 \n", "0 0.987911 0.187590 5.111878 0.906685 \n", "0 0.402333 0.434343 5.133650 0.877999 \n", "0 0.389926 0.067821 2.475747 0.992793 \n", "0 0.482821 0.059885 2.232578 0.994487 \n", "0 0.482821 0.059885 2.232578 0.994487 \n", "0 0.602121 0.010823 2.089186 0.995706 \n", "0 0.699046 0.005051 1.945910 0.995669 \n", "0 0.600530 0.005051 1.803126 0.996380 \n", "0 0.392153 0.115440 4.174741 0.965327 " ] }, 
"metadata": {}, "output_type": "display_data" } ], "source": [ "import evaluation_measures as ev\n", "\n", "dir_path=\"Recommendations generated/ml-100k/\"\n", "super_reactions=[4,5]\n", "test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n", "\n", "df=ev.evaluate_all(test, dir_path, super_reactions)\n", "display(df.iloc[:,:9])\n", "display(df.iloc[:,np.append(0,np.arange(9, df.shape[1]))])" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.3" } }, "nbformat": 4, "nbformat_minor": 4 }