systemy_rekomendacyjne/P3. k-nearest neighbours.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Self made simplified I-KNN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import helpers\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import scipy.sparse as sparse\n",
    "from collections import defaultdict\n",
    "from itertools import chain\n",
    "import random\n",
    "\n",
    "# Raw ml-100k splits: tab-separated files without a header row.\n",
    "train_read = pd.read_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None)\n",
    "test_read = pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n",
    "\n",
    "# Two matrices plus four lookup tables, as returned by the project helper.\n",
    "(train_ui, test_ui,\n",
    " user_code_id, user_id_code,\n",
    " item_code_id, item_id_code) = helpers.data_to_csr(train_read, test_read)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "class IKNN():\n",
    "    \n",
    "    def fit(self, train_ui):\n",
    "        self.train_ui=train_ui\n",
    "        \n",
    "        train_iu=train_ui.transpose()\n",
    "        norms=np.linalg.norm(train_iu.A, axis=1) # here we compute lenth of each item ratings vector\n",
    "        norms=np.vectorize(lambda x: max(x,1))(norms[:,None]) # to avoid dividing by zero\n",
    "\n",
    "        normalized_train_iu=sparse.csr_matrix(train_iu/norms)\n",
    "\n",
    "        self.similarity_matrix_ii=normalized_train_iu*normalized_train_iu.transpose()\n",
    "        \n",
    "        self.estimations=np.array(train_ui*self.similarity_matrix_ii/((train_ui>0)*self.similarity_matrix_ii))\n",
    "        \n",
    "    def recommend(self, user_code_id, item_code_id, topK=10):\n",
    "        \n",
    "        top_k = defaultdict(list)\n",
    "        for nb_user, user in enumerate(self.estimations):\n",
    "            \n",
    "            user_rated=self.train_ui.indices[self.train_ui.indptr[nb_user]:self.train_ui.indptr[nb_user+1]]\n",
    "            for item, score in enumerate(user):\n",
    "                if item not in user_rated and not np.isnan(score):\n",
    "                    top_k[user_code_id[nb_user]].append((item_code_id[item], score))\n",
    "        result=[]\n",
    "        # Let's choose k best items in the format: (user, item1, score1, item2, score2, ...)\n",
    "        for uid, item_scores in top_k.items():\n",
    "            item_scores.sort(key=lambda x: x[1], reverse=True)\n",
    "            result.append([uid]+list(chain(*item_scores[:topK])))\n",
    "        return result\n",
    "    \n",
    "    def estimate(self, user_code_id, item_code_id, test_ui):\n",
    "        result=[]\n",
    "        for user, item in zip(*test_ui.nonzero()):\n",
    "            result.append([user_code_id[user], item_code_id[item], \n",
    "                           self.estimations[user,item] if not np.isnan(self.estimations[user,item]) else 1])\n",
    "        return result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "toy train ui:\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([[3, 4, 0, 0, 5, 0, 0, 4],\n",
       "       [0, 1, 2, 3, 0, 0, 0, 0],\n",
       "       [0, 0, 0, 5, 0, 3, 4, 0]], dtype=int64)"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "similarity matrix:\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([[1.        , 0.9701425 , 0.        , 0.        , 1.        ,\n",
       "        0.        , 0.        , 1.        ],\n",
       "       [0.9701425 , 1.        , 0.24253563, 0.12478355, 0.9701425 ,\n",
       "        0.        , 0.        , 0.9701425 ],\n",
       "       [0.        , 0.24253563, 1.        , 0.51449576, 0.        ,\n",
       "        0.        , 0.        , 0.        ],\n",
       "       [0.        , 0.12478355, 0.51449576, 1.        , 0.        ,\n",
       "        0.85749293, 0.85749293, 0.        ],\n",
       "       [1.        , 0.9701425 , 0.        , 0.        , 1.        ,\n",
       "        0.        , 0.        , 1.        ],\n",
       "       [0.        , 0.        , 0.        , 0.85749293, 0.        ,\n",
       "        1.        , 1.        , 0.        ],\n",
       "       [0.        , 0.        , 0.        , 0.85749293, 0.        ,\n",
       "        1.        , 1.        , 0.        ],\n",
       "       [1.        , 0.9701425 , 0.        , 0.        , 1.        ,\n",
       "        0.        , 0.        , 1.        ]])"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "estimations matrix:\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "array([[4.        , 4.        , 4.        , 4.        , 4.        ,\n",
       "               nan,        nan, 4.        ],\n",
       "       [1.        , 1.35990333, 2.15478388, 2.53390319, 1.        ,\n",
       "        3.        , 3.        , 1.        ],\n",
       "       [       nan, 5.        , 5.        , 4.05248907,        nan,\n",
       "        3.95012863, 3.95012863,        nan]])"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "data": {
      "text/plain": [
       "[[0, 20, 4.0, 30, 4.0],\n",
       " [10, 50, 3.0, 60, 3.0, 0, 1.0, 40, 1.0, 70, 1.0],\n",
       " [20, 10, 5.0, 20, 5.0]]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Toy example: a rating matrix small enough to verify the similarity and\n",
    "# estimation matrices by hand.\n",
    "toy_train_read=pd.read_csv('./Datasets/toy-example/train.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n",
    "toy_test_read=pd.read_csv('./Datasets/toy-example/test.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n",
    "\n",
    "toy_train_ui, toy_test_ui, toy_user_code_id, toy_user_id_code, \\\n",
    "toy_item_code_id, toy_item_id_code = helpers.data_to_csr(toy_train_read, toy_test_read)\n",
    "\n",
    "\n",
    "model=IKNN()\n",
    "model.fit(toy_train_ui)\n",
    "\n",
    "# .toarray() instead of the .A shortcut, which recent SciPy releases no longer provide.\n",
    "print('toy train ui:')\n",
    "display(toy_train_ui.toarray())\n",
    "\n",
    "print('similarity matrix:')\n",
    "display(model.similarity_matrix_ii.toarray())\n",
    "\n",
    "print('estimations matrix:')\n",
    "display(model.estimations)\n",
    "\n",
    "model.recommend(toy_user_code_id, toy_item_code_id)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Train on the full ml-100k matrix, then persist the top-10 lists and the test-set estimates.\n",
    "model = IKNN()\n",
    "model.fit(train_ui)\n",
    "\n",
    "top_n = pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))\n",
    "top_n.to_csv('Recommendations generated/ml-100k/Self_IKNN_reco.csv',\n",
    "             index=False, header=False)\n",
    "\n",
    "estimations = pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))\n",
    "estimations.to_csv('Recommendations generated/ml-100k/Self_IKNN_estimations.csv',\n",
    "                   index=False, header=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "943it [00:00, 6719.05it/s]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>RMSE</th>\n",
       "      <th>MAE</th>\n",
       "      <th>precision</th>\n",
       "      <th>recall</th>\n",
       "      <th>F_1</th>\n",
       "      <th>F_05</th>\n",
       "      <th>precision_super</th>\n",
       "      <th>recall_super</th>\n",
       "      <th>NDCG</th>\n",
       "      <th>mAP</th>\n",
       "      <th>MRR</th>\n",
       "      <th>LAUC</th>\n",
       "      <th>HR</th>\n",
       "      <th>H2R</th>\n",
       "      <th>Reco in test</th>\n",
       "      <th>Test coverage</th>\n",
       "      <th>Shannon</th>\n",
       "      <th>Gini</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1.018363</td>\n",
       "      <td>0.808793</td>\n",
       "      <td>0.000318</td>\n",
       "      <td>0.000108</td>\n",
       "      <td>0.00014</td>\n",
       "      <td>0.000189</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.000214</td>\n",
       "      <td>0.000037</td>\n",
       "      <td>0.000368</td>\n",
       "      <td>0.496391</td>\n",
       "      <td>0.003181</td>\n",
       "      <td>0.0</td>\n",
       "      <td>0.392153</td>\n",
       "      <td>0.11544</td>\n",
       "      <td>4.174741</td>\n",
       "      <td>0.965327</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "       RMSE       MAE  precision    recall      F_1      F_05  \\\n",
       "0  1.018363  0.808793   0.000318  0.000108  0.00014  0.000189   \n",
       "\n",
       "   precision_super  recall_super      NDCG       mAP       MRR      LAUC  \\\n",
       "0              0.0           0.0  0.000214  0.000037  0.000368  0.496391   \n",
       "\n",
       "         HR  H2R  Reco in test  Test coverage   Shannon      Gini  \n",
       "0  0.003181  0.0      0.392153        0.11544  4.174741  0.965327  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import evaluation_measures as ev\n",
    "# Evaluate from the files on disk so the metrics describe exactly what was saved.\n",
    "estimations_df = pd.read_csv('Recommendations generated/ml-100k/Self_IKNN_estimations.csv', header=None)\n",
    "reco = np.loadtxt('Recommendations generated/ml-100k/Self_IKNN_reco.csv', delimiter=',')\n",
    "test = pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n",
    "\n",
    "ev.evaluate(test=test,\n",
    "            estimations_df=estimations_df,\n",
    "            reco=reco,\n",
    "            super_reactions=[4, 5])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "943it [00:00, 7023.03it/s]\n",
      "943it [00:00, 6323.02it/s]\n",
      "943it [00:00, 6003.69it/s]\n",
      "943it [00:00, 6582.48it/s]\n",
      "943it [00:00, 5623.69it/s]\n",
      "943it [00:00, 6775.77it/s]\n",
      "943it [00:00, 6119.28it/s]\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Model</th>\n",
       "      <th>RMSE</th>\n",
       "      <th>MAE</th>\n",
       "      <th>precision</th>\n",
       "      <th>recall</th>\n",
       "      <th>F_1</th>\n",
       "      <th>F_05</th>\n",
       "      <th>precision_super</th>\n",
       "      <th>recall_super</th>\n",
       "      <th>NDCG</th>\n",
       "      <th>mAP</th>\n",
       "      <th>MRR</th>\n",
       "      <th>LAUC</th>\n",
       "      <th>HR</th>\n",
       "      <th>H2R</th>\n",
       "      <th>Reco in test</th>\n",
       "      <th>Test coverage</th>\n",
       "      <th>Shannon</th>\n",
       "      <th>Gini</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Self_TopPop</td>\n",
       "      <td>2.508258</td>\n",
       "      <td>2.217909</td>\n",
       "      <td>0.188865</td>\n",
       "      <td>0.116919</td>\n",
       "      <td>0.118732</td>\n",
       "      <td>0.141584</td>\n",
       "      <td>0.130472</td>\n",
       "      <td>0.137473</td>\n",
       "      <td>0.214651</td>\n",
       "      <td>0.111707</td>\n",
       "      <td>0.400939</td>\n",
       "      <td>0.555546</td>\n",
       "      <td>0.765642</td>\n",
       "      <td>0.492047</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.038961</td>\n",
       "      <td>3.159079</td>\n",
       "      <td>0.987317</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Ready_Baseline</td>\n",
       "      <td>0.949459</td>\n",
       "      <td>0.752487</td>\n",
       "      <td>0.091410</td>\n",
       "      <td>0.037652</td>\n",
       "      <td>0.046030</td>\n",
       "      <td>0.061286</td>\n",
       "      <td>0.079614</td>\n",
       "      <td>0.056463</td>\n",
       "      <td>0.095957</td>\n",
       "      <td>0.043178</td>\n",
       "      <td>0.198193</td>\n",
       "      <td>0.515501</td>\n",
       "      <td>0.437964</td>\n",
       "      <td>0.239661</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.033911</td>\n",
       "      <td>2.836513</td>\n",
       "      <td>0.991139</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Self_GlobalAvg</td>\n",
       "      <td>1.125760</td>\n",
       "      <td>0.943534</td>\n",
       "      <td>0.061188</td>\n",
       "      <td>0.025968</td>\n",
       "      <td>0.031383</td>\n",
       "      <td>0.041343</td>\n",
       "      <td>0.040558</td>\n",
       "      <td>0.032107</td>\n",
       "      <td>0.067695</td>\n",
       "      <td>0.027470</td>\n",
       "      <td>0.171187</td>\n",
       "      <td>0.509546</td>\n",
       "      <td>0.384942</td>\n",
       "      <td>0.142100</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.025974</td>\n",
       "      <td>2.711772</td>\n",
       "      <td>0.992003</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Ready_Random</td>\n",
       "      <td>1.524954</td>\n",
       "      <td>1.223352</td>\n",
       "      <td>0.045599</td>\n",
       "      <td>0.021181</td>\n",
       "      <td>0.024585</td>\n",
       "      <td>0.031518</td>\n",
       "      <td>0.027897</td>\n",
       "      <td>0.021931</td>\n",
       "      <td>0.048111</td>\n",
       "      <td>0.017381</td>\n",
       "      <td>0.119005</td>\n",
       "      <td>0.507096</td>\n",
       "      <td>0.330859</td>\n",
       "      <td>0.091198</td>\n",
       "      <td>0.988123</td>\n",
       "      <td>0.181818</td>\n",
       "      <td>5.100792</td>\n",
       "      <td>0.906866</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Self_TopRated</td>\n",
       "      <td>NaN</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0.032025</td>\n",
       "      <td>0.012674</td>\n",
       "      <td>0.015714</td>\n",
       "      <td>0.021183</td>\n",
       "      <td>0.028433</td>\n",
       "      <td>0.018573</td>\n",
       "      <td>0.022741</td>\n",
       "      <td>0.005328</td>\n",
       "      <td>0.031602</td>\n",
       "      <td>0.502764</td>\n",
       "      <td>0.237540</td>\n",
       "      <td>0.065748</td>\n",
       "      <td>0.697031</td>\n",
       "      <td>0.014430</td>\n",
       "      <td>2.220811</td>\n",
       "      <td>0.995173</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Self_BaselineUI</td>\n",
       "      <td>0.967585</td>\n",
       "      <td>0.762740</td>\n",
       "      <td>0.000954</td>\n",
       "      <td>0.000170</td>\n",
       "      <td>0.000278</td>\n",
       "      <td>0.000463</td>\n",
       "      <td>0.000644</td>\n",
       "      <td>0.000189</td>\n",
       "      <td>0.000752</td>\n",
       "      <td>0.000168</td>\n",
       "      <td>0.001677</td>\n",
       "      <td>0.496424</td>\n",
       "      <td>0.009544</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.600530</td>\n",
       "      <td>0.005051</td>\n",
       "      <td>1.803126</td>\n",
       "      <td>0.996380</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Self_IKNN</td>\n",
       "      <td>1.018363</td>\n",
       "      <td>0.808793</td>\n",
       "      <td>0.000318</td>\n",
       "      <td>0.000108</td>\n",
       "      <td>0.000140</td>\n",
       "      <td>0.000189</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.000214</td>\n",
       "      <td>0.000037</td>\n",
       "      <td>0.000368</td>\n",
       "      <td>0.496391</td>\n",
       "      <td>0.003181</td>\n",
       "      <td>0.000000</td>\n",
       "      <td>0.392153</td>\n",
       "      <td>0.115440</td>\n",
       "      <td>4.174741</td>\n",
       "      <td>0.965327</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "             Model      RMSE       MAE  precision    recall       F_1  \\\n",
       "0      Self_TopPop  2.508258  2.217909   0.188865  0.116919  0.118732   \n",
       "0   Ready_Baseline  0.949459  0.752487   0.091410  0.037652  0.046030   \n",
       "0   Self_GlobalAvg  1.125760  0.943534   0.061188  0.025968  0.031383   \n",
       "0     Ready_Random  1.524954  1.223352   0.045599  0.021181  0.024585   \n",
       "0    Self_TopRated       NaN       NaN   0.032025  0.012674  0.015714   \n",
       "0  Self_BaselineUI  0.967585  0.762740   0.000954  0.000170  0.000278   \n",
       "0        Self_IKNN  1.018363  0.808793   0.000318  0.000108  0.000140   \n",
       "\n",
       "       F_05  precision_super  recall_super      NDCG       mAP       MRR  \\\n",
       "0  0.141584         0.130472      0.137473  0.214651  0.111707  0.400939   \n",
       "0  0.061286         0.079614      0.056463  0.095957  0.043178  0.198193   \n",
       "0  0.041343         0.040558      0.032107  0.067695  0.027470  0.171187   \n",
       "0  0.031518         0.027897      0.021931  0.048111  0.017381  0.119005   \n",
       "0  0.021183         0.028433      0.018573  0.022741  0.005328  0.031602   \n",
       "0  0.000463         0.000644      0.000189  0.000752  0.000168  0.001677   \n",
       "0  0.000189         0.000000      0.000000  0.000214  0.000037  0.000368   \n",
       "\n",
       "       LAUC        HR       H2R  Reco in test  Test coverage   Shannon  \\\n",
       "0  0.555546  0.765642  0.492047      1.000000       0.038961  3.159079   \n",
       "0  0.515501  0.437964  0.239661      1.000000       0.033911  2.836513   \n",
       "0  0.509546  0.384942  0.142100      1.000000       0.025974  2.711772   \n",
       "0  0.507096  0.330859  0.091198      0.988123       0.181818  5.100792   \n",
       "0  0.502764  0.237540  0.065748      0.697031       0.014430  2.220811   \n",
       "0  0.496424  0.009544  0.000000      0.600530       0.005051  1.803126   \n",
       "0  0.496391  0.003181  0.000000      0.392153       0.115440  4.174741   \n",
       "\n",
       "       Gini  \n",
       "0  0.987317  \n",
       "0  0.991139  \n",
       "0  0.992003  \n",
       "0  0.906866  \n",
       "0  0.995173  \n",
       "0  0.996380  \n",
       "0  0.965327  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import importlib\n",
    "\n",
    "import evaluation_measures as ev\n",
    "\n",
    "# Reload only after importing, so this cell also runs on a fresh kernel and picks up\n",
    "# edits to evaluation_measures.py; importlib replaces the deprecated imp module.\n",
    "importlib.reload(ev)\n",
    "\n",
    "dir_path = \"Recommendations generated/ml-100k/\"\n",
    "super_reactions = [4, 5]\n",
    "test = pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n",
    "\n",
    "ev.evaluate_all(test, dir_path, super_reactions)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Ready-made KNNs - Surprise implementation"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### I-KNN - basic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "import importlib\n",
    "\n",
    "import helpers\n",
    "import surprise as sp\n",
    "\n",
    "# importlib replaces the deprecated imp module; reloading picks up edits to helpers.py\n",
    "# without restarting the kernel.\n",
    "importlib.reload(helpers)\n",
    "\n",
    "sim_options = {'name': 'cosine',\n",
    "               'user_based': False}  # compute similarities between items\n",
    "algo = sp.KNNBasic(sim_options=sim_options)\n",
    "\n",
    "helpers.ready_made(algo,\n",
    "                   reco_path='Recommendations generated/ml-100k/Ready_I-KNN_reco.csv',\n",
    "                   estimations_path='Recommendations generated/ml-100k/Ready_I-KNN_estimations.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### U-KNN - basic"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import importlib\n",
    "\n",
    "import helpers\n",
    "import surprise as sp\n",
    "\n",
    "# importlib replaces the deprecated imp module; reloading picks up edits to helpers.py\n",
    "# without restarting the kernel.\n",
    "importlib.reload(helpers)\n",
    "\n",
    "sim_options = {'name': 'cosine',\n",
    "               'user_based': True}  # compute similarities between users\n",
    "algo = sp.KNNBasic(sim_options=sim_options)\n",
    "\n",
    "helpers.ready_made(algo,\n",
    "                   reco_path='Recommendations generated/ml-100k/Ready_U-KNN_reco.csv',\n",
    "                   estimations_path='Recommendations generated/ml-100k/Ready_U-KNN_estimations.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### I-KNN - on top baseline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import importlib\n",
    "\n",
    "import helpers\n",
    "import surprise as sp\n",
    "\n",
    "# importlib replaces the deprecated imp module; reloading picks up edits to helpers.py\n",
    "# without restarting the kernel.\n",
    "importlib.reload(helpers)\n",
    "\n",
    "sim_options = {'name': 'cosine',\n",
    "               'user_based': False}  # compute similarities between items\n",
    "# sim_options has to be passed explicitly: without it KNNBaseline falls back to its\n",
    "# own defaults and the item-based cosine configuration above is silently ignored.\n",
    "algo = sp.KNNBaseline(sim_options=sim_options)\n",
    "\n",
    "helpers.ready_made(algo,\n",
    "                   reco_path='Recommendations generated/ml-100k/Ready_I-KNNBaseline_reco.csv',\n",
    "                   estimations_path='Recommendations generated/ml-100k/Ready_I-KNNBaseline_estimations.csv')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Project task 4: use a version of your choice of the Surprise KNN algorithm"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# read the docs and try to find the best parameter configuration (let's say in terms of RMSE)\n",
    "# https://surprise.readthedocs.io/en/stable/knn_inspired.html#surprise.prediction_algorithms.knns.KNNBaseline\n",
    "# the solution here can be similar to examples above\n",
    "# please save the output in 'Recommendations generated/ml-100k/Self_KNNSurprisetask_reco.csv' and\n",
    "# 'Recommendations generated/ml-100k/Self_KNNSurprisetask_estimations.csv'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Computing the cosine similarity matrix...\n",
      "Done computing similarity matrix.\n",
      "Generating predictions...\n",
      "Generating top N recommendations...\n",
      "Generating predictions...\n"
     ]
    }
   ],
   "source": [
    "# I chose KNNWithZScore because it normalises each user's ratings by that user's\n",
    "# mean (and spread) before the neighbours' ratings are combined.\n",
    "import importlib\n",
    "\n",
    "import helpers\n",
    "import surprise as sp\n",
    "\n",
    "# importlib replaces the deprecated imp module; reloading picks up edits to helpers.py\n",
    "# without restarting the kernel.\n",
    "importlib.reload(helpers)\n",
    "\n",
    "sim_options = {'name': 'cosine',\n",
    "               'user_based': True}  # compute similarities between users\n",
    "algo = sp.KNNWithZScore(k=10, sim_options=sim_options)\n",
    "\n",
    "helpers.ready_made(algo,\n",
    "                   reco_path='Recommendations generated/ml-100k/Self_KNNSurprisetask_reco.csv',\n",
    "                   estimations_path='Recommendations generated/ml-100k/Self_KNNSurprisetask_estimations.csv')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
Prześlij pliki do '' 2020-06-13 03:25:51 +02:00			`{`
			`"cells": [`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			`"# Self made simplified I-KNN"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 1,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"import helpers\n",`
			`"import pandas as pd\n",`
			`"import numpy as np\n",`
			`"import scipy.sparse as sparse\n",`
			`"from collections import defaultdict\n",`
			`"from itertools import chain\n",`
			`"import random\n",`
			`"\n",`
			`"train_read=pd.read_csv('./Datasets/ml-100k/train.csv', sep='\\t', header=None)\n",`
			`"test_read=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n",`
			`"train_ui, test_ui, user_code_id, user_id_code, item_code_id, item_id_code = helpers.data_to_csr(train_read, test_read)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 2,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"class IKNN():\n",`
			`" \n",`
			`" def fit(self, train_ui):\n",`
			`" self.train_ui=train_ui\n",`
			`" \n",`
			`" train_iu=train_ui.transpose()\n",`
			`" norms=np.linalg.norm(train_iu.A, axis=1) # here we compute lenth of each item ratings vector\n",`
			`" norms=np.vectorize(lambda x: max(x,1))(norms[:,None]) # to avoid dividing by zero\n",`
			`"\n",`
			`" normalized_train_iu=sparse.csr_matrix(train_iu/norms)\n",`
			`"\n",`
			`" self.similarity_matrix_ii=normalized_train_iu*normalized_train_iu.transpose()\n",`
			`" \n",`
			`" self.estimations=np.array(train_ui*self.similarity_matrix_ii/((train_ui>0)*self.similarity_matrix_ii))\n",`
			`" \n",`
			`" def recommend(self, user_code_id, item_code_id, topK=10):\n",`
			`" \n",`
			`" top_k = defaultdict(list)\n",`
			`" for nb_user, user in enumerate(self.estimations):\n",`
			`" \n",`
			`" user_rated=self.train_ui.indices[self.train_ui.indptr[nb_user]:self.train_ui.indptr[nb_user+1]]\n",`
			`" for item, score in enumerate(user):\n",`
			`" if item not in user_rated and not np.isnan(score):\n",`
			`" top_k[user_code_id[nb_user]].append((item_code_id[item], score))\n",`
			`" result=[]\n",`
			`" # Let's choose k best items in the format: (user, item1, score1, item2, score2, ...)\n",`
			`" for uid, item_scores in top_k.items():\n",`
			`" item_scores.sort(key=lambda x: x[1], reverse=True)\n",`
			`" result.append([uid]+list(chain(*item_scores[:topK])))\n",`
			`" return result\n",`
			`" \n",`
			`" def estimate(self, user_code_id, item_code_id, test_ui):\n",`
			`" result=[]\n",`
			`" for user, item in zip(*test_ui.nonzero()):\n",`
			`" result.append([user_code_id[user], item_code_id[item], \n",`
			`" self.estimations[user,item] if not np.isnan(self.estimations[user,item]) else 1])\n",`
			`" return result"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 3,`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
			`"toy train ui:\n"`
			`]`
			`},`
			`{`
			`"data": {`
			`"text/plain": [`
			`"array([[3, 4, 0, 0, 5, 0, 0, 4],\n",`
			`" [0, 1, 2, 3, 0, 0, 0, 0],\n",`
			`" [0, 0, 0, 5, 0, 3, 4, 0]], dtype=int64)"`
			`]`
			`},`
			`"metadata": {},`
			`"output_type": "display_data"`
			`},`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
			`"similarity matrix:\n"`
			`]`
			`},`
			`{`
			`"data": {`
			`"text/plain": [`
			`"array([[1. , 0.9701425 , 0. , 0. , 1. ,\n",`
			`" 0. , 0. , 1. ],\n",`
			`" [0.9701425 , 1. , 0.24253563, 0.12478355, 0.9701425 ,\n",`
			`" 0. , 0. , 0.9701425 ],\n",`
			`" [0. , 0.24253563, 1. , 0.51449576, 0. ,\n",`
			`" 0. , 0. , 0. ],\n",`
			`" [0. , 0.12478355, 0.51449576, 1. , 0. ,\n",`
			`" 0.85749293, 0.85749293, 0. ],\n",`
			`" [1. , 0.9701425 , 0. , 0. , 1. ,\n",`
			`" 0. , 0. , 1. ],\n",`
			`" [0. , 0. , 0. , 0.85749293, 0. ,\n",`
			`" 1. , 1. , 0. ],\n",`
			`" [0. , 0. , 0. , 0.85749293, 0. ,\n",`
			`" 1. , 1. , 0. ],\n",`
			`" [1. , 0.9701425 , 0. , 0. , 1. ,\n",`
			`" 0. , 0. , 1. ]])"`
			`]`
			`},`
			`"metadata": {},`
			`"output_type": "display_data"`
			`},`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
			`"estimations matrix:\n"`
			`]`
			`},`
			`{`
			`"data": {`
			`"text/plain": [`
			`"array([[4. , 4. , 4. , 4. , 4. ,\n",`
			`" nan, nan, 4. ],\n",`
			`" [1. , 1.35990333, 2.15478388, 2.53390319, 1. ,\n",`
			`" 3. , 3. , 1. ],\n",`
			`" [ nan, 5. , 5. , 4.05248907, nan,\n",`
			`" 3.95012863, 3.95012863, nan]])"`
			`]`
			`},`
			`"metadata": {},`
			`"output_type": "display_data"`
			`},`
			`{`
			`"data": {`
			`"text/plain": [`
			`"[[0, 20, 4.0, 30, 4.0],\n",`
			`" [10, 50, 3.0, 60, 3.0, 0, 1.0, 40, 1.0, 70, 1.0],\n",`
			`" [20, 10, 5.0, 20, 5.0]]"`
			`]`
			`},`
			`"execution_count": 3,`
			`"metadata": {},`
			`"output_type": "execute_result"`
			`}`
			`],`
			`"source": [`
			`"# toy example\n",`
			`"toy_train_read=pd.read_csv('./Datasets/toy-example/train.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n",`
			`"toy_test_read=pd.read_csv('./Datasets/toy-example/test.csv', sep='\\t', header=None, names=['user', 'item', 'rating', 'timestamp'])\n",`
			`"\n",`
			`"toy_train_ui, toy_test_ui, toy_user_code_id, toy_user_id_code, \\\n",`
			`"toy_item_code_id, toy_item_id_code = helpers.data_to_csr(toy_train_read, toy_test_read)\n",`
			`"\n",`
			`"\n",`
			`"model=IKNN()\n",`
			`"model.fit(toy_train_ui)\n",`
			`"\n",`
			`"print('toy train ui:')\n",`
			`"display(toy_train_ui.A)\n",`
			`"\n",`
			`"print('similarity matrix:')\n",`
			`"display(model.similarity_matrix_ii.A)\n",`
			`"\n",`
			`"print('estimations matrix:')\n",`
			`"display(model.estimations)\n",`
			`"\n",`
			`"model.recommend(toy_user_code_id, toy_item_code_id)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 4,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"model=IKNN()\n",`
			`"model.fit(train_ui)\n",`
			`"\n",`
			`"top_n=pd.DataFrame(model.recommend(user_code_id, item_code_id, topK=10))\n",`
			`"\n",`
			`"top_n.to_csv('Recommendations generated/ml-100k/Self_IKNN_reco.csv', index=False, header=False)\n",`
			`"\n",`
			`"estimations=pd.DataFrame(model.estimate(user_code_id, item_code_id, test_ui))\n",`
			`"estimations.to_csv('Recommendations generated/ml-100k/Self_IKNN_estimations.csv', index=False, header=False)"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 5,`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"name": "stderr",`
			`"output_type": "stream",`
			`"text": [`
			`"943it [00:00, 6719.05it/s]\n"`
			`]`
			`},`
			`{`
			`"data": {`
			`"text/html": [`
			`"<div>\n",`
			`"<style scoped>\n",`
			`" .dataframe tbody tr th:only-of-type {\n",`
			`" vertical-align: middle;\n",`
			`" }\n",`
			`"\n",`
			`" .dataframe tbody tr th {\n",`
			`" vertical-align: top;\n",`
			`" }\n",`
			`"\n",`
			`" .dataframe thead th {\n",`
			`" text-align: right;\n",`
			`" }\n",`
			`"</style>\n",`
			`"<table border=\"1\" class=\"dataframe\">\n",`
			`" <thead>\n",`
			`" <tr style=\"text-align: right;\">\n",`
			`" <th></th>\n",`
			`" <th>RMSE</th>\n",`
			`" <th>MAE</th>\n",`
			`" <th>precision</th>\n",`
			`" <th>recall</th>\n",`
			`" <th>F_1</th>\n",`
			`" <th>F_05</th>\n",`
			`" <th>precision_super</th>\n",`
			`" <th>recall_super</th>\n",`
			`" <th>NDCG</th>\n",`
			`" <th>mAP</th>\n",`
			`" <th>MRR</th>\n",`
			`" <th>LAUC</th>\n",`
			`" <th>HR</th>\n",`
			`" <th>H2R</th>\n",`
			`" <th>Reco in test</th>\n",`
			`" <th>Test coverage</th>\n",`
			`" <th>Shannon</th>\n",`
			`" <th>Gini</th>\n",`
			`" </tr>\n",`
			`" </thead>\n",`
			`" <tbody>\n",`
			`" <tr>\n",`
			`" <th>0</th>\n",`
			`" <td>1.018363</td>\n",`
			`" <td>0.808793</td>\n",`
			`" <td>0.000318</td>\n",`
			`" <td>0.000108</td>\n",`
			`" <td>0.00014</td>\n",`
			`" <td>0.000189</td>\n",`
			`" <td>0.0</td>\n",`
			`" <td>0.0</td>\n",`
			`" <td>0.000214</td>\n",`
			`" <td>0.000037</td>\n",`
			`" <td>0.000368</td>\n",`
			`" <td>0.496391</td>\n",`
			`" <td>0.003181</td>\n",`
			`" <td>0.0</td>\n",`
			`" <td>0.392153</td>\n",`
			`" <td>0.11544</td>\n",`
			`" <td>4.174741</td>\n",`
			`" <td>0.965327</td>\n",`
			`" </tr>\n",`
			`" </tbody>\n",`
			`"</table>\n",`
			`"</div>"`
			`],`
			`"text/plain": [`
			`" RMSE MAE precision recall F_1 F_05 \\\n",`
			`"0 1.018363 0.808793 0.000318 0.000108 0.00014 0.000189 \n",`
			`"\n",`
			`" precision_super recall_super NDCG mAP MRR LAUC \\\n",`
			`"0 0.0 0.0 0.000214 0.000037 0.000368 0.496391 \n",`
			`"\n",`
			`" HR H2R Reco in test Test coverage Shannon Gini \n",`
			`"0 0.003181 0.0 0.392153 0.11544 4.174741 0.965327 "`
			`]`
			`},`
			`"execution_count": 5,`
			`"metadata": {},`
			`"output_type": "execute_result"`
			`}`
			`],`
			`"source": [`
			`"import evaluation_measures as ev\n",`
			`"estimations_df=pd.read_csv('Recommendations generated/ml-100k/Self_IKNN_estimations.csv', header=None)\n",`
			`"reco=np.loadtxt('Recommendations generated/ml-100k/Self_IKNN_reco.csv', delimiter=',')\n",`
			`"\n",`
			`"ev.evaluate(test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None),\n",`
			`" estimations_df=estimations_df, \n",`
			`" reco=reco,\n",`
			`" super_reactions=[4,5])"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 6,`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"name": "stderr",`
			`"output_type": "stream",`
			`"text": [`
			`"943it [00:00, 7023.03it/s]\n",`
			`"943it [00:00, 6323.02it/s]\n",`
			`"943it [00:00, 6003.69it/s]\n",`
			`"943it [00:00, 6582.48it/s]\n",`
			`"943it [00:00, 5623.69it/s]\n",`
			`"943it [00:00, 6775.77it/s]\n",`
			`"943it [00:00, 6119.28it/s]\n"`
			`]`
			`},`
			`{`
			`"data": {`
			`"text/html": [`
			`"<div>\n",`
			`"<style scoped>\n",`
			`" .dataframe tbody tr th:only-of-type {\n",`
			`" vertical-align: middle;\n",`
			`" }\n",`
			`"\n",`
			`" .dataframe tbody tr th {\n",`
			`" vertical-align: top;\n",`
			`" }\n",`
			`"\n",`
			`" .dataframe thead th {\n",`
			`" text-align: right;\n",`
			`" }\n",`
			`"</style>\n",`
			`"<table border=\"1\" class=\"dataframe\">\n",`
			`" <thead>\n",`
			`" <tr style=\"text-align: right;\">\n",`
			`" <th></th>\n",`
			`" <th>Model</th>\n",`
			`" <th>RMSE</th>\n",`
			`" <th>MAE</th>\n",`
			`" <th>precision</th>\n",`
			`" <th>recall</th>\n",`
			`" <th>F_1</th>\n",`
			`" <th>F_05</th>\n",`
			`" <th>precision_super</th>\n",`
			`" <th>recall_super</th>\n",`
			`" <th>NDCG</th>\n",`
			`" <th>mAP</th>\n",`
			`" <th>MRR</th>\n",`
			`" <th>LAUC</th>\n",`
			`" <th>HR</th>\n",`
			`" <th>H2R</th>\n",`
			`" <th>Reco in test</th>\n",`
			`" <th>Test coverage</th>\n",`
			`" <th>Shannon</th>\n",`
			`" <th>Gini</th>\n",`
			`" </tr>\n",`
			`" </thead>\n",`
			`" <tbody>\n",`
			`" <tr>\n",`
			`" <th>0</th>\n",`
			`" <td>Self_TopPop</td>\n",`
			`" <td>2.508258</td>\n",`
			`" <td>2.217909</td>\n",`
			`" <td>0.188865</td>\n",`
			`" <td>0.116919</td>\n",`
			`" <td>0.118732</td>\n",`
			`" <td>0.141584</td>\n",`
			`" <td>0.130472</td>\n",`
			`" <td>0.137473</td>\n",`
			`" <td>0.214651</td>\n",`
			`" <td>0.111707</td>\n",`
			`" <td>0.400939</td>\n",`
			`" <td>0.555546</td>\n",`
			`" <td>0.765642</td>\n",`
			`" <td>0.492047</td>\n",`
			`" <td>1.000000</td>\n",`
			`" <td>0.038961</td>\n",`
			`" <td>3.159079</td>\n",`
			`" <td>0.987317</td>\n",`
			`" </tr>\n",`
			`" <tr>\n",`
			`" <th>0</th>\n",`
			`" <td>Ready_Baseline</td>\n",`
			`" <td>0.949459</td>\n",`
			`" <td>0.752487</td>\n",`
			`" <td>0.091410</td>\n",`
			`" <td>0.037652</td>\n",`
			`" <td>0.046030</td>\n",`
			`" <td>0.061286</td>\n",`
			`" <td>0.079614</td>\n",`
			`" <td>0.056463</td>\n",`
			`" <td>0.095957</td>\n",`
			`" <td>0.043178</td>\n",`
			`" <td>0.198193</td>\n",`
			`" <td>0.515501</td>\n",`
			`" <td>0.437964</td>\n",`
			`" <td>0.239661</td>\n",`
			`" <td>1.000000</td>\n",`
			`" <td>0.033911</td>\n",`
			`" <td>2.836513</td>\n",`
			`" <td>0.991139</td>\n",`
			`" </tr>\n",`
			`" <tr>\n",`
			`" <th>0</th>\n",`
			`" <td>Self_GlobalAvg</td>\n",`
			`" <td>1.125760</td>\n",`
			`" <td>0.943534</td>\n",`
			`" <td>0.061188</td>\n",`
			`" <td>0.025968</td>\n",`
			`" <td>0.031383</td>\n",`
			`" <td>0.041343</td>\n",`
			`" <td>0.040558</td>\n",`
			`" <td>0.032107</td>\n",`
			`" <td>0.067695</td>\n",`
			`" <td>0.027470</td>\n",`
			`" <td>0.171187</td>\n",`
			`" <td>0.509546</td>\n",`
			`" <td>0.384942</td>\n",`
			`" <td>0.142100</td>\n",`
			`" <td>1.000000</td>\n",`
			`" <td>0.025974</td>\n",`
			`" <td>2.711772</td>\n",`
			`" <td>0.992003</td>\n",`
			`" </tr>\n",`
			`" <tr>\n",`
			`" <th>0</th>\n",`
			`" <td>Ready_Random</td>\n",`
			`" <td>1.524954</td>\n",`
			`" <td>1.223352</td>\n",`
			`" <td>0.045599</td>\n",`
			`" <td>0.021181</td>\n",`
			`" <td>0.024585</td>\n",`
			`" <td>0.031518</td>\n",`
			`" <td>0.027897</td>\n",`
			`" <td>0.021931</td>\n",`
			`" <td>0.048111</td>\n",`
			`" <td>0.017381</td>\n",`
			`" <td>0.119005</td>\n",`
			`" <td>0.507096</td>\n",`
			`" <td>0.330859</td>\n",`
			`" <td>0.091198</td>\n",`
			`" <td>0.988123</td>\n",`
			`" <td>0.181818</td>\n",`
			`" <td>5.100792</td>\n",`
			`" <td>0.906866</td>\n",`
			`" </tr>\n",`
			`" <tr>\n",`
			`" <th>0</th>\n",`
			`" <td>Self_TopRated</td>\n",`
			`" <td>NaN</td>\n",`
			`" <td>NaN</td>\n",`
			`" <td>0.032025</td>\n",`
			`" <td>0.012674</td>\n",`
			`" <td>0.015714</td>\n",`
			`" <td>0.021183</td>\n",`
			`" <td>0.028433</td>\n",`
			`" <td>0.018573</td>\n",`
			`" <td>0.022741</td>\n",`
			`" <td>0.005328</td>\n",`
			`" <td>0.031602</td>\n",`
			`" <td>0.502764</td>\n",`
			`" <td>0.237540</td>\n",`
			`" <td>0.065748</td>\n",`
			`" <td>0.697031</td>\n",`
			`" <td>0.014430</td>\n",`
			`" <td>2.220811</td>\n",`
			`" <td>0.995173</td>\n",`
			`" </tr>\n",`
			`" <tr>\n",`
			`" <th>0</th>\n",`
			`" <td>Self_BaselineUI</td>\n",`
			`" <td>0.967585</td>\n",`
			`" <td>0.762740</td>\n",`
			`" <td>0.000954</td>\n",`
			`" <td>0.000170</td>\n",`
			`" <td>0.000278</td>\n",`
			`" <td>0.000463</td>\n",`
			`" <td>0.000644</td>\n",`
			`" <td>0.000189</td>\n",`
			`" <td>0.000752</td>\n",`
			`" <td>0.000168</td>\n",`
			`" <td>0.001677</td>\n",`
			`" <td>0.496424</td>\n",`
			`" <td>0.009544</td>\n",`
			`" <td>0.000000</td>\n",`
			`" <td>0.600530</td>\n",`
			`" <td>0.005051</td>\n",`
			`" <td>1.803126</td>\n",`
			`" <td>0.996380</td>\n",`
			`" </tr>\n",`
			`" <tr>\n",`
			`" <th>0</th>\n",`
			`" <td>Self_IKNN</td>\n",`
			`" <td>1.018363</td>\n",`
			`" <td>0.808793</td>\n",`
			`" <td>0.000318</td>\n",`
			`" <td>0.000108</td>\n",`
			`" <td>0.000140</td>\n",`
			`" <td>0.000189</td>\n",`
			`" <td>0.000000</td>\n",`
			`" <td>0.000000</td>\n",`
			`" <td>0.000214</td>\n",`
			`" <td>0.000037</td>\n",`
			`" <td>0.000368</td>\n",`
			`" <td>0.496391</td>\n",`
			`" <td>0.003181</td>\n",`
			`" <td>0.000000</td>\n",`
			`" <td>0.392153</td>\n",`
			`" <td>0.115440</td>\n",`
			`" <td>4.174741</td>\n",`
			`" <td>0.965327</td>\n",`
			`" </tr>\n",`
			`" </tbody>\n",`
			`"</table>\n",`
			`"</div>"`
			`],`
			`"text/plain": [`
			`" Model RMSE MAE precision recall F_1 \\\n",`
			`"0 Self_TopPop 2.508258 2.217909 0.188865 0.116919 0.118732 \n",`
			`"0 Ready_Baseline 0.949459 0.752487 0.091410 0.037652 0.046030 \n",`
			`"0 Self_GlobalAvg 1.125760 0.943534 0.061188 0.025968 0.031383 \n",`
			`"0 Ready_Random 1.524954 1.223352 0.045599 0.021181 0.024585 \n",`
			`"0 Self_TopRated NaN NaN 0.032025 0.012674 0.015714 \n",`
			`"0 Self_BaselineUI 0.967585 0.762740 0.000954 0.000170 0.000278 \n",`
			`"0 Self_IKNN 1.018363 0.808793 0.000318 0.000108 0.000140 \n",`
			`"\n",`
			`" F_05 precision_super recall_super NDCG mAP MRR \\\n",`
			`"0 0.141584 0.130472 0.137473 0.214651 0.111707 0.400939 \n",`
			`"0 0.061286 0.079614 0.056463 0.095957 0.043178 0.198193 \n",`
			`"0 0.041343 0.040558 0.032107 0.067695 0.027470 0.171187 \n",`
			`"0 0.031518 0.027897 0.021931 0.048111 0.017381 0.119005 \n",`
			`"0 0.021183 0.028433 0.018573 0.022741 0.005328 0.031602 \n",`
			`"0 0.000463 0.000644 0.000189 0.000752 0.000168 0.001677 \n",`
			`"0 0.000189 0.000000 0.000000 0.000214 0.000037 0.000368 \n",`
			`"\n",`
			`" LAUC HR H2R Reco in test Test coverage Shannon \\\n",`
			`"0 0.555546 0.765642 0.492047 1.000000 0.038961 3.159079 \n",`
			`"0 0.515501 0.437964 0.239661 1.000000 0.033911 2.836513 \n",`
			`"0 0.509546 0.384942 0.142100 1.000000 0.025974 2.711772 \n",`
			`"0 0.507096 0.330859 0.091198 0.988123 0.181818 5.100792 \n",`
			`"0 0.502764 0.237540 0.065748 0.697031 0.014430 2.220811 \n",`
			`"0 0.496424 0.009544 0.000000 0.600530 0.005051 1.803126 \n",`
			`"0 0.496391 0.003181 0.000000 0.392153 0.115440 4.174741 \n",`
			`"\n",`
			`" Gini \n",`
			`"0 0.987317 \n",`
			`"0 0.991139 \n",`
			`"0 0.992003 \n",`
			`"0 0.906866 \n",`
			`"0 0.995173 \n",`
			`"0 0.996380 \n",`
			`"0 0.965327 "`
			`]`
			`},`
			`"execution_count": 6,`
			`"metadata": {},`
			`"output_type": "execute_result"`
			`}`
			`],`
			`"source": [`
			`"import imp\n",`
			`"imp.reload(ev)\n",`
			`"\n",`
			`"import evaluation_measures as ev\n",`
			`"dir_path=\"Recommendations generated/ml-100k/\"\n",`
			`"super_reactions=[4,5]\n",`
			`"test=pd.read_csv('./Datasets/ml-100k/test.csv', sep='\\t', header=None)\n",`
			`"\n",`
			`"ev.evaluate_all(test, dir_path, super_reactions)"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			`"# Ready-made KNNs - Surprise implementation"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			`"### I-KNN - basic"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 8,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"import helpers\n",`
			`"import surprise as sp\n",`
			`"import imp\n",`
			`"imp.reload(helpers)\n",`
			`"\n",`
			`"sim_options = {'name': 'cosine',\n",`
			`" 'user_based': False} # compute similarities between items\n",`
			`"algo = sp.KNNBasic(sim_options=sim_options)\n",`
			`"\n",`
			`"helpers.ready_made(algo, reco_path='Recommendations generated/ml-100k/Ready_I-KNN_reco.csv',\n",`
			`" estimations_path='Recommendations generated/ml-100k/Ready_I-KNN_estimations.csv')"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			`"### U-KNN - basic"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"import helpers\n",`
			`"import surprise as sp\n",`
			`"import imp\n",`
			`"imp.reload(helpers)\n",`
			`"\n",`
			`"sim_options = {'name': 'cosine',\n",`
			`" 'user_based': True} # compute similarities between users\n",`
			`"algo = sp.KNNBasic(sim_options=sim_options)\n",`
			`"\n",`
			`"helpers.ready_made(algo, reco_path='Recommendations generated/ml-100k/Ready_U-KNN_reco.csv',\n",`
			`" estimations_path='Recommendations generated/ml-100k/Ready_U-KNN_estimations.csv')"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			`"### I-KNN - on top of baseline"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"import helpers\n",`
			`"import surprise as sp\n",`
			`"import imp\n",`
			`"imp.reload(helpers)\n",`
			`"\n",`
			`"sim_options = {'name': 'cosine',\n",`
			`" 'user_based': False} # compute similarities between items\n",`
			`"algo = sp.KNNBaseline()\n",`
			`"\n",`
			`"helpers.ready_made(algo, reco_path='Recommendations generated/ml-100k/Ready_I-KNNBaseline_reco.csv',\n",`
			`" estimations_path='Recommendations generated/ml-100k/Ready_I-KNNBaseline_estimations.csv')"`
			`]`
			`},`
			`{`
			`"cell_type": "markdown",`
			`"metadata": {},`
			`"source": [`
			`"# project task 4: use a Surprise KNN algorithm variant of your choice"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": [`
			`"# read the docs and try to find the best parameter configuration (let's say in terms of RMSE)\n",`
			`"# https://surprise.readthedocs.io/en/stable/knn_inspired.html#surprise.prediction_algorithms.knns.KNNBaseline\n",`
			`"# the solution here can be similar to examples above\n",`
			`"# please save the output in 'Recommendations generated/ml-100k/Self_KNNSurprisetask_reco.csv' and\n",`
			`"# 'Recommendations generated/ml-100k/Self_KNNSurprisetask_estimations.csv'"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": 9,`
			`"metadata": {},`
			`"outputs": [`
			`{`
			`"name": "stdout",`
			`"output_type": "stream",`
			`"text": [`
			`"Computing the cosine similarity matrix...\n",`
			`"Done computing similarity matrix.\n",`
			`"Generating predictions...\n",`
			`"Generating top N recommendations...\n",`
			`"Generating predictions...\n"`
			`]`
			`}`
			`],`
			`"source": [`
			`"#I chose KNN With Means because I thought it would be interesting if the algorithm take into account\n",`
			`"#the mean ratings of each user\n",`
			`"import helpers\n",`
			`"import surprise as sp\n",`
			`"import imp\n",`
			`"imp.reload(helpers)\n",`
			`"\n",`
			`"sim_options = {'name': 'cosine',\n",`
			`" 'user_based': True} # compute similarities between users\n",`
			`"algo = sp.KNNWithZScore(k=10,sim_options=sim_options)\n",`
			`"\n",`
			`"helpers.ready_made(algo, reco_path='Recommendations generated/ml-100k/Self_KNNSurprisetask_reco.csv',\n",`
			`" estimations_path='Recommendations generated/ml-100k/Self_KNNSurprisetask_estimations.csv')"`
			`]`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": []`
			`},`
			`{`
			`"cell_type": "code",`
			`"execution_count": null,`
			`"metadata": {},`
			`"outputs": [],`
			`"source": []`
			`}`
			`],`
			`"metadata": {`
			`"kernelspec": {`
			`"display_name": "Python 3",`
			`"language": "python",`
			`"name": "python3"`
			`},`
			`"language_info": {`
			`"codemirror_mode": {`
			`"name": "ipython",`
			`"version": 3`
			`},`
			`"file_extension": ".py",`
			`"mimetype": "text/x-python",`
			`"name": "python",`
			`"nbconvert_exporter": "python",`
			`"pygments_lexer": "ipython3",`
			`"version": "3.6.5"`
			`}`
			`},`
			`"nbformat": 4,`
			`"nbformat_minor": 4`
			`}`