Fixed task 1

This commit is contained in:
s460932 2020-06-18 00:08:09 +02:00
parent afa9655186
commit e2ff43599b
5 changed files with 41456 additions and 40864 deletions

View File

@ -9,7 +9,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 73, "execution_count": 32,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -26,7 +26,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 74, "execution_count": 33,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -43,7 +43,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 75, "execution_count": 34,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -134,7 +134,7 @@
"4 560 24 2 879976772 559 23" "4 560 24 2 879976772 559 23"
] ]
}, },
"execution_count": 75, "execution_count": 34,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -145,7 +145,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 76, "execution_count": 35,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -155,7 +155,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 77, "execution_count": 36,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -169,7 +169,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 78, "execution_count": 37,
"metadata": { "metadata": {
"scrolled": true "scrolled": true
}, },
@ -191,7 +191,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 79, "execution_count": 38,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -201,7 +201,7 @@
"\twith 8 stored elements in Compressed Sparse Row format>" "\twith 8 stored elements in Compressed Sparse Row format>"
] ]
}, },
"execution_count": 79, "execution_count": 38,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -216,7 +216,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 80, "execution_count": 39,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -259,7 +259,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 81, "execution_count": 40,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -285,7 +285,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 82, "execution_count": 41,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -309,7 +309,7 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"557 ns ± 15.6 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n", "586 ns ± 31.8 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n",
"Inefficient way to access items rated by user:\n" "Inefficient way to access items rated by user:\n"
] ]
}, },
@ -327,7 +327,7 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"65.2 µs ± 4.5 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" "64.4 µs ± 1.75 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
] ]
} }
], ],
@ -352,7 +352,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 83, "execution_count": 42,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -386,7 +386,7 @@
"matrix([[ 8, 3, 11]])" "matrix([[ 8, 3, 11]])"
] ]
}, },
"execution_count": 83, "execution_count": 42,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -400,7 +400,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 84, "execution_count": 43,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -470,7 +470,7 @@
" [-1.66666667, 0. , 1.33333333, 0.33333333]])" " [-1.66666667, 0. , 1.33333333, 0.33333333]])"
] ]
}, },
"execution_count": 84, "execution_count": 43,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -500,7 +500,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 85, "execution_count": 44,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -558,7 +558,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 86, "execution_count": 45,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -571,7 +571,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 87, "execution_count": 46,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -611,6 +611,130 @@
"(pd.DataFrame(estimations)).to_csv('Recommendations generated/ml-100k/Self_TopPop_estimations.csv', index=False, header=False)" "(pd.DataFrame(estimations)).to_csv('Recommendations generated/ml-100k/Self_TopPop_estimations.csv', index=False, header=False)"
] ]
}, },
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" <th>6</th>\n",
" <th>7</th>\n",
" <th>8</th>\n",
" <th>9</th>\n",
" <th>...</th>\n",
" <th>11</th>\n",
" <th>12</th>\n",
" <th>13</th>\n",
" <th>14</th>\n",
" <th>15</th>\n",
" <th>16</th>\n",
" <th>17</th>\n",
" <th>18</th>\n",
" <th>19</th>\n",
" <th>20</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>50</td>\n",
" <td>5.000000</td>\n",
" <td>286</td>\n",
" <td>4.117021</td>\n",
" <td>288</td>\n",
" <td>4.053191</td>\n",
" <td>294</td>\n",
" <td>3.851064</td>\n",
" <td>300</td>\n",
" <td>...</td>\n",
" <td>174</td>\n",
" <td>3.531915</td>\n",
" <td>98</td>\n",
" <td>3.351064</td>\n",
" <td>313</td>\n",
" <td>3.063830</td>\n",
" <td>405</td>\n",
" <td>2.904255</td>\n",
" <td>79</td>\n",
" <td>2.851064</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>181</td>\n",
" <td>4.446809</td>\n",
" <td>1</td>\n",
" <td>3.914894</td>\n",
" <td>121</td>\n",
" <td>3.595745</td>\n",
" <td>127</td>\n",
" <td>3.563830</td>\n",
" <td>174</td>\n",
" <td>...</td>\n",
" <td>7</td>\n",
" <td>3.446809</td>\n",
" <td>98</td>\n",
" <td>3.351064</td>\n",
" <td>56</td>\n",
" <td>3.308511</td>\n",
" <td>237</td>\n",
" <td>3.287234</td>\n",
" <td>117</td>\n",
" <td>3.159574</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2 rows × 21 columns</p>\n",
"</div>"
],
"text/plain": [
" 0 1 2 3 4 5 6 7 8 9 ... \\\n",
"0 1 50 5.000000 286 4.117021 288 4.053191 294 3.851064 300 ... \n",
"1 2 181 4.446809 1 3.914894 121 3.595745 127 3.563830 174 ... \n",
"\n",
" 11 12 13 14 15 16 17 18 19 20 \n",
"0 174 3.531915 98 3.351064 313 3.063830 405 2.904255 79 2.851064 \n",
"1 7 3.446809 98 3.351064 56 3.308511 237 3.287234 117 3.159574 \n",
"\n",
"[2 rows x 21 columns]"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.DataFrame(result)[:2]"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
@ -620,7 +744,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 88, "execution_count": 48,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -657,7 +781,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 89, "execution_count": 49,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -770,7 +894,7 @@
"[2 rows x 21 columns]" "[2 rows x 21 columns]"
] ]
}, },
"execution_count": 89, "execution_count": 49,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -788,7 +912,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 90, "execution_count": 50,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -800,7 +924,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 91, "execution_count": 51,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -808,9 +932,13 @@
"train_iu=train_ui.transpose().tocsr()\n", "train_iu=train_ui.transpose().tocsr()\n",
"\n", "\n",
"for i in range(train_iu.shape[0]):\n", "for i in range(train_iu.shape[0]):\n",
" if(train_iu.indptr[i+1]-train_iu.indptr[i] != 0):\n", " if len(train_iu.data[train_iu.indptr[i]:train_iu.indptr[i+1]]) == 0:\n",
" avg = np.sum(train_iu.data[train_iu.indptr[i]:train_iu.indptr[i+1]])/(train_iu.indptr[i+1]-train_iu.indptr[i])\n", " TopRated.append((i, 0.))\n",
" TopRated.append((i, avg))\n", " else:\n",
" TopRated.append((i, sum(train_iu.data[train_iu.indptr[i]:train_iu.indptr[i+1]]) / len(train_iu.data[train_iu.indptr[i]:train_iu.indptr[i+1]])))\n",
"\n",
"\n",
"\n",
" \n", " \n",
"TopRated.sort(key=lambda x: x[1], reverse=True)\n", "TopRated.sort(key=lambda x: x[1], reverse=True)\n",
"\n", "\n",
@ -827,20 +955,26 @@
" item_pos+=1\n", " item_pos+=1\n",
" result.append([user_code_id[u]]+list(chain(*rec_user)))\n", " result.append([user_code_id[u]]+list(chain(*rec_user)))\n",
"\n", "\n",
" \n",
"(pd.DataFrame(result)).to_csv('Recommendations generated/ml-100k/Self_TopRated_reco.csv', index=False, header=False)\n", "(pd.DataFrame(result)).to_csv('Recommendations generated/ml-100k/Self_TopRated_reco.csv', index=False, header=False)\n",
"\n", "\n",
"\n", "\n",
"estimations=[]\n", "estimations=[]\n",
"\n", "\n",
"for user, item in zip(*test_ui.nonzero()):\n", "for user, i in zip(*test_ui.nonzero()):\n",
" estimations.append([user_code_id[user], item_code_id[item],\n", " if len(train_iu.data[train_iu.indptr[i]:train_iu.indptr[i+1]]) == 0:\n",
" (train_iu.indptr[item+1]-train_iu.indptr[item])*scaling_factor])\n", " estimations.append([user_code_id[user], item_code_id[i], 2.5])\n",
" else:\n",
" estimations.append(\n",
" [user_code_id[user], item_code_id[i], sum(train_iu.data[train_iu.indptr[i]:train_iu.indptr[i+1]]) / len(train_iu.data[train_iu.indptr[i]:train_iu.indptr[i+1]])])\n",
" \n",
" \n",
"(pd.DataFrame(estimations)).to_csv('Recommendations generated/ml-100k/Self_TopRated_estimations.csv', index=False, header=False)" "(pd.DataFrame(estimations)).to_csv('Recommendations generated/ml-100k/Self_TopRated_estimations.csv', index=False, header=False)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 92, "execution_count": 64,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -936,30 +1070,192 @@
" <td>1500</td>\n", " <td>1500</td>\n",
" <td>5.0</td>\n", " <td>5.0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>119</td>\n",
" <td>5.0</td>\n",
" <td>814</td>\n",
" <td>5.0</td>\n",
" <td>1122</td>\n",
" <td>5.0</td>\n",
" <td>1189</td>\n",
" <td>5.0</td>\n",
" <td>1201</td>\n",
" <td>...</td>\n",
" <td>1293</td>\n",
" <td>5.0</td>\n",
" <td>1306</td>\n",
" <td>5.0</td>\n",
" <td>1467</td>\n",
" <td>5.0</td>\n",
" <td>1491</td>\n",
" <td>5.0</td>\n",
" <td>1500</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>119</td>\n",
" <td>5.0</td>\n",
" <td>814</td>\n",
" <td>5.0</td>\n",
" <td>1122</td>\n",
" <td>5.0</td>\n",
" <td>1189</td>\n",
" <td>5.0</td>\n",
" <td>1201</td>\n",
" <td>...</td>\n",
" <td>1293</td>\n",
" <td>5.0</td>\n",
" <td>1306</td>\n",
" <td>5.0</td>\n",
" <td>1467</td>\n",
" <td>5.0</td>\n",
" <td>1491</td>\n",
" <td>5.0</td>\n",
" <td>1500</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>119</td>\n",
" <td>5.0</td>\n",
" <td>814</td>\n",
" <td>5.0</td>\n",
" <td>1122</td>\n",
" <td>5.0</td>\n",
" <td>1189</td>\n",
" <td>5.0</td>\n",
" <td>1201</td>\n",
" <td>...</td>\n",
" <td>1293</td>\n",
" <td>5.0</td>\n",
" <td>1306</td>\n",
" <td>5.0</td>\n",
" <td>1467</td>\n",
" <td>5.0</td>\n",
" <td>1491</td>\n",
" <td>5.0</td>\n",
" <td>1500</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
"<p>2 rows × 21 columns</p>\n", "<p>5 rows × 21 columns</p>\n",
"</div>" "</div>"
], ],
"text/plain": [ "text/plain": [
" 0 1 2 3 4 5 6 7 8 9 ... 11 12 13 \\\n", " 0 1 2 3 4 5 6 7 8 9 ... 11 12 13 \\\n",
"0 1 814 5.0 1122 5.0 1189 5.0 1201 5.0 1293 ... 1306 5.0 1467 \n", "0 1 814 5.0 1122 5.0 1189 5.0 1201 5.0 1293 ... 1306 5.0 1467 \n",
"1 2 119 5.0 814 5.0 1122 5.0 1189 5.0 1201 ... 1293 5.0 1306 \n", "1 2 119 5.0 814 5.0 1122 5.0 1189 5.0 1201 ... 1293 5.0 1306 \n",
"2 3 119 5.0 814 5.0 1122 5.0 1189 5.0 1201 ... 1293 5.0 1306 \n",
"3 4 119 5.0 814 5.0 1122 5.0 1189 5.0 1201 ... 1293 5.0 1306 \n",
"4 5 119 5.0 814 5.0 1122 5.0 1189 5.0 1201 ... 1293 5.0 1306 \n",
"\n", "\n",
" 14 15 16 17 18 19 20 \n", " 14 15 16 17 18 19 20 \n",
"0 5.0 1491 5.0 1500 5.0 1536 5.0 \n", "0 5.0 1491 5.0 1500 5.0 1536 5.0 \n",
"1 5.0 1467 5.0 1491 5.0 1500 5.0 \n", "1 5.0 1467 5.0 1491 5.0 1500 5.0 \n",
"2 5.0 1467 5.0 1491 5.0 1500 5.0 \n",
"3 5.0 1467 5.0 1491 5.0 1500 5.0 \n",
"4 5.0 1467 5.0 1491 5.0 1500 5.0 \n",
"\n", "\n",
"[2 rows x 21 columns]" "[5 rows x 21 columns]"
] ]
}, },
"execution_count": 92, "execution_count": 64,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"source": [ "source": [
"pd.DataFrame(result)[:2]" "pd.DataFrame(result)[:5]"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>3.516903</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>10</td>\n",
" <td>3.901217</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>25</td>\n",
" <td>3.479851</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>32</td>\n",
" <td>3.833570</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>33</td>\n",
" <td>3.488115</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1 2\n",
"0 1 5 3.516903\n",
"1 1 10 3.901217\n",
"2 1 25 3.479851\n",
"3 1 32 3.833570\n",
"4 1 33 3.488115"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.DataFrame(estimations)[:5]"
] ]
}, },
{ {
@ -971,7 +1267,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 93, "execution_count": 53,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1044,7 +1340,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 94, "execution_count": 54,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -1212,7 +1508,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 95, "execution_count": 55,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1236,7 +1532,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 96, "execution_count": 56,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1248,7 +1544,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 97, "execution_count": 57,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1318,7 +1614,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 98, "execution_count": 58,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -1489,7 +1785,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 99, "execution_count": 59,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1513,7 +1809,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 100, "execution_count": 60,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -1570,7 +1866,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 101, "execution_count": 61,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -1587,7 +1883,7 @@
"0.7524871012820799" "0.7524871012820799"
] ]
}, },
"execution_count": 101, "execution_count": 61,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -1617,24 +1913,24 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 102, "execution_count": 62,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"RMSE: 1.5228\n", "RMSE: 1.5151\n",
"MAE: 1.2225\n" "MAE: 1.2192\n"
] ]
}, },
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"1.2225008866215548" "1.2192187389503517"
] ]
}, },
"execution_count": 102, "execution_count": 62,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }

View File

@ -9,7 +9,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 73, "execution_count": 32,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -26,7 +26,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 74, "execution_count": 33,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -43,7 +43,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 75, "execution_count": 34,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -134,7 +134,7 @@
"4 560 24 2 879976772 559 23" "4 560 24 2 879976772 559 23"
] ]
}, },
"execution_count": 75, "execution_count": 34,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -145,7 +145,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 76, "execution_count": 35,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -155,7 +155,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 77, "execution_count": 36,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -169,7 +169,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 78, "execution_count": 37,
"metadata": { "metadata": {
"scrolled": true "scrolled": true
}, },
@ -191,7 +191,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 79, "execution_count": 38,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -201,7 +201,7 @@
"\twith 8 stored elements in Compressed Sparse Row format>" "\twith 8 stored elements in Compressed Sparse Row format>"
] ]
}, },
"execution_count": 79, "execution_count": 38,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -216,7 +216,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 80, "execution_count": 39,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -259,7 +259,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 81, "execution_count": 40,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -285,7 +285,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 82, "execution_count": 41,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -309,7 +309,7 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"557 ns ± 15.6 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n", "586 ns ± 31.8 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)\n",
"Inefficient way to access items rated by user:\n" "Inefficient way to access items rated by user:\n"
] ]
}, },
@ -327,7 +327,7 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"65.2 µs ± 4.5 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" "64.4 µs ± 1.75 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n"
] ]
} }
], ],
@ -352,7 +352,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 83, "execution_count": 42,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -386,7 +386,7 @@
"matrix([[ 8, 3, 11]])" "matrix([[ 8, 3, 11]])"
] ]
}, },
"execution_count": 83, "execution_count": 42,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -400,7 +400,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 84, "execution_count": 43,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -470,7 +470,7 @@
" [-1.66666667, 0. , 1.33333333, 0.33333333]])" " [-1.66666667, 0. , 1.33333333, 0.33333333]])"
] ]
}, },
"execution_count": 84, "execution_count": 43,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -500,7 +500,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 85, "execution_count": 44,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -558,7 +558,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 86, "execution_count": 45,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -571,7 +571,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 87, "execution_count": 46,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -611,6 +611,130 @@
"(pd.DataFrame(estimations)).to_csv('Recommendations generated/ml-100k/Self_TopPop_estimations.csv', index=False, header=False)" "(pd.DataFrame(estimations)).to_csv('Recommendations generated/ml-100k/Self_TopPop_estimations.csv', index=False, header=False)"
] ]
}, },
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" <th>6</th>\n",
" <th>7</th>\n",
" <th>8</th>\n",
" <th>9</th>\n",
" <th>...</th>\n",
" <th>11</th>\n",
" <th>12</th>\n",
" <th>13</th>\n",
" <th>14</th>\n",
" <th>15</th>\n",
" <th>16</th>\n",
" <th>17</th>\n",
" <th>18</th>\n",
" <th>19</th>\n",
" <th>20</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>50</td>\n",
" <td>5.000000</td>\n",
" <td>286</td>\n",
" <td>4.117021</td>\n",
" <td>288</td>\n",
" <td>4.053191</td>\n",
" <td>294</td>\n",
" <td>3.851064</td>\n",
" <td>300</td>\n",
" <td>...</td>\n",
" <td>174</td>\n",
" <td>3.531915</td>\n",
" <td>98</td>\n",
" <td>3.351064</td>\n",
" <td>313</td>\n",
" <td>3.063830</td>\n",
" <td>405</td>\n",
" <td>2.904255</td>\n",
" <td>79</td>\n",
" <td>2.851064</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>181</td>\n",
" <td>4.446809</td>\n",
" <td>1</td>\n",
" <td>3.914894</td>\n",
" <td>121</td>\n",
" <td>3.595745</td>\n",
" <td>127</td>\n",
" <td>3.563830</td>\n",
" <td>174</td>\n",
" <td>...</td>\n",
" <td>7</td>\n",
" <td>3.446809</td>\n",
" <td>98</td>\n",
" <td>3.351064</td>\n",
" <td>56</td>\n",
" <td>3.308511</td>\n",
" <td>237</td>\n",
" <td>3.287234</td>\n",
" <td>117</td>\n",
" <td>3.159574</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2 rows × 21 columns</p>\n",
"</div>"
],
"text/plain": [
" 0 1 2 3 4 5 6 7 8 9 ... \\\n",
"0 1 50 5.000000 286 4.117021 288 4.053191 294 3.851064 300 ... \n",
"1 2 181 4.446809 1 3.914894 121 3.595745 127 3.563830 174 ... \n",
"\n",
" 11 12 13 14 15 16 17 18 19 20 \n",
"0 174 3.531915 98 3.351064 313 3.063830 405 2.904255 79 2.851064 \n",
"1 7 3.446809 98 3.351064 56 3.308511 237 3.287234 117 3.159574 \n",
"\n",
"[2 rows x 21 columns]"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.DataFrame(result)[:2]"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {},
@ -620,7 +744,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 88, "execution_count": 48,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -657,7 +781,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 89, "execution_count": 49,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -770,7 +894,7 @@
"[2 rows x 21 columns]" "[2 rows x 21 columns]"
] ]
}, },
"execution_count": 89, "execution_count": 49,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -788,7 +912,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 90, "execution_count": 50,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -800,7 +924,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 91, "execution_count": 51,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -808,9 +932,13 @@
"train_iu=train_ui.transpose().tocsr()\n", "train_iu=train_ui.transpose().tocsr()\n",
"\n", "\n",
"for i in range(train_iu.shape[0]):\n", "for i in range(train_iu.shape[0]):\n",
" if(train_iu.indptr[i+1]-train_iu.indptr[i] != 0):\n", " if len(train_iu.data[train_iu.indptr[i]:train_iu.indptr[i+1]]) == 0:\n",
" avg = np.sum(train_iu.data[train_iu.indptr[i]:train_iu.indptr[i+1]])/(train_iu.indptr[i+1]-train_iu.indptr[i])\n", " TopRated.append((i, 0.))\n",
" TopRated.append((i, avg))\n", " else:\n",
" TopRated.append((i, sum(train_iu.data[train_iu.indptr[i]:train_iu.indptr[i+1]]) / len(train_iu.data[train_iu.indptr[i]:train_iu.indptr[i+1]])))\n",
"\n",
"\n",
"\n",
" \n", " \n",
"TopRated.sort(key=lambda x: x[1], reverse=True)\n", "TopRated.sort(key=lambda x: x[1], reverse=True)\n",
"\n", "\n",
@ -827,20 +955,26 @@
" item_pos+=1\n", " item_pos+=1\n",
" result.append([user_code_id[u]]+list(chain(*rec_user)))\n", " result.append([user_code_id[u]]+list(chain(*rec_user)))\n",
"\n", "\n",
" \n",
"(pd.DataFrame(result)).to_csv('Recommendations generated/ml-100k/Self_TopRated_reco.csv', index=False, header=False)\n", "(pd.DataFrame(result)).to_csv('Recommendations generated/ml-100k/Self_TopRated_reco.csv', index=False, header=False)\n",
"\n", "\n",
"\n", "\n",
"estimations=[]\n", "estimations=[]\n",
"\n", "\n",
"for user, item in zip(*test_ui.nonzero()):\n", "for user, i in zip(*test_ui.nonzero()):\n",
" estimations.append([user_code_id[user], item_code_id[item],\n", " if len(train_iu.data[train_iu.indptr[i]:train_iu.indptr[i+1]]) == 0:\n",
" (train_iu.indptr[item+1]-train_iu.indptr[item])*scaling_factor])\n", " estimations.append([user_code_id[user], item_code_id[i], 2.5])\n",
" else:\n",
" estimations.append(\n",
" [user_code_id[user], item_code_id[i], sum(train_iu.data[train_iu.indptr[i]:train_iu.indptr[i+1]]) / len(train_iu.data[train_iu.indptr[i]:train_iu.indptr[i+1]])])\n",
" \n",
" \n",
"(pd.DataFrame(estimations)).to_csv('Recommendations generated/ml-100k/Self_TopRated_estimations.csv', index=False, header=False)" "(pd.DataFrame(estimations)).to_csv('Recommendations generated/ml-100k/Self_TopRated_estimations.csv', index=False, header=False)"
] ]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 92, "execution_count": 64,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -936,30 +1070,192 @@
" <td>1500</td>\n", " <td>1500</td>\n",
" <td>5.0</td>\n", " <td>5.0</td>\n",
" </tr>\n", " </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>119</td>\n",
" <td>5.0</td>\n",
" <td>814</td>\n",
" <td>5.0</td>\n",
" <td>1122</td>\n",
" <td>5.0</td>\n",
" <td>1189</td>\n",
" <td>5.0</td>\n",
" <td>1201</td>\n",
" <td>...</td>\n",
" <td>1293</td>\n",
" <td>5.0</td>\n",
" <td>1306</td>\n",
" <td>5.0</td>\n",
" <td>1467</td>\n",
" <td>5.0</td>\n",
" <td>1491</td>\n",
" <td>5.0</td>\n",
" <td>1500</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>119</td>\n",
" <td>5.0</td>\n",
" <td>814</td>\n",
" <td>5.0</td>\n",
" <td>1122</td>\n",
" <td>5.0</td>\n",
" <td>1189</td>\n",
" <td>5.0</td>\n",
" <td>1201</td>\n",
" <td>...</td>\n",
" <td>1293</td>\n",
" <td>5.0</td>\n",
" <td>1306</td>\n",
" <td>5.0</td>\n",
" <td>1467</td>\n",
" <td>5.0</td>\n",
" <td>1491</td>\n",
" <td>5.0</td>\n",
" <td>1500</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>119</td>\n",
" <td>5.0</td>\n",
" <td>814</td>\n",
" <td>5.0</td>\n",
" <td>1122</td>\n",
" <td>5.0</td>\n",
" <td>1189</td>\n",
" <td>5.0</td>\n",
" <td>1201</td>\n",
" <td>...</td>\n",
" <td>1293</td>\n",
" <td>5.0</td>\n",
" <td>1306</td>\n",
" <td>5.0</td>\n",
" <td>1467</td>\n",
" <td>5.0</td>\n",
" <td>1491</td>\n",
" <td>5.0</td>\n",
" <td>1500</td>\n",
" <td>5.0</td>\n",
" </tr>\n",
" </tbody>\n", " </tbody>\n",
"</table>\n", "</table>\n",
"<p>2 rows × 21 columns</p>\n", "<p>5 rows × 21 columns</p>\n",
"</div>" "</div>"
], ],
"text/plain": [ "text/plain": [
" 0 1 2 3 4 5 6 7 8 9 ... 11 12 13 \\\n", " 0 1 2 3 4 5 6 7 8 9 ... 11 12 13 \\\n",
"0 1 814 5.0 1122 5.0 1189 5.0 1201 5.0 1293 ... 1306 5.0 1467 \n", "0 1 814 5.0 1122 5.0 1189 5.0 1201 5.0 1293 ... 1306 5.0 1467 \n",
"1 2 119 5.0 814 5.0 1122 5.0 1189 5.0 1201 ... 1293 5.0 1306 \n", "1 2 119 5.0 814 5.0 1122 5.0 1189 5.0 1201 ... 1293 5.0 1306 \n",
"2 3 119 5.0 814 5.0 1122 5.0 1189 5.0 1201 ... 1293 5.0 1306 \n",
"3 4 119 5.0 814 5.0 1122 5.0 1189 5.0 1201 ... 1293 5.0 1306 \n",
"4 5 119 5.0 814 5.0 1122 5.0 1189 5.0 1201 ... 1293 5.0 1306 \n",
"\n", "\n",
" 14 15 16 17 18 19 20 \n", " 14 15 16 17 18 19 20 \n",
"0 5.0 1491 5.0 1500 5.0 1536 5.0 \n", "0 5.0 1491 5.0 1500 5.0 1536 5.0 \n",
"1 5.0 1467 5.0 1491 5.0 1500 5.0 \n", "1 5.0 1467 5.0 1491 5.0 1500 5.0 \n",
"2 5.0 1467 5.0 1491 5.0 1500 5.0 \n",
"3 5.0 1467 5.0 1491 5.0 1500 5.0 \n",
"4 5.0 1467 5.0 1491 5.0 1500 5.0 \n",
"\n", "\n",
"[2 rows x 21 columns]" "[5 rows x 21 columns]"
] ]
}, },
"execution_count": 92, "execution_count": 64,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
], ],
"source": [ "source": [
"pd.DataFrame(result)[:2]" "pd.DataFrame(result)[:5]"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>5</td>\n",
" <td>3.516903</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>10</td>\n",
" <td>3.901217</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>25</td>\n",
" <td>3.479851</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>32</td>\n",
" <td>3.833570</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>33</td>\n",
" <td>3.488115</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1 2\n",
"0 1 5 3.516903\n",
"1 1 10 3.901217\n",
"2 1 25 3.479851\n",
"3 1 32 3.833570\n",
"4 1 33 3.488115"
]
},
"execution_count": 65,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.DataFrame(estimations)[:5]"
] ]
}, },
{ {
@ -971,7 +1267,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 93, "execution_count": 53,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1044,7 +1340,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 94, "execution_count": 54,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -1212,7 +1508,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 95, "execution_count": 55,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1236,7 +1532,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 96, "execution_count": 56,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1248,7 +1544,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 97, "execution_count": 57,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1318,7 +1614,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 98, "execution_count": 58,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -1489,7 +1785,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 99, "execution_count": 59,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -1513,7 +1809,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 100, "execution_count": 60,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -1570,7 +1866,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 101, "execution_count": 61,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -1587,7 +1883,7 @@
"0.7524871012820799" "0.7524871012820799"
] ]
}, },
"execution_count": 101, "execution_count": 61,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -1617,24 +1913,24 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 102, "execution_count": 62,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"RMSE: 1.5228\n", "RMSE: 1.5151\n",
"MAE: 1.2225\n" "MAE: 1.2192\n"
] ]
}, },
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"1.2225008866215548" "1.2192187389503517"
] ]
}, },
"execution_count": 102, "execution_count": 62,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff