forked from robkw/introduction_to_recommender_systems
2nd meeting
This commit is contained in:
parent
bfcef2ae57
commit
0f00fb0454
1004
P2. Evaluation.ipynb
1004
P2. Evaluation.ipynb
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Binary file not shown.
|
@ -0,0 +1,96 @@
|
|||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['dimensions: 1, cases when observation is the nearest: 0.0%',\n",
|
||||
" 'dimensions: 2, cases when observation is the nearest: 0.0%',\n",
|
||||
" 'dimensions: 3, cases when observation is the nearest: 0.0%',\n",
|
||||
" 'dimensions: 10, cases when observation is the nearest: 13.0%',\n",
|
||||
" 'dimensions: 20, cases when observation is the nearest: 61.0%',\n",
|
||||
" 'dimensions: 30, cases when observation is the nearest: 96.0%',\n",
|
||||
" 'dimensions: 40, cases when observation is the nearest: 98.0%',\n",
|
||||
" 'dimensions: 50, cases when observation is the nearest: 100.0%',\n",
|
||||
" 'dimensions: 60, cases when observation is the nearest: 100.0%',\n",
|
||||
" 'dimensions: 70, cases when observation is the nearest: 100.0%',\n",
|
||||
" 'dimensions: 80, cases when observation is the nearest: 100.0%',\n",
|
||||
" 'dimensions: 90, cases when observation is the nearest: 100.0%']"
|
||||
]
|
||||
},
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"import random\n",
|
||||
"from numpy.linalg import norm\n",
|
||||
"\n",
|
||||
"dimensions = [1, 2, 3] + [10 * i for i in range(1, 10)]\n",
|
||||
"nb_vectors = 10000\n",
|
||||
"trials = 100\n",
|
||||
"k = 1 # by setting k=1 we want to check how often the closest vector to the average of 2 random vectors is one of these 2 vectors\n",
|
||||
"\n",
|
||||
"result = []\n",
|
||||
"for dimension in dimensions:\n",
|
||||
" vectors = np.random.normal(0, 1, size=(nb_vectors, dimension))\n",
|
||||
" successes = 0\n",
|
||||
" for i in range(trials):\n",
|
||||
" i1, i2 = random.sample(range(nb_vectors), 2)\n",
|
||||
" target = (vectors[i1] + vectors[i2]) / 2\n",
|
||||
"\n",
|
||||
" distances = pd.DataFrame(\n",
|
||||
" enumerate(\n",
|
||||
" np.dot(target, vectors.transpose())\n",
|
||||
" / norm(target)\n",
|
||||
" / norm(vectors.transpose(), axis=0)\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" distances = distances.sort_values(by=[1], ascending=False)\n",
|
||||
" if (i1 in (list(distances[0][:k]))) | (i2 in (list(distances[0][:k]))):\n",
|
||||
" successes += 1\n",
|
||||
" result.append(successes / trials)\n",
|
||||
"\n",
|
||||
"[\n",
|
||||
" f\"dimensions: {i}, cases when observation is the nearest: {100*round(j,3)}%\"\n",
|
||||
" for i, j in zip(dimensions, result)\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
File diff suppressed because one or more lines are too long
Binary file not shown.
Loading…
Reference in New Issue