Merge pull request '2nd meeting' (#2) from 2nd_meeting into master
Reviewed-on: #2
This commit is contained in:
commit
7838e0c156
1004
P2. Evaluation.ipynb
1004
P2. Evaluation.ipynb
File diff suppressed because it is too large
Load Diff
1057
P3. k-nearest neighbours.ipynb
Normal file
1057
P3. k-nearest neighbours.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
BIN
P3. k-nearest neighbours.pdf
Normal file
BIN
P3. k-nearest neighbours.pdf
Normal file
Binary file not shown.
96
P4. Appendix - embeddings in high demensional spaces.ipynb
Normal file
96
P4. Appendix - embeddings in high demensional spaces.ipynb
Normal file
@ -0,0 +1,96 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['dimensions: 1, cases when observation is the nearest: 0.0%',\n",
|
||||
" 'dimensions: 2, cases when observation is the nearest: 0.0%',\n",
|
||||
" 'dimensions: 3, cases when observation is the nearest: 0.0%',\n",
|
||||
" 'dimensions: 10, cases when observation is the nearest: 13.0%',\n",
|
||||
" 'dimensions: 20, cases when observation is the nearest: 61.0%',\n",
|
||||
" 'dimensions: 30, cases when observation is the nearest: 96.0%',\n",
|
||||
" 'dimensions: 40, cases when observation is the nearest: 98.0%',\n",
|
||||
" 'dimensions: 50, cases when observation is the nearest: 100.0%',\n",
|
||||
" 'dimensions: 60, cases when observation is the nearest: 100.0%',\n",
|
||||
" 'dimensions: 70, cases when observation is the nearest: 100.0%',\n",
|
||||
" 'dimensions: 80, cases when observation is the nearest: 100.0%',\n",
|
||||
" 'dimensions: 90, cases when observation is the nearest: 100.0%']"
|
||||
]
|
||||
},
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import numpy as np\n",
|
||||
"import pandas as pd\n",
|
||||
"import random\n",
|
||||
"from numpy.linalg import norm\n",
|
||||
"\n",
|
||||
"dimensions = [1, 2, 3] + [10 * i for i in range(1, 10)]\n",
|
||||
"nb_vectors = 10000\n",
|
||||
"trials = 100\n",
|
||||
"k = 1 # with k=1 we check how often the vector closest (by cosine similarity) to the average of 2 random vectors is one of those 2 vectors\n",
|
||||
"\n",
|
||||
"result = []\n",
|
||||
"for dimension in dimensions:\n",
|
||||
" vectors = np.random.normal(0, 1, size=(nb_vectors, dimension))\n",
|
||||
" successes = 0\n",
|
||||
" for i in range(trials):\n",
|
||||
" i1, i2 = random.sample(range(nb_vectors), 2)\n",
|
||||
" target = (vectors[i1] + vectors[i2]) / 2\n",
|
||||
"\n",
|
||||
" distances = pd.DataFrame(\n",
|
||||
" enumerate(\n",
|
||||
" np.dot(target, vectors.transpose())\n",
|
||||
" / norm(target)\n",
|
||||
" / norm(vectors.transpose(), axis=0)\n",
|
||||
" )\n",
|
||||
" )\n",
|
||||
" distances = distances.sort_values(by=[1], ascending=False)\n",
|
||||
" if (i1 in (list(distances[0][:k]))) | (i2 in (list(distances[0][:k]))):\n",
|
||||
" successes += 1\n",
|
||||
" result.append(successes / trials)\n",
|
||||
"\n",
|
||||
"[\n",
|
||||
" f\"dimensions: {i}, cases when observation is the nearest: {100*round(j,3)}%\"\n",
|
||||
" for i, j in zip(dimensions, result)\n",
|
||||
"]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
1403
P4. Matrix Factorization.ipynb
Normal file
1403
P4. Matrix Factorization.ipynb
Normal file
File diff suppressed because one or more lines are too long
BIN
P4. Matrix Factorization.pdf
Normal file
BIN
P4. Matrix Factorization.pdf
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user