2nd meeting

This commit is contained in:
Robert Kwiecinski 2021-04-16 22:41:06 +02:00
parent bfcef2ae57
commit 0f00fb0454
6 changed files with 2997 additions and 563 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@ -0,0 +1,96 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['dimensions: 1, cases when observation is the nearest: 0.0%',\n",
" 'dimensions: 2, cases when observation is the nearest: 0.0%',\n",
" 'dimensions: 3, cases when observation is the nearest: 0.0%',\n",
" 'dimensions: 10, cases when observation is the nearest: 13.0%',\n",
" 'dimensions: 20, cases when observation is the nearest: 61.0%',\n",
" 'dimensions: 30, cases when observation is the nearest: 96.0%',\n",
" 'dimensions: 40, cases when observation is the nearest: 98.0%',\n",
" 'dimensions: 50, cases when observation is the nearest: 100.0%',\n",
" 'dimensions: 60, cases when observation is the nearest: 100.0%',\n",
" 'dimensions: 70, cases when observation is the nearest: 100.0%',\n",
" 'dimensions: 80, cases when observation is the nearest: 100.0%',\n",
" 'dimensions: 90, cases when observation is the nearest: 100.0%']"
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import random\n",
"from numpy.linalg import norm\n",
"\n",
"SEED = 42  # fixed seed so Restart & Run All reproduces the same table\n",
"random.seed(SEED)\n",
"rng = np.random.default_rng(SEED)\n",
"\n",
"dimensions = [1, 2, 3] + [10 * i for i in range(1, 10)]\n",
"nb_vectors = 10000\n",
"trials = 100\n",
"k = 1  # by setting k=1 we want to check how often the closest vector to the average of 2 random vectors is one of these 2 vectors\n",
"\n",
"\n",
"def nearest_hit_rate(dimension, nb_vectors, trials, k, rng):\n",
"    \"\"\"Return the fraction of trials in which a source vector is among the k best matches.\n",
"\n",
"    Draws `nb_vectors` standard-normal vectors of length `dimension` from `rng`.\n",
"    In each trial two distinct vectors are picked with `random.sample`, averaged,\n",
"    and every vector is scored by cosine similarity to that average. A trial\n",
"    counts as a success when either picked vector is among the `k` highest scores.\n",
"\n",
"    Parameters:\n",
"        dimension: length of each random vector.\n",
"        nb_vectors: number of random vectors to draw (at least 2).\n",
"        trials: number of random pairs to test.\n",
"        k: how many top-scoring vectors count as a hit.\n",
"        rng: numpy random Generator used to draw the vectors.\n",
"\n",
"    Returns:\n",
"        successes / trials as a float between 0 and 1.\n",
"    \"\"\"\n",
"    vectors = rng.normal(0, 1, size=(nb_vectors, dimension))\n",
"    # The vector norms do not depend on the trial, so compute them once.\n",
"    vector_norms = norm(vectors, axis=1)\n",
"    successes = 0\n",
"    for _ in range(trials):\n",
"        i1, i2 = random.sample(range(nb_vectors), 2)\n",
"        target = (vectors[i1] + vectors[i2]) / 2\n",
"        # Cosine similarity of every vector to the target (higher means closer).\n",
"        similarities = np.dot(vectors, target) / norm(target) / vector_norms\n",
"        # nlargest picks the top k without sorting all nb_vectors scores.\n",
"        nearest = pd.Series(similarities).nlargest(k).index\n",
"        if i1 in nearest or i2 in nearest:\n",
"            successes += 1\n",
"    return successes / trials\n",
"\n",
"\n",
"result = [\n",
"    nearest_hit_rate(dimension, nb_vectors, trials, k, rng)\n",
"    for dimension in dimensions\n",
"]\n",
"\n",
"# The .1% format scales and rounds in one step; 100*round(j,3) printed float\n",
"# artifacts such as 56.99999999999999% for a rate of 0.57.\n",
"[\n",
"    f\"dimensions: {dimension}, cases when observation is the nearest: {rate:.1%}\"\n",
"    for dimension, rate in zip(dimensions, result)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

File diff suppressed because one or more lines are too long

Binary file not shown.