move 03 solutions to solutions

This commit is contained in:
Jakub Pokrywka 2021-03-24 12:38:00 +01:00
parent fdf0e512f2
commit f8f21aea0a
2 changed files with 43 additions and 33 deletions

View File

@ -309,22 +309,6 @@
" pass" " pass"
] ]
}, },
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"def word_to_index(word):\n",
" vec = np.zeros(len(vocabulary))\n",
" if word in vocabulary:\n",
" idx = vocabulary.index(word)\n",
" vec[idx] = 1\n",
" else:\n",
" vec[-1] = 1\n",
" return vec"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 16, "execution_count": 16,
@ -368,16 +352,7 @@
"execution_count": 18, "execution_count": 18,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": []
"def tf(document):\n",
" document_vector = None\n",
" for word in document:\n",
" if document_vector is None:\n",
" document_vector = word_to_index(word)\n",
" else:\n",
" document_vector += word_to_index(word)\n",
" return document_vector"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
@ -495,14 +470,12 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 24, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"def similarity(query, document):\n", "def similarity(query, document):\n",
" numerator = np.sum(query * document)\n", " pass"
" denominator = np.sqrt(np.sum(query*query)) * np.sqrt(np.sum(document*document)) \n",
" return numerator / denominator"
] ]
}, },
{ {
@ -1117,7 +1090,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.5" "version": "3.8.3"
} }
}, },
"nbformat": 4, "nbformat": 4,

View File

@ -1,11 +1,48 @@
{ {
"cells": [ "cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"def word_to_index(word):\n",
" vec = np.zeros(len(vocabulary))\n",
" if word in vocabulary:\n",
" idx = vocabulary.index(word)\n",
" vec[idx] = 1\n",
" else:\n",
" vec[-1] = 1\n",
" return vec"
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [] "source": [
"def tf(document):\n",
" document_vector = None\n",
" for word in document:\n",
" if document_vector is None:\n",
" document_vector = word_to_index(word)\n",
" else:\n",
" document_vector += word_to_index(word)\n",
" return document_vector"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def similarity(query, document):\n",
" numerator = np.sum(query * document)\n",
" denominator = np.sqrt(np.sum(query*query)) * np.sqrt(np.sum(document*document)) \n",
" return numerator / denominator"
]
} }
], ],
"metadata": { "metadata": {
@ -24,7 +61,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.5" "version": "3.8.3"
} }
}, },
"nbformat": 4, "nbformat": 4,