From f8f21aea0a5a9825423255f198126b8692db32ba Mon Sep 17 00:00:00 2001 From: Jakub Pokrywka Date: Wed, 24 Mar 2021 12:38:00 +0100 Subject: [PATCH] move 03 solutions to solutions --- cw/03a_tfidf_tasks.ipynb | 35 +++--------------------- cw/03a_tfidf_tasks_ODPOWIEDZI.ipynb | 41 +++++++++++++++++++++++++++-- 2 files changed, 43 insertions(+), 33 deletions(-) diff --git a/cw/03a_tfidf_tasks.ipynb b/cw/03a_tfidf_tasks.ipynb index 24b36fa..518cf5f 100644 --- a/cw/03a_tfidf_tasks.ipynb +++ b/cw/03a_tfidf_tasks.ipynb @@ -309,22 +309,6 @@ " pass" ] }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [], - "source": [ - "def word_to_index(word):\n", - " vec = np.zeros(len(vocabulary))\n", - " if word in vocabulary:\n", - " idx = vocabulary.index(word)\n", - " vec[idx] = 1\n", - " else:\n", - " vec[-1] = 1\n", - " return vec" - ] - }, { "cell_type": "code", "execution_count": 16, @@ -368,16 +352,7 @@ "execution_count": 18, "metadata": {}, "outputs": [], - "source": [ - "def tf(document):\n", - " document_vector = None\n", - " for word in document:\n", - " if document_vector is None:\n", - " document_vector = word_to_index(word)\n", - " else:\n", - " document_vector += word_to_index(word)\n", - " return document_vector" - ] + "source": [] }, { "cell_type": "code", @@ -495,14 +470,12 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def similarity(query, document):\n", - " numerator = np.sum(query * document)\n", - " denominator = np.sqrt(np.sum(query*query)) * np.sqrt(np.sum(document*document)) \n", - " return numerator / denominator" + " pass" ] }, { @@ -1117,7 +1090,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.8.3" } }, "nbformat": 4, diff --git a/cw/03a_tfidf_tasks_ODPOWIEDZI.ipynb b/cw/03a_tfidf_tasks_ODPOWIEDZI.ipynb index 956cbd9..6554361 100644 --- a/cw/03a_tfidf_tasks_ODPOWIEDZI.ipynb +++ b/cw/03a_tfidf_tasks_ODPOWIEDZI.ipynb @@ -1,11 +1,48 @@ { "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "def word_to_index(word):\n", + " vec = np.zeros(len(vocabulary))\n", + " if word in vocabulary:\n", + " idx = vocabulary.index(word)\n", + " vec[idx] = 1\n", + " else:\n", + " vec[-1] = 1\n", + " return vec" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "def tf(document):\n", + " document_vector = None\n", + " for word in document:\n", + " if document_vector is None:\n", + " document_vector = word_to_index(word)\n", + " else:\n", + " document_vector += word_to_index(word)\n", + " return document_vector" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def similarity(query, document):\n", + " numerator = np.sum(query * document)\n", + " denominator = np.sqrt(np.sum(query*query)) * np.sqrt(np.sum(document*document)) \n", + " return numerator / denominator" + ] } ], "metadata": { @@ -24,7 +61,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.8.3" } }, "nbformat": 4,