From f4e46c4588c138e6d7232605be5ec02567f2735f Mon Sep 17 00:00:00 2001
From: Jakub Pokrywka <jakubpokrywka@gmail.com>
Date: Tue, 22 Mar 2022 21:56:40 +0100
Subject: [PATCH] add 03

---
 cw/03a_tfidf.ipynb           | 137 +++++------------------
 cw/03b_tfidf_newsgroup.ipynb | 209 +++++++++++++++++++++++++++++++++--
 2 files changed, 228 insertions(+), 118 deletions(-)

diff --git a/cw/03a_tfidf.ipynb b/cw/03a_tfidf.ipynb
index 292afe7..bb85f01 100644
--- a/cw/03a_tfidf.ipynb
+++ b/cw/03a_tfidf.ipynb
@@ -64,6 +64,14 @@
     "- czy możemy ztokenizować tekst np. documents.split(' ') jakie wystąpią wtedy problemy?"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### ODPOWIEDŹ\n",
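+    "- samo split(' ') zostawia interpunkcję przyklejoną do słów (np. \"psa!\" zamiast \"psa\") i nie ujednolica wielkości liter, dlatego lepiej najpierw zrobić preprocessing, a dopiero później tokenizację"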
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -295,15 +303,6 @@
     "vocabulary"
    ]
   },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## PYTANIA\n",
-    "\n",
-    "jak będzie słowo \"jak\" w reprezentacji wektorowej TF?"
-   ]
-  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -359,13 +358,6 @@
     "    pass"
    ]
   },
-  {
-   "cell_type": "code",
-   "execution_count": 18,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
   {
    "cell_type": "code",
    "execution_count": 19,
@@ -860,6 +852,9 @@
    ],
    "source": [
     "# dlatego potrzebujemy mianownik w cosine similarity\n",
+    "# dłuższe dokumenty, w których raz wystąpi słowo rower, są gorzej punktowane od\n",
+    "# krótszych. Jeżeli słowo rower wystąpiło w bardzo krótkim dokumencie, to znaczy,\n",
+    "# że jest większe prawdopodobieństwo, że dokument jest o rowerze\n",
     "query = 'rowerze'\n",
     "for i in range(len(documents)):\n",
     "    display(documents[i])\n",
@@ -965,7 +960,8 @@
     }
    ],
    "source": [
-    "# dlatego potrzebujemy term frequency → wiecej znaczy bardziej dopasowany dokument\n",
+    "# dlatego potrzebujemy term frequency → więcej wystąpień słowa w dokumencie\n",
+    "# znaczy bardziej dopasowany dokument\n",
     "query = 'i'\n",
     "for i in range(len(documents)):\n",
     "    display(documents[i])\n",
@@ -974,104 +970,26 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 3,
    "metadata": {
-    "scrolled": true
+    "scrolled": false
    },
    "outputs": [
     {
-     "data": {
-      "text/plain": [
-       "'Ala lubi zwierzęta i ma kota oraz psa!'"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "0.24999999999999994"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "'Ola lubi zwierzęta oraz ma kota a także chomika!'"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "0.2357022603955158"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "'I Jan jeździ na rowerze.'"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "0.31622776601683794"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "'2 wojna światowa była wielkim konfliktem zbrojnym'"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "0.0"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "'Tomek lubi psy, ma psa  i jeździ na motorze i rowerze.'"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "data": {
-      "text/plain": [
-       "0.39223227027636803"
-      ]
-     },
-     "metadata": {},
-     "output_type": "display_data"
+     "ename": "NameError",
+     "evalue": "name 'documents' is not defined",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
+      "\u001b[0;32m<ipython-input-3-ca637083c8f1>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0;31m# słowo chomik ma większą wagę od i, ponieważ występuje w mniejszej ilości dokumentów\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0mquery\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'i chomika'\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0;32mfor\u001b[0m \u001b[0mi\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mrange\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdocuments\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      5\u001b[0m     \u001b[0mdisplay\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdocuments\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      6\u001b[0m     \u001b[0mdisplay\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0msimilarity\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtransform_query\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mquery\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdocuments_vectorized\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mi\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
+      "\u001b[0;31mNameError\u001b[0m: name 'documents' is not defined"
+     ]
     }
    ],
    "source": [
     "# dlatego IDF - żeby ważniejsze słowa miał większą wagę\n",
+    "# słowo chomik ma większą wagę od i, ponieważ występuje w mniejszej ilości dokumentów\n",
     "query = 'i chomika'\n",
     "for i in range(len(documents)):\n",
     "    display(documents[i])\n",
@@ -1081,7 +999,10 @@
   {
    "cell_type": "markdown",
    "metadata": {},
-   "source": []
+   "source": [
+    "### Uwaga\n",
+    "Powyższe przykłady pokazują score dokumentu. Aby zrobić wyszukiwarkę, powinniśmy posortować te dokumenty po score (od największego) i zaprezentować w tej kolejności."
+   ]
   }
  ],
  "metadata": {
diff --git a/cw/03b_tfidf_newsgroup.ipynb b/cw/03b_tfidf_newsgroup.ipynb
index 1967e81..05f3b7d 100644
--- a/cw/03b_tfidf_newsgroup.ipynb
+++ b/cw/03b_tfidf_newsgroup.ipynb
@@ -341,9 +341,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "query_str = 'speed'\n",
+    "#query_str = 'speed'\n",
     "#query_str = 'speed car'\n",
-    "#query_str = 'spider man'"
+    "query_str = 'spider man'"
    ]
   },
   {
@@ -385,7 +385,7 @@
      "data": {
       "text/plain": [
        "<1x130107 sparse matrix of type '<class 'numpy.float64'>'\n",
-       "\twith 1 stored elements in Compressed Sparse Row format>"
+       "\twith 2 stored elements in Compressed Sparse Row format>"
       ]
      },
      "execution_count": 17,
@@ -414,7 +414,7 @@
     {
      "data": {
       "text/plain": [
-       "array([0.26949927, 0.3491801 , 0.44292083, 0.47784165])"
+       "array([0.17360013, 0.22933014, 0.28954818, 0.45372239])"
       ]
      },
      "execution_count": 19,
@@ -434,7 +434,7 @@
     {
      "data": {
       "text/plain": [
-       "array([4517, 5509, 2116, 9921])"
+       "array([ 2455,  8920,  5497, 11031])"
       ]
      },
      "execution_count": 20,
@@ -448,11 +448,185 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 21,
    "metadata": {
     "scrolled": true
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "From: keiths@spider.co.uk (Keith Smith)\n",
+      "Subject: win/NT file systems\n",
+      "Organization: Spider Systems Limited, Edinburgh, UK.\n",
+      "Lines: 6\n",
+      "Nntp-Posting-Host: trapdoor.spider.co.uk\n",
+      "\n",
+      "OK will some one out there tell me why / how DOS 5\n",
+      "can read (I havn't tried writing in case it breaks something)\n",
+      "the Win/NT NTFS file system.\n",
+      "I thought NTFS was supposed to be better than the FAT system\n",
+      "\n",
+      "keith\n",
+      "\n",
+      "0.4537223924558256\n",
+      "----------------------------------------------------------------------------------------------------\n",
+      "----------------------------------------------------------------------------------------------------\n",
+      "----------------------------------------------------------------------------------------------------\n",
+      "From: brandt@cs.unc.edu (Andrew Brandt)\n",
+      "Subject: Seeking good Alfa Romeo mechanic.\n",
+      "Organization: The University of North Carolina at Chapel Hill\n",
+      "Lines: 14\n",
+      "NNTP-Posting-Host: axon.cs.unc.edu\n",
+      "Keywords: alfa, romeo, spider, mechanic\n",
+      "\n",
+      "I am looking for recommendations for a good (great?) Alfa Romeo\n",
+      "mechanic in South Jersey or Philadelphia or nearby.\n",
+      "\n",
+      "I have a '78 Alfa Spider that needs some engine, tranny, steering work\n",
+      "done.  The body is in quite good shape.  The car is awful in cold\n",
+      "weather, won't start if below freezing (I know, I know, why drive a\n",
+      "Spider if there's snow on the ground ...).  It has Bosch *mechanical*\n",
+      "fuel injection that I am sure needs adjustment.\n",
+      "\n",
+      "Any opinions are welcome on what to look for or who to call.\n",
+      "\n",
+      "Email or post (to rec.autos), I will summarize if people want.\n",
+      "\n",
+      "Thx, Andy (brandt@cs.unc.edu)\n",
+      "\n",
+      "0.28954817869991817\n",
+      "----------------------------------------------------------------------------------------------------\n",
+      "----------------------------------------------------------------------------------------------------\n",
+      "----------------------------------------------------------------------------------------------------\n",
+      "From: michaelr@spider.co.uk (Michael S. A. Robb)\n",
+      "Subject: Re: Honors Degrees: Do they mean anything?\n",
+      "Organization: Spider Systems Limited, Edinburgh, UK.\n",
+      "Lines: 44\n",
+      "\n",
+      "In article <TKLD.93Apr2123341@burns.cogsci.ed.ac.uk> tkld@cogsci.ed.ac.uk (Kevin Davidson) writes:\n",
+      ">\n",
+      ">>   In my opinion, a programming degree is still worth having.\n",
+      ">\n",
+      "> Yes, but a CS degree is *not* a programming degree. Does anybody know of\n",
+      ">a computing course where *programming* is taught ? Computer Science is\n",
+      ">a branch of maths (or the course I did was).\n",
+      "> I've also done a Software Engineering course - much more practical and likely\n",
+      ">to be the sort of thing an employer really wants, rather than what they think\n",
+      ">they want, but also did not teach programming. The ability to program was\n",
+      ">an entry requirement.\n",
+      "\n",
+      "At Robert Gordon University, programming was the main (most time-consuming) \n",
+      "start of the course. The first two years consisted of five subjects:\n",
+      "Software Engineering (Pascal/C/UNIX), Computer Engineering (6502/6809/68000 \n",
+      "assembler), Computer Theory (LISP/Prolog), Mathematics/Statistics and \n",
+      "Communication Skills (How to pass interviews/intelligence tests and group\n",
+      "discussions e.g. How to survive a helicopter crash in the North Sea).\n",
+      "The third year (Industrial placement) was spent working for a computer company \n",
+      "for a year. The company could be anywhere in Europe (there was a special \n",
+      "Travel Allowance Scheme to cover the visiting costs of professors).  \n",
+      "The fourth year included Operating Systems(C/Modula-2), Software Engineering \n",
+      "(C/8086 assembler), Real Time Laboratory (C/68000 assembler) and Computing \n",
+      "Theory (LISP).  There were also Group Projects in 2nd and 4th Years, where \n",
+      "students worked in teams to select their own project or decide to work for an \n",
+      "outside company (the only disadvantage being that specifications would change \n",
+      "suddenly).\n",
+      " \n",
+      "In the first four years, there was a 50%:50% weighting between courseworks and \n",
+      "exams for most subjects. However in the Honours year, this was reduced to a \n",
+      "30%:70% split between an Individual Project and final exams (no coursework \n",
+      "assessment) - are all Computer Science courses like this?\n",
+      "\n",
+      "BTW - we started off with 22 students in our first year and were left with 8 by\n",
+      "Honours year. Also, every course is tutored separately. Not easy trying\n",
+      "to sleep when you are in 8 student class :-). \n",
+      "\n",
+      "Cheers,\n",
+      "  Michael \n",
+      "-- \n",
+      "| Michael S. A. Robb     | Tel: +44 31 554 9424  | \"..The problem with bolt-on\n",
+      "| Software Engineer      | Fax: +44 31 554 0649  |  software is making sure the\n",
+      "| Spider Systems Limited | E-mail:               |  bolts are the right size..\"\n",
+      "| Edinburgh, EH6 5NG     | michaelr@spider.co.uk |             - Anonymous\n",
+      "\n",
+      "0.22933013891071233\n",
+      "----------------------------------------------------------------------------------------------------\n",
+      "----------------------------------------------------------------------------------------------------\n",
+      "----------------------------------------------------------------------------------------------------\n",
+      "From: jrm@elm.circa.ufl.edu (Jeff Mason)\n",
+      "Subject: AUCTION: Marvel, DC, Valiant, Image, Dark Horse, etc...\n",
+      "Organization: Univ. of Florida Psychology Dept.\n",
+      "Lines: 59\n",
+      "NNTP-Posting-Host: elm.circa.ufl.edu\n",
+      "\n",
+      "I am auctioning off the following comics.  These minimum bids are set\n",
+      "below what I would normally sell them for.  Make an offer, and I will\n",
+      "accept the highest bid after the auction has been completed.\n",
+      "\n",
+      "TITLE                                                   Minimum/Current \n",
+      "--------------------------------------------------------------\n",
+      "Alpha Flight 51 (Jim Lee's first work at Marvel)\t$ 5.00\n",
+      "Aliens 1 (1st app Aliens in comics, 1st prnt, May 1988)\t$20.00\n",
+      "Amazing Spider-Man 136 (Intro new Green Goblin)         $20.00\n",
+      "Amazing Spider-Man 238 (1st appearance Hobgoblin)\t$50.00\n",
+      "Archer and Armstrong 1 (Frank Miller/Smith/Layton)\t$ 7.50\n",
+      "Avengers 263 (1st appearance X-factor)                  $ 3.50\n",
+      "Bloodshot 1 (Chromium cover, BWSmith Cover/Poster)\t$ 5.00\n",
+      "Daredevil 158 (Frank Miller art begins)                 $35.00\n",
+      "Dark Horse Presents 1 (1st app Concrete, 1st printing)\t$ 7.50 \n",
+      "H.A.R.D. Corps 1 \t\t\t\t\t$ 5.00\n",
+      "Incredible Hulk 324 (1st app Grey Hulk since #1, 1962)\t$ 7.50\n",
+      "Incredible Hulk 330 (1st McFarlane issue)\t\t$15.00\n",
+      "Incredible Hulk 331 (Grey Hulk series begins)\t\t$11.20\t\n",
+      "Incredible Hulk 367 (1st Dale Keown art in Hulk)        $15.00\n",
+      "Incredible Hulk 377 (1st all new hulk, 1st prnt, Keown) $15.00\n",
+      "Marvel Comics Presents 1 (Wolverine, Silver Surfer)     $ 7.50\n",
+      "Maxx Limited Ashcan (4000 copies exist, blue cover)\t$30.00\n",
+      "New Mutants 86 (McFarlane cover, 1st app Cable - cameo)\t$10.00\n",
+      "New Mutants 100 (1st app X-Force)                       $ 5.00\n",
+      "New Mutants Annual 5 (1st Liefeld art on New Mutants)\t$10.00\n",
+      "Omega Men 3 (1st appearance Lobo)                       $ 7.50\n",
+      "Omega Men 10 (1st full Lobo story)                      $ 7.50\n",
+      "Power Man & Iron Fist 78 (3rd appearance Sabretooth)    $25.00\n",
+      "                      84 (4th appearance Sabretooth)    $20.00\n",
+      "Simpsons Comics and Stories 1 (Polybagged special ed.)\t$ 7.50\n",
+      "Spectacular Spider-Man 147 (1st app New Hobgoblin)      $12.50\n",
+      "Star Trek the Next Generation 1 (Feb 1988, DC mini)     $ 7.50\n",
+      "Star Trek the Next Generation 1 (Oct 1989, DC comics)   $ 7.50\n",
+      "Web of Spider-Man 29 (Hobgoblin, Wolverine appear)      $10.00 \n",
+      "Web of Spider-Man 30 (Origin Rose, Hobgoblin appears)   $ 7.50\n",
+      "Wolverine 10 (Before claws, 1st battle with Sabretooth)\t$15.00\n",
+      "Wolverine 41 (Sabretooth claims to be Wolverine's dad)\t$ 5.00\n",
+      "Wolverine 42 (Sabretooth proven not to be his dad)\t$ 3.50\n",
+      "Wolverine 43 (Sabretooth/Wolverine saga concludes)\t$ 3.00\n",
+      "Wolverine 1 (1982 mini-series, Miller art)\t\t$20.00\n",
+      "Wonder Woman 267 (Return of Animal Man)                 $12.50\n",
+      "X-Force 1 (Signed by Liefeld, Bagged, X-Force card)     $20.00\n",
+      "X-Force 1 (Signed by Liefeld, Bagged, Shatterstar card) $10.00\n",
+      "X-Force 1 (Signed by Liefeld, Bagged, Deadpool card)    $10.00\n",
+      "X-Force 1 (Signed by Liefeld, Bagged, Sunspot/Gideon)   $10.00\n",
+      "\n",
+      "All comics are in near mint to mint condition, are bagged in shiny \n",
+      "polypropylene bags, and backed with white acid free boards.  Shipping is\n",
+      "$1.50 for one book, $3.00 for more than one book, or free if you order \n",
+      "a large enough amount of stuff.  I am willing to haggle.\n",
+      "\n",
+      "I have thousands and thousands of other comics, so please let me know what \n",
+      "you've been looking for, and maybe I can help.  Some titles I have posted\n",
+      "here don't list every issue I have of that title, I tried to save space.\n",
+      "-- \n",
+      "Geoffrey R. Mason\t\t|\tjrm@elm.circa.ufl.edu\n",
+      "Department of Psychology\t|\tmason@webb.psych.ufl.edu\n",
+      "University of Florida\t\t|\tprothan@maple.circa.ufl.edu\n",
+      "\n",
+      "0.17360012846950526\n",
+      "----------------------------------------------------------------------------------------------------\n",
+      "----------------------------------------------------------------------------------------------------\n",
+      "----------------------------------------------------------------------------------------------------\n"
+     ]
+    }
+   ],
    "source": [
     "for i in range (1,5):\n",
     "    print(newsgroups[similarities.argsort()[0][-i]])\n",
@@ -462,6 +636,14 @@
     "    print('-'*100)"
    ]
   },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### analiza\n",
+    "Dla frazy \"spider man\" (komórka 14) wynik zapytania jest niesatysfakcjonujący, ponieważ pierwszy artykuł nie jest o Spider-Manie, ale zawiera tylko słowo \"spider\". Po zmianie metody wektoryzacji (komórka 8) jako pierwszy wynik pojawia się istotnie film o Spider-Manie (proszę to sprawdzić samodzielnie). Wynika to z faktu, że używamy również bigramów. W ten sposób poprawiliśmy wyszukiwarkę dla tego konkretnego przykładu (chociaż nie wiemy, czy nie popsuliśmy wyszukiwarki w innym przypadku – w tym ćwiczeniu nie przejmujemy się tym)."
+   ]
+  },
   {
    "cell_type": "markdown",
    "metadata": {},
@@ -469,10 +651,10 @@
     "## Zadanie domowe\n",
     "\n",
     "\n",
-    "- Wybrać zbiór tekstowy, który ma conajmniej 10000 dokumentów (inny niż w tym przykładzie).\n",
-    "- Na jego podstawie stworzyć wyszukiwarkę bazującą na OKAPI BM25, tzn. system który dla podanej frazy podaje kilka (5-10) posortowanych najbardziej pasujących dokumentów razem ze scorami. Należy wypisywać też ilość zwracanych dokumentów, czyli takich z niezerowym scorem. Można korzystać z gotowych bibliotek do wektoryzacji dokumentów, należy jednak samemu zaimplementować OKAPI BM25. \n",
+    "- Wybrać zbiór tekstowy, który ma co najmniej 10_000 dokumentów (inny niż w tym przykładzie).\n",
+    "- Na jego podstawie stworzyć wyszukiwarkę bazującą na OKAPI BM25, tzn. system, który dla podanej frazy podaje kilka (5-10) posortowanych najbardziej pasujących dokumentów razem ze scorami. Należy wypisywać też ilość zwracanych dokumentów, czyli takich z niezerowym scorem. Można korzystać z gotowych bibliotek do wektoryzacji dokumentów, należy jednak samemu zaimplementować OKAPI BM25. Można użyć dowolnych parametrów TF-IDF.\n",
     "- Znaleźć frazę (query), dla której wynik nie jest satysfakcjonujący.\n",
-    "- Poprawić wyszukiwarkę (np. poprzez zmianę preprocessingu tekstu, wektoryzer, zmianę parametrów algorytmu rankującego lub sam algorytm) tak, żeby zwracała satysfakcjonujące wyniki dla poprzedniej frazy. Należy zrobić inną zmianę niż w tym przykładzie, tylko wymyślić coś własnego.\n",
+    "- Poprawić wyszukiwarkę (np. poprzez zmianę preprocessingu tekstu, wektoryzer, zmianę parametrów algorytmu rankującego lub sam algorytm) tak, żeby zwracała satysfakcjonujące wyniki dla poprzedniej frazy. Należy zrobić inną zmianę niż w powyższym przykładzie (czyli coś innego niż użycie bigramów), tylko wymyślić coś własnego.\n",
     "- prezentować pracę na zajęciach (06.04) odpowiadając na pytania:\n",
     " - jak wygląda zbiór i system wyszukiwania przed zmianami\n",
     " - dla jakiej frazy wyniki są niesatysfakcjonujące (pokazać wyniki)\n",
@@ -483,6 +665,13 @@
     "Prezentacja powinna być maksymalnie prosta i trwać maksymalnie 2-3 minuty.\n",
     "punktów do zdobycia: 70\n"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {