From c8128a9e08745fdf3f542ccd698f6e707e64b828 Mon Sep 17 00:00:00 2001 From: Jakub Pokrywka Date: Wed, 23 Feb 2022 15:12:56 +0100 Subject: [PATCH] a --- .../01_Kodowanie_tekstu-checkpoint.ipynb | 17 +- cw/00_Informacje_na_temat_przedmiotu.ipynb | 9 + cw/01_Kodowanie_tekstu.ipynb | 238 +- cw/02_Jezyk.ipynb | 7547 +++++++++++++++++ cw/02_Język.ipynb | 117 - cw/03_statystyczny_model_językowy.ipynb | 176 + 6 files changed, 7909 insertions(+), 195 deletions(-) create mode 100644 cw/02_Jezyk.ipynb delete mode 100644 cw/02_Język.ipynb create mode 100644 cw/03_statystyczny_model_językowy.ipynb diff --git a/cw/.ipynb_checkpoints/01_Kodowanie_tekstu-checkpoint.ipynb b/cw/.ipynb_checkpoints/01_Kodowanie_tekstu-checkpoint.ipynb index 9ab48b3..033c435 100644 --- a/cw/.ipynb_checkpoints/01_Kodowanie_tekstu-checkpoint.ipynb +++ b/cw/.ipynb_checkpoints/01_Kodowanie_tekstu-checkpoint.ipynb @@ -12,7 +12,15 @@ "\n", "\n", "![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)\n", - "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ "NR_INDEKSU = 375985" ] }, @@ -725,6 +733,13 @@ "- następnie wygeneruj z notebooka PDF (File → Download As → PDF via Latex).\n", "- notebook z kodem oraz PDF zamieść w zakładce zadań w MS TEAMS" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/cw/00_Informacje_na_temat_przedmiotu.ipynb b/cw/00_Informacje_na_temat_przedmiotu.ipynb index 68a5001..cb3bf6c 100644 --- a/cw/00_Informacje_na_temat_przedmiotu.ipynb +++ b/cw/00_Informacje_na_temat_przedmiotu.ipynb @@ -35,6 +35,15 @@ "## Literatura\n", "Polecana literatura do przedmiotu:\n", "\n", + "- Philipp Koehn. \"Neural Machine Translation\". 2020. 
(darmowa- https://www.cambridge.org/core/books/neural-machine-translation/7AAA628F88ADD64124EA008C425C0197)\n", + "- https://web.stanford.edu/~jurafsky/slp3/3.pdf\n", + "- Jacob Devlin, Ming-Wei Chang, Kenton Lee, Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. North American Association for Computational Linguistics (NAACL).\n", + "- Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, Peter J. Liu. 2020. Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer. Journal of Machine Learning Research vol 21, number 140, pages 1-67.\n", + "- Radford, Alec and Wu, Jeff and Child, Rewon and Luan, David and Amodei, Dario and Sutskever, Ilya. 2019. Language Models are Unsupervised Multitask Learners\n", + "- https://jalammar.github.io/illustrated-transformer/\n", + "- https://www.youtube.com/watch?v=-9evrZnBorM&ab_channel=YannicKilcher\n", + "- https://www.youtube.com/watch?v=u1_qMdb0kYU&ab_channel=YannicKilcher\n", + "\n", "\n", "\n", "## Zaliczenie\n", diff --git a/cw/01_Kodowanie_tekstu.ipynb b/cw/01_Kodowanie_tekstu.ipynb index c64b76e..033c435 100644 --- a/cw/01_Kodowanie_tekstu.ipynb +++ b/cw/01_Kodowanie_tekstu.ipynb @@ -17,7 +17,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -52,33 +52,13 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "c = '⨃'" ] }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "10755" - ] - }, - "execution_count": 2, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "ord(c)" - ] - }, { "cell_type": "code", "execution_count": 3, @@ -87,7 +67,7 @@ { "data": { "text/plain": [ - "'⨃'" + "10755" ] }, "execution_count": 3, @@ -96,7 +76,7 @@ } ], "source": [ - 
"chr(10755)" + "ord(c)" ] }, { @@ -107,7 +87,7 @@ { "data": { "text/plain": [ - "0" + "'⨃'" ] }, "execution_count": 4, @@ -116,15 +96,7 @@ } ], "source": [ - "10755 - 2* 16**3 - 10* 16**2 - 0 * 16**1 - 3* 16**0" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "$10755_{10} = 2* 16^3 + 10* 16^2 + 0 * 16^1 + 3* 16^0 =$ U+2A03 \n", - "\n" + "chr(10755)" ] }, { @@ -143,6 +115,34 @@ "output_type": "execute_result" } ], + "source": [ + "10755 - 2* 16**3 - 10* 16**2 - 0 * 16**1 - 3* 16**0" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "$10755_{10} = 2* 16^3 + 10* 16^2 + 0 * 16^1 + 3* 16^0 =$ U+2A03 \n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "10755 - 1*2**13 - 0*2**12 - 1*2**11 - 0*2**10 - 1*2**9 -0*2**8 -0*2**7-0*2**6-0*2**5-0*2**4-0*2**3-0*2**2-0*2**1 - 1*2**1 - 1*2**0" ] @@ -156,7 +156,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -165,7 +165,7 @@ "14" ] }, - "execution_count": 6, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -176,7 +176,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -185,7 +185,7 @@ "'0010101000000011'" ] }, - "execution_count": 7, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -210,7 +210,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -219,7 +219,7 @@ "'11100010 10101000 10000011'" ] }, - "execution_count": 8, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -230,7 +230,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -239,7 +239,7 @@ }, 
{ "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -256,7 +256,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -273,7 +273,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -282,7 +282,7 @@ "'\\x0c'" ] }, - "execution_count": 12, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -302,19 +302,18 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "metadata": {}, "outputs": [ { - "ename": "NameError", - "evalue": "name 'NR_INDEKSU' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mchr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mNR_INDEKSU\u001b[0m \u001b[0;34m%\u001b[0m \u001b[0;36m100\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;31mNameError\u001b[0m: name 'NR_INDEKSU' is not defined" - ] + "data": { + "text/plain": [ + "'U'" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ @@ -323,18 +322,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'ϙ'" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "chr(NR_INDEKSU % 1000)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'\\U00012856'" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": 
"execute_result" + } + ], "source": [ "chr(NR_INDEKSU % 100000 - 123)" ] @@ -362,7 +383,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -383,9 +404,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "00000000: 01111010 01100001 11000101 10111100 11000011 10110011 za....\r\n", + "00000006: 11000101 10000010 11000100 10000111 00100000 01100111 .... g\r\n", + "0000000c: 11000100 10011001 11000101 10011011 01101100 11000100 ....l.\r\n", + "00000012: 10000101 00100000 01101010 01100001 11000101 10111010 . ja..\r\n", + "00000018: 11000101 10000100 00001010 ...\r\n" + ] + } + ], "source": [ "!xxd -b '01_materialy/polski_tekst.txt'" ] @@ -401,7 +434,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, "outputs": [], "source": [ @@ -410,7 +443,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ @@ -419,7 +452,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, "outputs": [], "source": [ @@ -428,11 +461,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 22, "metadata": { "scrolled": false }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'00101100 00001010 01001010 01100001 01101011 00100000 01100010 01111001 11000101 10000010 00100000 01010011 01110100 01100101 01100110 01100101 01101011 00100000 01000010 01110101 01110010 01100011 01111010 01111001 01101101 01110101 01100011 01101000 01100001 11100010 10000000 10100110 00001010 11100010 10000000 10010100 00100000 01001010 01100001 00100000 01101110 01101001 01101011 01101111 01100111 01101111 00100000 01110011 01101001 11000100 10011001 00100000 01101110 01101001 01100101 00100000 
01100010 01101111 01101010 11000100 10011001 00100001 00001010 01000011 01101000 01101111 11000100 10000111 01100010 01111001 00100000 01101110 01101001 01100101 01100100 11000101 10111010 01110111 01101001 01100101 01100100 11000101 10111010 11100010 10000000 10100110 00100000 01110100'" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "tekst" ] @@ -462,7 +506,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ @@ -471,7 +515,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "metadata": {}, "outputs": [], "source": [ @@ -480,7 +524,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ @@ -489,11 +533,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "metadata": { "scrolled": true }, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'0x2e 0x20 0x31 0x36 0x37 0x30 0x2c 0x20 0x70 0x72 0x7a 0x65 0x64 0x20 0x75 0x70 0x61 0x64 0x6b 0x69 0x65 0x6d 0x20 0x4b 0x61 0x6d 0x69 0x65 0xc5 0x84 0x63 0x61 0x20 0x69 0x20 0x68 0x61 0x6e 0x69 0x65 0x62 0x6e 0x79 0x6d 0x69 0x20 0x75 0x6b 0xc5 0x82 0x61 0x64 0x61 0x6d 0x69 0x20 0x62 0x75 0x63 0x7a 0x61 0x63 0x6b 0x69 0x6d 0x69 0x2c 0x20 0x6b 0x74 0xc3 0xb3 0x72 0x65 0x20 0x6f 0x62 0x6f 0x77 0x69 0xc4 0x85 0x7a 0x79 0x77 0x61 0xc5 0x82'" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "tekst" ] @@ -583,7 +638,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ @@ -599,9 +654,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['A', 'a', 'b', 'ce', 'cef', 'Ą', 'ą', 'ż']" + ] + }, + "execution_count": 28, + 
"metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "sorted(przykladowa_lista)" ] @@ -615,9 +681,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['A', 'Ą', 'a', 'ą', 'b', 'ce', 'cef', 'ż']" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "['A', 'Ą', 'a', 'ą' ,'b', 'ce', 'cef', 'ż']" ] @@ -656,6 +733,13 @@ "- następnie wygeneruj z notebooka PDF (File → Download As → PDF via Latex).\n", "- notebook z kodem oraz PDF zamieść w zakładce zadań w MS TEAMS" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/cw/02_Jezyk.ipynb b/cw/02_Jezyk.ipynb new file mode 100644 index 0000000..ffdad79 --- /dev/null +++ b/cw/02_Jezyk.ipynb @@ -0,0 +1,7547 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n", + "
\n", + "

Ekstrakcja informacji

\n", + "

0. Jezyk [ćwiczenia]

\n", + "

Jakub Pokrywka (2022)

\n", + "
\n", + "\n", + "![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)" + ] + }, + { + "cell_type": "code", + "execution_count": 278, + "metadata": {}, + "outputs": [], + "source": [ + "NR_INDEKSU = 375985" + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "metadata": {}, + "outputs": [], + "source": [ + "import random\n", + "import plotly.express as px\n", + "import numpy as np\n", + "import pandas as pd\n", + "import nltk" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "program : program\n", + "programs : program\n", + "programmer : programm\n", + "programming : program\n", + "programmers : programm\n" + ] + } + ], + "source": [ + "ps = nltk.stem.PorterStemmer()\n", + "\n", + "for w in [\"program\", \"programs\", \"programmer\", \"programming\", \"programmers\"]:\n", + " print(w, \" : \", ps.stem(w))" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[nltk_data] Downloading package punkt to /home/kuba/nltk_data...\n", + "[nltk_data] Package punkt is already up-to-date!\n", + "[nltk_data] Downloading package stopwords to /home/kuba/nltk_data...\n", + "[nltk_data] Unzipping corpora/stopwords.zip.\n" + ] + }, + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nltk.download('punkt')\n", + "nltk.download('stopwords')" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Python',\n", + " 'is',\n", + " 'dynamically-typed',\n", + " 'and',\n", + " 'garbage-collected',\n", + " '.',\n", + " 'It',\n", + " 'supports',\n", + " 'multiple',\n", + " 'programming',\n", + " 'paradigms',\n", + " ',',\n", + " 'including',\n", + " 
'structured',\n", + " '(',\n", + " 'particularly',\n", + " ',',\n", + " 'procedural',\n", + " ')',\n", + " ',',\n", + " 'object-oriented',\n", + " 'and',\n", + " 'functional',\n", + " 'programming',\n", + " '.',\n", + " 'It',\n", + " 'is',\n", + " 'often',\n", + " 'described',\n", + " 'as',\n", + " 'a',\n", + " '``',\n", + " 'batteries',\n", + " 'included',\n", + " \"''\",\n", + " 'language',\n", + " 'due',\n", + " 'to',\n", + " 'its',\n", + " 'comprehensive',\n", + " 'standard',\n", + " 'library',\n", + " '.']" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "text = \"\"\"Python is dynamically-typed and garbage-collected. It supports multiple programming paradigms, including structured (particularly, procedural), object-oriented and functional programming. It is often described as a \"batteries included\" language due to its comprehensive standard library.\"\"\"\n", + "nltk.tokenize.word_tokenize(text)" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['Python is dynamically-typed and garbage-collected.',\n", + " 'It supports multiple programming paradigms, including structured (particularly, procedural), object-oriented and functional programming.',\n", + " 'It is often described as a \"batteries included\" language due to its comprehensive standard library.']" + ] + }, + "execution_count": 79, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nltk.tokenize.sent_tokenize(text)" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['aber',\n", + " 'alle',\n", + " 'allem',\n", + " 'allen',\n", + " 'aller',\n", + " 'alles',\n", + " 'als',\n", + " 'also',\n", + " 'am',\n", + " 'an',\n", + " 'ander',\n", + " 'andere',\n", + " 'anderem',\n", + " 'anderen',\n", + " 'anderer',\n", + " 'anderes',\n", + " 'anderm',\n", + " 
'andern',\n", + " 'anderr',\n", + " 'anders',\n", + " 'auch',\n", + " 'auf',\n", + " 'aus',\n", + " 'bei',\n", + " 'bin',\n", + " 'bis',\n", + " 'bist',\n", + " 'da',\n", + " 'damit',\n", + " 'dann',\n", + " 'der',\n", + " 'den',\n", + " 'des',\n", + " 'dem',\n", + " 'die',\n", + " 'das',\n", + " 'dass',\n", + " 'daß',\n", + " 'derselbe',\n", + " 'derselben',\n", + " 'denselben',\n", + " 'desselben',\n", + " 'demselben',\n", + " 'dieselbe',\n", + " 'dieselben',\n", + " 'dasselbe',\n", + " 'dazu',\n", + " 'dein',\n", + " 'deine',\n", + " 'deinem',\n", + " 'deinen',\n", + " 'deiner',\n", + " 'deines',\n", + " 'denn',\n", + " 'derer',\n", + " 'dessen',\n", + " 'dich',\n", + " 'dir',\n", + " 'du',\n", + " 'dies',\n", + " 'diese',\n", + " 'diesem',\n", + " 'diesen',\n", + " 'dieser',\n", + " 'dieses',\n", + " 'doch',\n", + " 'dort',\n", + " 'durch',\n", + " 'ein',\n", + " 'eine',\n", + " 'einem',\n", + " 'einen',\n", + " 'einer',\n", + " 'eines',\n", + " 'einig',\n", + " 'einige',\n", + " 'einigem',\n", + " 'einigen',\n", + " 'einiger',\n", + " 'einiges',\n", + " 'einmal',\n", + " 'er',\n", + " 'ihn',\n", + " 'ihm',\n", + " 'es',\n", + " 'etwas',\n", + " 'euer',\n", + " 'eure',\n", + " 'eurem',\n", + " 'euren',\n", + " 'eurer',\n", + " 'eures',\n", + " 'für',\n", + " 'gegen',\n", + " 'gewesen',\n", + " 'hab',\n", + " 'habe',\n", + " 'haben',\n", + " 'hat',\n", + " 'hatte',\n", + " 'hatten',\n", + " 'hier',\n", + " 'hin',\n", + " 'hinter',\n", + " 'ich',\n", + " 'mich',\n", + " 'mir',\n", + " 'ihr',\n", + " 'ihre',\n", + " 'ihrem',\n", + " 'ihren',\n", + " 'ihrer',\n", + " 'ihres',\n", + " 'euch',\n", + " 'im',\n", + " 'in',\n", + " 'indem',\n", + " 'ins',\n", + " 'ist',\n", + " 'jede',\n", + " 'jedem',\n", + " 'jeden',\n", + " 'jeder',\n", + " 'jedes',\n", + " 'jene',\n", + " 'jenem',\n", + " 'jenen',\n", + " 'jener',\n", + " 'jenes',\n", + " 'jetzt',\n", + " 'kann',\n", + " 'kein',\n", + " 'keine',\n", + " 'keinem',\n", + " 'keinen',\n", + " 'keiner',\n", + " 
'keines',\n", + " 'können',\n", + " 'könnte',\n", + " 'machen',\n", + " 'man',\n", + " 'manche',\n", + " 'manchem',\n", + " 'manchen',\n", + " 'mancher',\n", + " 'manches',\n", + " 'mein',\n", + " 'meine',\n", + " 'meinem',\n", + " 'meinen',\n", + " 'meiner',\n", + " 'meines',\n", + " 'mit',\n", + " 'muss',\n", + " 'musste',\n", + " 'nach',\n", + " 'nicht',\n", + " 'nichts',\n", + " 'noch',\n", + " 'nun',\n", + " 'nur',\n", + " 'ob',\n", + " 'oder',\n", + " 'ohne',\n", + " 'sehr',\n", + " 'sein',\n", + " 'seine',\n", + " 'seinem',\n", + " 'seinen',\n", + " 'seiner',\n", + " 'seines',\n", + " 'selbst',\n", + " 'sich',\n", + " 'sie',\n", + " 'ihnen',\n", + " 'sind',\n", + " 'so',\n", + " 'solche',\n", + " 'solchem',\n", + " 'solchen',\n", + " 'solcher',\n", + " 'solches',\n", + " 'soll',\n", + " 'sollte',\n", + " 'sondern',\n", + " 'sonst',\n", + " 'über',\n", + " 'um',\n", + " 'und',\n", + " 'uns',\n", + " 'unsere',\n", + " 'unserem',\n", + " 'unseren',\n", + " 'unser',\n", + " 'unseres',\n", + " 'unter',\n", + " 'viel',\n", + " 'vom',\n", + " 'von',\n", + " 'vor',\n", + " 'während',\n", + " 'war',\n", + " 'waren',\n", + " 'warst',\n", + " 'was',\n", + " 'weg',\n", + " 'weil',\n", + " 'weiter',\n", + " 'welche',\n", + " 'welchem',\n", + " 'welchen',\n", + " 'welcher',\n", + " 'welches',\n", + " 'wenn',\n", + " 'werde',\n", + " 'werden',\n", + " 'wie',\n", + " 'wieder',\n", + " 'will',\n", + " 'wir',\n", + " 'wird',\n", + " 'wirst',\n", + " 'wo',\n", + " 'wollen',\n", + " 'wollte',\n", + " 'würde',\n", + " 'würden',\n", + " 'zu',\n", + " 'zum',\n", + " 'zur',\n", + " 'zwar',\n", + " 'zwischen']" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nltk.corpus.stopwords.words('german')" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[('Python', 'is'), ('is', 'dynamically-typed'), 
('dynamically-typed', 'and'), ('and', 'garbage-collected'), ('garbage-collected', '.'), ('.', 'It'), ('It', 'supports'), ('supports', 'multiple'), ('multiple', 'programming'), ('programming', 'paradigms'), ('paradigms', ','), (',', 'including'), ('including', 'structured'), ('structured', '('), ('(', 'particularly'), ('particularly', ','), (',', 'procedural'), ('procedural', ')'), (')', ','), (',', 'object-oriented'), ('object-oriented', 'and'), ('and', 'functional'), ('functional', 'programming'), ('programming', '.'), ('.', 'It'), ('It', 'is'), ('is', 'often'), ('often', 'described'), ('described', 'as'), ('as', 'a'), ('a', '``'), ('``', 'batteries'), ('batteries', 'included'), ('included', \"''\"), (\"''\", 'language'), ('language', 'due'), ('due', 'to'), ('to', 'its'), ('its', 'comprehensive'), ('comprehensive', 'standard'), ('standard', 'library'), ('library', '.')]\n" + ] + } + ], + "source": [ + "nltk_tokens = nltk.word_tokenize(text)\n", + "print(list(nltk.bigrams(nltk_tokens)))" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "metadata": { + "scrolled": true + }, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "alignmentgroup": "True", + "hovertemplate": "słowo=%{x}
liczba=%{y}", + "legendgroup": "", + "marker": { + "color": "#636efa", + "pattern": { + "shape": "" + } + }, + "name": "", + "offsetgroup": "", + "orientation": "v", + "showlegend": false, + "textposition": "auto", + "type": "bar", + "x": [ + "ma", + "ala", + "psa", + "kota" + ], + "xaxis": "x", + "y": [ + 20, + 15, + 10, + 10 + ], + "yaxis": "y" + } + ], + "layout": { + "barmode": "relative", + "legend": { + "tracegroupgap": 0 + }, + "margin": { + "t": 60 + }, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + 
"outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + 
"#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ 
+ 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], + [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], 
+ [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + 
"standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "xaxis": { + "anchor": "y", + "domain": [ + 0, + 1 + ], + "title": { + "text": "słowo" + } + }, + "yaxis": { + "anchor": "x", + "domain": [ + 0, + 1 + ], + "title": { + "text": "liczba" + } + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = pd.DataFrame([['ma', 20], ['ala', 15], ['psa', 10], ['kota', 10]], columns=['słowo', 'liczba'])\n", + "fig = px.bar(df, x=\"słowo\", y=\"liczba\")\n", + "fig.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.plotly.v1+json": { + "config": { + "plotlyServerURL": "https://plot.ly" + }, + "data": [ + { + "alignmentgroup": "True", + "bingroup": "x", + "hovertemplate": "jezyk=polski
dlugosc=%{x}
count=%{y}", + "legendgroup": "", + "marker": { + "color": "#636efa", + "pattern": { + "shape": "" + } + }, + "name": "", + "nbinsx": 50, + "offsetgroup": "", + "orientation": "v", + "showlegend": false, + "type": "histogram", + "x": [ + 8, + 4, + 3, + 8, + 7, + 2, + 6, + 7, + 7, + 6, + 3, + 9, + 11, + 9, + 7, + 2, + 10, + 6, + 1, + 2, + 1, + 1, + 2, + 8, + 5, + 2, + 1, + 3, + 1, + 2, + 8, + 1, + 6, + 3, + 4, + 11, + 1, + 2, + 4, + 1, + 6, + 4, + 5, + 4, + 6, + 2, + 5, + 4, + 5, + 6, + 14, + 2, + 3, + 6, + 2, + 6, + 3, + 8, + 11, + 2, + 6, + 4, + 17, + 6, + 5, + 1, + 6, + 2, + 6, + 2, + 7, + 3, + 6, + 2, + 2, + 3, + 11, + 3, + 1, + 1, + 8, + 12, + 4, + 8, + 3, + 5, + 3, + 5, + 5, + 1, + 8, + 2, + 15, + 1, + 3, + 1, + 6, + 1, + 7, + 5, + 1, + 7, + 11, + 2, + 4, + 1, + 1, + 1, + 3, + 3, + 3, + 1, + 2, + 1, + 2, + 1, + 13, + 3, + 2, + 1, + 1, + 6, + 2, + 7, + 3, + 4, + 7, + 5, + 12, + 3, + 6, + 5, + 2, + 1, + 2, + 4, + 1, + 1, + 4, + 1, + 1, + 1, + 5, + 3, + 3, + 9, + 9, + 1, + 3, + 1, + 4, + 5, + 9, + 2, + 1, + 1, + 2, + 1, + 3, + 1, + 1, + 4, + 2, + 1, + 2, + 8, + 1, + 2, + 6, + 1, + 5, + 1, + 6, + 2, + 3, + 15, + 4, + 11, + 2, + 14, + 3, + 2, + 10, + 1, + 13, + 5, + 3, + 9, + 8, + 13, + 7, + 12, + 1, + 5, + 6, + 1, + 1, + 10, + 9, + 1, + 6, + 16, + 5, + 2, + 2, + 8, + 2, + 8, + 1, + 1, + 7, + 1, + 2, + 1, + 18, + 10, + 5, + 3, + 8, + 14, + 5, + 1, + 5, + 10, + 27, + 1, + 4, + 3, + 1, + 11, + 4, + 7, + 7, + 1, + 1, + 1, + 10, + 3, + 3, + 2, + 2, + 1, + 7, + 3, + 3, + 1, + 6, + 11, + 5, + 6, + 7, + 2, + 1, + 2, + 4, + 8, + 9, + 2, + 1, + 3, + 6, + 8, + 6, + 11, + 2, + 3, + 16, + 2, + 2, + 1, + 22, + 4, + 4, + 12, + 8, + 3, + 5, + 3, + 2, + 9, + 4, + 5, + 5, + 1, + 2, + 1, + 1, + 9, + 6, + 1, + 5, + 2, + 7, + 8, + 16, + 17, + 6, + 2, + 3, + 3, + 7, + 4, + 5, + 5, + 2, + 1, + 4, + 4, + 9, + 1, + 2, + 5, + 3, + 1, + 9, + 2, + 1, + 3, + 2, + 3, + 15, + 1, + 2, + 13, + 7, + 2, + 2, + 8, + 7, + 4, + 1, + 3, + 7, + 4, + 6, + 2, + 6, + 7, + 4, + 4, + 1, + 4, + 1, + 1, + 2, + 
1, + 6, + 13, + 6, + 11, + 14, + 31, + 6, + 5, + 7, + 10, + 3, + 12, + 2, + 1, + 3, + 1, + 9, + 12, + 8, + 1, + 13, + 6, + 2, + 4, + 2, + 8, + 3, + 8, + 10, + 6, + 32, + 4, + 1, + 5, + 1, + 5, + 5, + 6, + 1, + 1, + 5, + 8, + 8, + 3, + 1, + 10, + 2, + 4, + 8, + 15, + 7, + 11, + 1, + 2, + 1, + 7, + 5, + 1, + 5, + 3, + 2, + 2, + 11, + 2, + 17, + 2, + 5, + 14, + 16, + 8, + 9, + 9, + 3, + 1, + 4, + 2, + 2, + 6, + 4, + 3, + 3, + 3, + 3, + 7, + 8, + 2, + 4, + 7, + 3, + 1, + 3, + 5, + 4, + 4, + 1, + 4, + 4, + 3, + 5, + 2, + 2, + 1, + 3, + 2, + 2, + 22, + 9, + 3, + 1, + 8, + 2, + 10, + 6, + 6, + 2, + 12, + 5, + 4, + 1, + 11, + 16, + 11, + 2, + 1, + 3, + 8, + 5, + 2, + 6, + 6, + 5, + 1, + 3, + 4, + 3, + 8, + 7, + 3, + 7, + 3, + 6, + 1, + 1, + 2, + 2, + 12, + 8, + 2, + 2, + 7, + 3, + 1, + 9, + 1, + 1, + 1, + 7, + 1, + 4, + 2, + 2, + 1, + 1, + 6, + 7, + 7, + 10, + 5, + 4, + 6, + 1, + 1, + 9, + 3, + 3, + 4, + 5, + 4, + 3, + 1, + 2, + 9, + 2, + 7, + 2, + 10, + 7, + 1, + 2, + 2, + 4, + 2, + 13, + 8, + 5, + 15, + 3, + 10, + 3, + 7, + 3, + 3, + 2, + 7, + 1, + 30, + 1, + 5, + 3, + 2, + 8, + 1, + 7, + 7, + 3, + 7, + 1, + 9, + 7, + 14, + 1, + 5, + 2, + 3, + 1, + 2, + 11, + 4, + 7, + 5, + 1, + 2, + 1, + 5, + 7, + 4, + 3, + 1, + 3, + 1, + 5, + 2, + 4, + 2, + 1, + 7, + 4, + 1, + 10, + 5, + 3, + 1, + 1, + 1, + 10, + 6, + 2, + 3, + 3, + 5, + 2, + 1, + 6, + 2, + 5, + 3, + 2, + 1, + 15, + 7, + 10, + 14, + 3, + 1, + 5, + 2, + 1, + 4, + 1, + 2, + 3, + 4, + 6, + 12, + 2, + 4, + 1, + 9, + 2, + 8, + 7, + 3, + 18, + 10, + 3, + 6, + 8, + 1, + 8, + 1, + 5, + 6, + 1, + 1, + 5, + 5, + 14, + 1, + 4, + 2, + 1, + 2, + 1, + 7, + 3, + 1, + 2, + 5, + 3, + 11, + 3, + 4, + 2, + 7, + 29, + 3, + 3, + 1, + 10, + 3, + 4, + 2, + 3, + 12, + 2, + 1, + 2, + 1, + 14, + 2, + 5, + 1, + 2, + 13, + 3, + 4, + 1, + 2, + 3, + 2, + 3, + 1, + 2, + 1, + 2, + 8, + 12, + 5, + 5, + 2, + 6, + 9, + 1, + 11, + 1, + 8, + 1, + 2, + 2, + 2, + 8, + 9, + 3, + 1, + 4, + 3, + 1, + 4, + 10, + 1, + 4, + 6, + 3, + 1, + 13, + 18, + 4, + 1, + 1, 
+ 2, + 8, + 3, + 1, + 1, + 5, + 6, + 22, + 13, + 7, + 1, + 12, + 3, + 2, + 1, + 5, + 3, + 6, + 6, + 2, + 2, + 15, + 16, + 3, + 3, + 13, + 1, + 4, + 5, + 1, + 2, + 2, + 23, + 10, + 5, + 9, + 2, + 1, + 2, + 5, + 2, + 2, + 2, + 7, + 3, + 3, + 1, + 3, + 1, + 5, + 1, + 1, + 4, + 1, + 2, + 1, + 6, + 14, + 2, + 6, + 6, + 6, + 1, + 2, + 10, + 7, + 7, + 2, + 5, + 1, + 2, + 5, + 10, + 2, + 1, + 1, + 7, + 2, + 1, + 14, + 3, + 2, + 18, + 3, + 5, + 1, + 6, + 3, + 4, + 3, + 6, + 7, + 5, + 6, + 11, + 7, + 6, + 1, + 2, + 1, + 1, + 4, + 3, + 2, + 4, + 8, + 5, + 4, + 4, + 3, + 2, + 4, + 5, + 4, + 1, + 2, + 3, + 3, + 17, + 4, + 8, + 4, + 7, + 4, + 2, + 20, + 1, + 1, + 7, + 7, + 3, + 1, + 16, + 4, + 1, + 4, + 5, + 13, + 4, + 5, + 3, + 2, + 6, + 2, + 1, + 16, + 1, + 1, + 5, + 2, + 7, + 6, + 4, + 1, + 8, + 6, + 1, + 7, + 5, + 3, + 6, + 4, + 1, + 1, + 7, + 2, + 3, + 10, + 7, + 6, + 3, + 7, + 2, + 3, + 2, + 1, + 4, + 1, + 2, + 19, + 1, + 10, + 1, + 1, + 2, + 1, + 8, + 8, + 4, + 5, + 1, + 1, + 8, + 8, + 4, + 7, + 1, + 3, + 7, + 4, + 1, + 1, + 4, + 8, + 2, + 4, + 5, + 2, + 2, + 4, + 5, + 6, + 5, + 5, + 2, + 6, + 6, + 1, + 5, + 3, + 12, + 6, + 5, + 4, + 22, + 11, + 1, + 3, + 4, + 1, + 9, + 11, + 5, + 1, + 1, + 2, + 9, + 8, + 4, + 26, + 2, + 3, + 5, + 3, + 1, + 5, + 7, + 1, + 7, + 3, + 1, + 2, + 2, + 5, + 2, + 1, + 3, + 10, + 12, + 4, + 2, + 4, + 2, + 4, + 2, + 1, + 3, + 8, + 23, + 3, + 3, + 2, + 1, + 4, + 18, + 5, + 5, + 12, + 1, + 23, + 7, + 7, + 1, + 9, + 4, + 8, + 4, + 2, + 3, + 4, + 9, + 9, + 5, + 2, + 4, + 5, + 5, + 5, + 1, + 7, + 8, + 2, + 1, + 5, + 2, + 5, + 2, + 1, + 1, + 13, + 1, + 9, + 4, + 1, + 2, + 3, + 3, + 1, + 10, + 1, + 4, + 3, + 2, + 9, + 2, + 8, + 8, + 5, + 7, + 7, + 3, + 5, + 3, + 2, + 1, + 11, + 2, + 3, + 3, + 6, + 2, + 2, + 5, + 2, + 1, + 19, + 1, + 6, + 6, + 5, + 14, + 8, + 1, + 2, + 18, + 7, + 2, + 9, + 3, + 2, + 13, + 8, + 4, + 6, + 17, + 9, + 7, + 2, + 2, + 1, + 10, + 5, + 5, + 11, + 5, + 10, + 6, + 1, + 1, + 2, + 3, + 5, + 13, + 7, + 11, + 1, + 12, + 1, + 2, + 1, + 
2, + 1, + 6, + 2, + 7, + 3, + 5, + 3, + 1, + 2, + 2, + 3, + 8, + 1, + 15, + 5, + 2, + 2, + 2, + 1, + 1, + 2, + 4, + 11, + 3, + 2, + 2, + 4, + 18, + 3, + 14, + 1, + 1, + 2, + 7, + 1, + 7, + 1, + 4, + 1, + 1, + 6, + 3, + 3, + 4, + 1, + 12, + 7, + 5, + 16, + 2, + 7, + 7, + 4, + 6, + 3, + 6, + 2, + 3, + 8, + 10, + 3, + 1, + 3, + 4, + 8, + 2, + 7, + 1, + 1, + 1, + 4, + 1, + 9, + 2, + 1, + 2, + 2, + 3, + 2, + 6, + 1, + 1, + 5, + 2, + 3, + 5, + 6, + 5, + 7, + 3, + 4, + 8, + 3, + 1, + 2, + 5, + 6, + 9, + 17, + 9, + 8, + 2, + 1, + 7, + 1, + 4, + 1, + 10, + 3, + 6, + 5, + 2, + 11, + 7, + 11, + 4, + 3, + 5, + 3, + 10, + 4, + 3, + 5, + 2, + 2, + 1, + 6, + 2, + 8, + 4, + 1, + 3, + 2, + 1, + 1, + 1, + 5, + 10, + 2, + 4, + 7, + 5, + 17, + 10, + 1, + 1, + 2, + 4, + 9, + 5, + 6, + 4, + 4, + 6, + 7, + 8, + 1, + 4, + 10, + 11, + 4, + 1, + 2, + 1, + 2, + 4, + 5, + 2, + 2, + 1, + 8, + 5, + 1, + 28, + 2, + 3, + 1, + 3, + 3, + 3, + 11, + 15, + 2, + 1, + 20, + 2, + 7, + 5, + 10, + 3, + 4, + 2, + 3, + 2, + 1, + 8, + 10, + 8, + 2, + 6, + 3, + 16, + 2, + 5, + 4, + 12, + 3, + 1, + 4, + 2, + 1, + 2, + 2, + 1, + 5, + 5, + 3, + 7, + 2, + 1, + 3, + 2, + 2, + 15, + 2, + 2, + 1, + 1, + 4, + 9, + 6, + 9, + 6, + 1, + 7, + 4, + 4, + 8, + 12, + 6, + 1, + 1, + 10, + 7, + 9, + 9, + 1, + 2, + 3, + 1, + 7, + 6, + 1, + 4, + 5, + 3, + 1, + 1, + 2, + 4, + 3, + 6, + 4, + 7, + 2, + 6, + 5, + 2, + 10, + 3, + 8, + 7, + 6, + 1, + 1, + 9, + 3, + 3, + 4, + 13, + 7, + 17, + 3, + 9, + 8, + 10, + 1, + 4, + 4, + 5, + 1, + 1, + 3, + 3, + 10, + 7, + 3, + 2, + 5, + 8, + 6, + 2, + 4, + 13, + 9, + 11, + 2, + 7, + 7, + 1, + 3, + 8, + 3, + 3, + 8, + 3, + 1, + 12, + 3, + 3, + 3, + 7, + 1, + 9, + 3, + 8, + 1, + 2, + 6, + 5, + 6, + 4, + 6, + 6, + 10, + 4, + 3, + 2, + 1, + 4, + 1, + 8, + 4, + 4, + 4, + 5, + 1, + 1, + 3, + 5, + 4, + 10, + 6, + 2, + 5, + 11, + 13, + 6, + 5, + 2, + 5, + 6, + 9, + 4, + 7, + 1, + 2, + 5, + 7, + 4, + 8, + 1, + 20, + 12, + 2, + 5, + 11, + 2, + 1, + 3, + 10, + 5, + 7, + 3, + 4, + 7, + 1, + 1, + 2, + 2, + 
3, + 3, + 9, + 2, + 5, + 4, + 3, + 5, + 1, + 9, + 2, + 5, + 1, + 4, + 14, + 5, + 9, + 5, + 3, + 1, + 5, + 3, + 3, + 3, + 2, + 2, + 2, + 11, + 5, + 5, + 5, + 7, + 30, + 7, + 3, + 2, + 1, + 5, + 2, + 3, + 12, + 11, + 17, + 3, + 4, + 11, + 2, + 1, + 4, + 1, + 14, + 5, + 7, + 9, + 10, + 10, + 3, + 6, + 13, + 2, + 6, + 7, + 9, + 2, + 3, + 2, + 1, + 4, + 9, + 1, + 15, + 1, + 6, + 4, + 16, + 2, + 4, + 3, + 2, + 2, + 13, + 5, + 5, + 5, + 2, + 4, + 10, + 3, + 14, + 1, + 1, + 1, + 7, + 5, + 21, + 2, + 1, + 5, + 7, + 3, + 5, + 1, + 1, + 4, + 2, + 5, + 2, + 10, + 2, + 13, + 1, + 3, + 15, + 13, + 8, + 4, + 3, + 6, + 5, + 4, + 2, + 4, + 3, + 3, + 2, + 4, + 10, + 5, + 5, + 1, + 3, + 2, + 2, + 5, + 8, + 6, + 6, + 2, + 13, + 4, + 13, + 19, + 3, + 2, + 3, + 2, + 2, + 8, + 1, + 2, + 18, + 1, + 3, + 1, + 1, + 11, + 9, + 4, + 12, + 1, + 1, + 1, + 6, + 5, + 7, + 5, + 2, + 4, + 2, + 2, + 4, + 4, + 3, + 22, + 1, + 12, + 3, + 3, + 1, + 3, + 4, + 1, + 2, + 4, + 1, + 7, + 10, + 2, + 4, + 5, + 1, + 4, + 3, + 7, + 4, + 3, + 7, + 5, + 1, + 1, + 5, + 14, + 2, + 3 + ], + "xaxis": "x3", + "yaxis": "y3" + }, + { + "alignmentgroup": "True", + "bingroup": "x", + "hovertemplate": "jezyk=hiszp
dlugosc=%{x}
count=%{y}", + "legendgroup": "", + "marker": { + "color": "#636efa", + "pattern": { + "shape": "" + } + }, + "name": "", + "nbinsx": 50, + "offsetgroup": "", + "orientation": "v", + "showlegend": false, + "type": "histogram", + "x": [ + 13, + 2, + 4, + 3, + 11, + 1, + 1, + 8, + 10, + 11, + 2, + 1, + 6, + 3, + 1, + 3, + 3, + 1, + 5, + 1, + 3, + 1, + 4, + 4, + 2, + 8, + 4, + 5, + 1, + 1, + 2, + 2, + 10, + 2, + 17, + 8, + 5, + 2, + 17, + 5, + 1, + 2, + 1, + 8, + 4, + 2, + 2, + 15, + 1, + 6, + 3, + 4, + 9, + 4, + 1, + 2, + 4, + 7, + 12, + 3, + 9, + 6, + 1, + 4, + 1, + 3, + 1, + 6, + 3, + 3, + 9, + 10, + 2, + 14, + 3, + 19, + 8, + 4, + 6, + 7, + 6, + 1, + 2, + 14, + 2, + 3, + 9, + 2, + 2, + 12, + 12, + 11, + 5, + 5, + 3, + 3, + 1, + 4, + 6, + 4, + 5, + 3, + 2, + 1, + 2, + 6, + 12, + 10, + 16, + 4, + 15, + 7, + 6, + 17, + 15, + 2, + 9, + 2, + 3, + 3, + 2, + 3, + 2, + 4, + 14, + 2, + 8, + 13, + 35, + 2, + 1, + 1, + 6, + 2, + 8, + 3, + 3, + 17, + 11, + 3, + 1, + 1, + 3, + 2, + 6, + 2, + 3, + 4, + 7, + 4, + 3, + 7, + 1, + 2, + 4, + 2, + 3, + 4, + 8, + 1, + 4, + 7, + 6, + 2, + 2, + 15, + 1, + 7, + 1, + 4, + 3, + 13, + 6, + 4, + 4, + 20, + 3, + 2, + 4, + 5, + 8, + 7, + 7, + 9, + 4, + 1, + 13, + 2, + 6, + 6, + 1, + 1, + 3, + 14, + 5, + 5, + 3, + 9, + 4, + 1, + 1, + 2, + 3, + 9, + 2, + 2, + 9, + 5, + 2, + 2, + 4, + 2, + 2, + 6, + 12, + 4, + 10, + 4, + 3, + 4, + 6, + 3, + 1, + 2, + 6, + 8, + 1, + 1, + 10, + 2, + 19, + 1, + 4, + 9, + 4, + 10, + 2, + 2, + 4, + 5, + 7, + 4, + 4, + 4, + 7, + 1, + 10, + 2, + 4, + 8, + 10, + 3, + 7, + 7, + 4, + 5, + 9, + 2, + 3, + 5, + 7, + 1, + 4, + 6, + 5, + 5, + 3, + 8, + 3, + 3, + 1, + 1, + 1, + 6, + 13, + 3, + 1, + 4, + 4, + 1, + 6, + 2, + 3, + 7, + 9, + 5, + 1, + 5, + 16, + 9, + 5, + 1, + 10, + 1, + 7, + 1, + 1, + 22, + 3, + 7, + 3, + 3, + 3, + 13, + 13, + 1, + 20, + 9, + 1, + 23, + 3, + 1, + 6, + 13, + 5, + 6, + 1, + 8, + 2, + 10, + 2, + 12, + 7, + 2, + 1, + 16, + 4, + 7, + 2, + 2, + 20, + 5, + 11, + 8, + 3, + 4, + 5, + 3, + 3, + 8, + 5, + 16, 
+ 2, + 6, + 3, + 11, + 2, + 1, + 3, + 5, + 3, + 1, + 6, + 1, + 12, + 4, + 5, + 1, + 18, + 1, + 2, + 2, + 4, + 19, + 2, + 4, + 7, + 1, + 14, + 3, + 9, + 6, + 4, + 17, + 8, + 2, + 10, + 4, + 10, + 8, + 2, + 3, + 2, + 9, + 1, + 5, + 12, + 2, + 6, + 4, + 3, + 1, + 5, + 3, + 2, + 9, + 4, + 3, + 14, + 2, + 2, + 3, + 3, + 4, + 2, + 3, + 7, + 6, + 4, + 9, + 2, + 6, + 3, + 5, + 2, + 1, + 1, + 6, + 9, + 6, + 6, + 1, + 12, + 11, + 2, + 3, + 4, + 1, + 15, + 2, + 1, + 13, + 3, + 3, + 2, + 2, + 3, + 3, + 1, + 1, + 3, + 3, + 9, + 3, + 1, + 6, + 5, + 2, + 2, + 14, + 4, + 8, + 3, + 4, + 2, + 13, + 1, + 2, + 5, + 2, + 16, + 2, + 1, + 2, + 5, + 7, + 1, + 3, + 1, + 4, + 7, + 5, + 2, + 4, + 2, + 1, + 14, + 3, + 6, + 7, + 1, + 1, + 5, + 2, + 4, + 11, + 2, + 2, + 3, + 9, + 6, + 10, + 11, + 1, + 5, + 3, + 3, + 1, + 1, + 3, + 10, + 1, + 16, + 5, + 4, + 19, + 2, + 3, + 1, + 2, + 6, + 4, + 3, + 9, + 4, + 7, + 3, + 4, + 5, + 9, + 1, + 2, + 1, + 3, + 1, + 3, + 7, + 5, + 4, + 1, + 4, + 14, + 7, + 22, + 6, + 1, + 3, + 1, + 5, + 36, + 3, + 1, + 1, + 3, + 5, + 2, + 9, + 13, + 1, + 2, + 20, + 2, + 7, + 8, + 3, + 1, + 4, + 4, + 13, + 1, + 4, + 4, + 1, + 2, + 4, + 5, + 4, + 10, + 11, + 3, + 3, + 2, + 5, + 3, + 3, + 3, + 1, + 3, + 1, + 2, + 4, + 4, + 2, + 4, + 2, + 21, + 6, + 1, + 9, + 4, + 2, + 3, + 5, + 9, + 1, + 2, + 5, + 4, + 5, + 11, + 4, + 1, + 4, + 4, + 2, + 6, + 1, + 7, + 5, + 4, + 7, + 3, + 4, + 5, + 10, + 1, + 5, + 1, + 2, + 3, + 9, + 8, + 7, + 8, + 1, + 1, + 3, + 3, + 7, + 7, + 3, + 2, + 1, + 1, + 11, + 11, + 16, + 3, + 2, + 10, + 8, + 3, + 2, + 25, + 3, + 10, + 9, + 9, + 4, + 7, + 2, + 3, + 3, + 6, + 1, + 1, + 4, + 6, + 1, + 3, + 1, + 11, + 4, + 1, + 2, + 4, + 3, + 4, + 9, + 1, + 10, + 4, + 4, + 5, + 3, + 6, + 4, + 5, + 1, + 2, + 8, + 4, + 5, + 4, + 3, + 5, + 8, + 1, + 7, + 1, + 6, + 3, + 4, + 19, + 1, + 3, + 9, + 11, + 6, + 5, + 5, + 2, + 3, + 18, + 3, + 6, + 8, + 3, + 7, + 1, + 13, + 3, + 5, + 2, + 4, + 8, + 6, + 4, + 1, + 6, + 1, + 14, + 2, + 2, + 1, + 1, + 4, + 1, + 5, + 2, + 2, + 2, + 
6, + 4, + 2, + 4, + 14, + 2, + 3, + 6, + 6, + 1, + 1, + 3, + 1, + 4, + 17, + 8, + 1, + 6, + 4, + 9, + 2, + 8, + 4, + 1, + 10, + 3, + 12, + 3, + 1, + 3, + 1, + 7, + 1, + 14, + 3, + 13, + 4, + 3, + 3, + 3, + 1, + 1, + 3, + 2, + 10, + 7, + 1, + 2, + 8, + 4, + 6, + 7, + 1, + 12, + 12, + 4, + 3, + 7, + 4, + 16, + 1, + 2, + 9, + 1, + 1, + 5, + 1, + 9, + 4, + 8, + 1, + 1, + 5, + 3, + 2, + 1, + 1, + 1, + 2, + 1, + 6, + 5, + 2, + 11, + 3, + 2, + 8, + 1, + 4, + 2, + 4, + 3, + 2, + 7, + 3, + 8, + 3, + 8, + 1, + 1, + 5, + 12, + 1, + 1, + 4, + 2, + 2, + 4, + 4, + 8, + 7, + 5, + 1, + 4, + 1, + 13, + 8, + 1, + 4, + 2, + 2, + 4, + 4, + 4, + 2, + 6, + 2, + 4, + 7, + 2, + 1, + 1, + 3, + 6, + 4, + 11, + 5, + 1, + 19, + 10, + 12, + 4, + 10, + 13, + 1, + 5, + 14, + 8, + 2, + 1, + 6, + 13, + 1, + 7, + 3, + 7, + 12, + 2, + 7, + 6, + 2, + 5, + 1, + 3, + 5, + 3, + 1, + 4, + 15, + 4, + 1, + 1, + 9, + 4, + 4, + 6, + 7, + 1, + 13, + 1, + 2, + 3, + 1, + 5, + 3, + 10, + 2, + 1, + 5, + 4, + 1, + 6, + 1, + 3, + 4, + 3, + 5, + 1, + 4, + 6, + 3, + 4, + 2, + 1, + 6, + 3, + 3, + 2, + 2, + 5, + 1, + 2, + 1, + 26, + 5, + 2, + 2, + 1, + 1, + 2, + 4, + 5, + 6, + 2, + 1, + 3, + 6, + 2, + 3, + 6, + 8, + 4, + 1, + 3, + 1, + 4, + 4, + 3, + 10, + 4, + 1, + 12, + 18, + 7, + 8, + 7, + 1, + 7, + 4, + 3, + 2, + 13, + 9, + 1, + 14, + 2, + 2, + 13, + 7, + 6, + 1, + 1, + 9, + 3, + 4, + 2, + 6, + 5, + 1, + 2, + 1, + 1, + 2, + 6, + 6, + 22, + 4, + 2, + 2, + 4, + 4, + 4, + 3, + 5, + 2, + 2, + 1, + 5, + 1, + 16, + 11, + 3, + 1, + 8, + 5, + 5, + 14, + 8, + 3, + 2, + 3, + 2, + 1, + 13, + 2, + 4, + 3, + 6, + 3, + 2, + 11, + 1, + 8, + 3, + 2, + 7, + 2, + 5, + 4, + 3, + 5, + 4, + 3, + 7, + 2, + 3, + 1, + 10, + 8, + 8, + 1, + 1, + 1, + 2, + 1, + 9, + 1, + 12, + 4, + 1, + 3, + 2, + 1, + 1, + 2, + 7, + 9, + 2, + 4, + 1, + 9, + 14, + 1, + 6, + 2, + 2, + 1, + 6, + 6, + 5, + 7, + 46, + 4, + 1, + 3, + 15, + 2, + 2, + 4, + 2, + 9, + 8, + 3, + 1, + 6, + 2, + 3, + 12, + 1, + 7, + 2, + 1, + 3, + 5, + 1, + 16, + 1, + 2, + 1, + 1, + 10, 
+ 1, + 1, + 3, + 1, + 3, + 1, + 7, + 20, + 2, + 3, + 3, + 2, + 1, + 3, + 1, + 2, + 2, + 3, + 1, + 1, + 3, + 7, + 2, + 12, + 6, + 14, + 3, + 1, + 9, + 12, + 6, + 5, + 6, + 8, + 3, + 3, + 1, + 3, + 49, + 11, + 2, + 16, + 6, + 10, + 2, + 4, + 6, + 2, + 12, + 4, + 12, + 10, + 8, + 7, + 3, + 3, + 1, + 3, + 3, + 2, + 5, + 4, + 2, + 11, + 4, + 8, + 4, + 4, + 2, + 9, + 2, + 7, + 1, + 6, + 7, + 7, + 4, + 2, + 4, + 8, + 8, + 5, + 18, + 6, + 3, + 15, + 1, + 5, + 1, + 9, + 2, + 1, + 3, + 9, + 4, + 7, + 4, + 3, + 5, + 9, + 3, + 1, + 7, + 6, + 2, + 9, + 10, + 4, + 2, + 2, + 4, + 9, + 4, + 4, + 7, + 12, + 5, + 3, + 26, + 3, + 5, + 10, + 13, + 1, + 1, + 2, + 11, + 1, + 13, + 15, + 1, + 6, + 1, + 1, + 4, + 3, + 6, + 3, + 4, + 3, + 9, + 1, + 2, + 19, + 2, + 3, + 2, + 22, + 5, + 2, + 1, + 1, + 5, + 1, + 10, + 9, + 6, + 10, + 3, + 5, + 5, + 9, + 1, + 3, + 11, + 12, + 10, + 2, + 1, + 4, + 8, + 7, + 11, + 3, + 7, + 3, + 5, + 3, + 4, + 8, + 2, + 5, + 2, + 4, + 13, + 3, + 3, + 8, + 5, + 8, + 9, + 10, + 7, + 5, + 3, + 3, + 2, + 1, + 2, + 2, + 11, + 12, + 2, + 2, + 4, + 5, + 12, + 11, + 12, + 5, + 4, + 5, + 11, + 2, + 5, + 1, + 19, + 9, + 5, + 3, + 6, + 5, + 1, + 2, + 4, + 3, + 1, + 2, + 1, + 2, + 2, + 5, + 2, + 2, + 1, + 9, + 7, + 5, + 4, + 8, + 5, + 4, + 3, + 2, + 8, + 11, + 8, + 3, + 2, + 2, + 6, + 1, + 7, + 13, + 4, + 2, + 5, + 1, + 3, + 1, + 10, + 1, + 1, + 1, + 3, + 8, + 5, + 8, + 2, + 2, + 4, + 1, + 1, + 4, + 9, + 3, + 1, + 1, + 1, + 2, + 2, + 3, + 3, + 2, + 4, + 5, + 1, + 5, + 15, + 2, + 1, + 17, + 10, + 1, + 2, + 7, + 3, + 5, + 3, + 7, + 1, + 8, + 8, + 7, + 1, + 8, + 3, + 1, + 1, + 17, + 2, + 2, + 5, + 7, + 2, + 4, + 1, + 1, + 13, + 4, + 1, + 7, + 27, + 1, + 4, + 10, + 14, + 8, + 4, + 4, + 3, + 9, + 3, + 4, + 5, + 1, + 8, + 2, + 1, + 3, + 4, + 2, + 1, + 2, + 10, + 2, + 9, + 1, + 6, + 10, + 3, + 3, + 4, + 1, + 5, + 5, + 1, + 5, + 3, + 4, + 15, + 3, + 3, + 7, + 2, + 4, + 4, + 18, + 4, + 4, + 2, + 2, + 2, + 4, + 7, + 3, + 1, + 9, + 3, + 5, + 2, + 15, + 8, + 6, + 2, + 2, + 1, + 2, + 8, 
+ 3, + 3, + 4, + 8, + 6, + 1, + 9, + 7, + 1, + 11, + 7, + 1, + 5, + 9, + 5, + 2, + 12, + 14, + 4, + 5, + 3, + 4, + 16, + 2, + 3, + 2, + 1, + 4, + 3, + 2, + 4, + 14, + 1, + 1, + 1, + 3, + 6, + 8, + 2, + 2, + 7, + 1, + 1, + 13, + 1, + 5, + 6, + 8, + 3, + 1, + 1, + 4, + 7, + 5, + 2, + 8, + 1, + 7, + 6, + 6, + 9, + 22, + 2, + 2, + 3, + 5, + 2, + 6, + 7, + 4, + 11, + 1, + 1, + 4, + 2, + 7, + 5, + 9, + 1, + 4, + 1, + 9, + 13, + 3, + 3, + 2, + 6, + 5, + 6, + 1, + 2, + 13, + 5, + 7, + 7, + 12, + 1, + 3, + 12, + 24, + 7, + 19, + 5, + 2, + 4, + 3, + 3, + 7, + 5, + 1, + 1, + 1, + 5, + 6, + 9, + 8, + 5, + 6, + 3, + 1, + 7, + 3, + 12, + 3, + 2, + 7, + 6, + 8 + ], + "xaxis": "x2", + "yaxis": "y2" + }, + { + "alignmentgroup": "True", + "bingroup": "x", + "hovertemplate": "jezyk=ang
dlugosc=%{x}
count=%{y}", + "legendgroup": "", + "marker": { + "color": "#636efa", + "pattern": { + "shape": "" + } + }, + "name": "", + "nbinsx": 50, + "offsetgroup": "", + "orientation": "v", + "showlegend": false, + "type": "histogram", + "x": [ + 7, + 2, + 12, + 1, + 12, + 1, + 2, + 3, + 2, + 18, + 2, + 4, + 3, + 8, + 3, + 2, + 8, + 4, + 2, + 9, + 4, + 9, + 16, + 8, + 4, + 2, + 1, + 2, + 3, + 2, + 3, + 1, + 4, + 1, + 7, + 5, + 2, + 3, + 1, + 14, + 2, + 1, + 2, + 9, + 6, + 1, + 3, + 1, + 1, + 2, + 7, + 3, + 4, + 4, + 7, + 3, + 2, + 3, + 25, + 3, + 6, + 8, + 11, + 6, + 3, + 1, + 9, + 5, + 4, + 4, + 4, + 3, + 7, + 4, + 8, + 2, + 2, + 1, + 7, + 1, + 6, + 4, + 4, + 3, + 3, + 5, + 2, + 8, + 7, + 3, + 6, + 6, + 5, + 1, + 2, + 1, + 4, + 7, + 1, + 3, + 2, + 1, + 2, + 1, + 4, + 2, + 9, + 1, + 3, + 3, + 16, + 1, + 4, + 1, + 4, + 4, + 3, + 6, + 2, + 5, + 1, + 10, + 2, + 3, + 4, + 5, + 1, + 6, + 3, + 9, + 6, + 8, + 6, + 5, + 5, + 7, + 2, + 8, + 5, + 2, + 3, + 9, + 8, + 1, + 11, + 2, + 2, + 8, + 8, + 4, + 3, + 7, + 1, + 4, + 1, + 3, + 4, + 19, + 2, + 1, + 10, + 4, + 3, + 6, + 8, + 3, + 9, + 7, + 1, + 1, + 9, + 9, + 3, + 1, + 1, + 1, + 1, + 6, + 1, + 3, + 7, + 7, + 1, + 7, + 4, + 1, + 2, + 2, + 2, + 13, + 6, + 1, + 4, + 3, + 3, + 1, + 6, + 6, + 4, + 8, + 5, + 1, + 14, + 2, + 1, + 19, + 1, + 2, + 3, + 6, + 2, + 2, + 1, + 3, + 4, + 4, + 8, + 14, + 2, + 4, + 2, + 6, + 2, + 2, + 1, + 4, + 2, + 6, + 15, + 3, + 1, + 6, + 19, + 17, + 9, + 1, + 9, + 2, + 3, + 3, + 4, + 4, + 1, + 2, + 1, + 19, + 2, + 25, + 2, + 9, + 2, + 8, + 5, + 6, + 7, + 7, + 5, + 10, + 5, + 1, + 11, + 1, + 1, + 4, + 5, + 3, + 4, + 1, + 9, + 3, + 1, + 2, + 1, + 3, + 9, + 11, + 1, + 1, + 11, + 15, + 5, + 7, + 4, + 6, + 11, + 5, + 4, + 9, + 5, + 1, + 13, + 1, + 6, + 17, + 3, + 4, + 2, + 1, + 4, + 2, + 8, + 13, + 5, + 6, + 6, + 3, + 9, + 2, + 3, + 4, + 3, + 2, + 1, + 1, + 2, + 2, + 12, + 2, + 2, + 1, + 18, + 1, + 1, + 6, + 11, + 2, + 1, + 1, + 1, + 1, + 43, + 5, + 2, + 6, + 2, + 4, + 6, + 2, + 7, + 5, + 3, + 6, + 4, + 5, + 5, + 5, 
+ 6, + 17, + 3, + 11, + 2, + 3, + 6, + 5, + 1, + 2, + 26, + 1, + 8, + 7, + 7, + 4, + 4, + 4, + 1, + 3, + 2, + 2, + 1, + 1, + 7, + 1, + 1, + 5, + 5, + 11, + 12, + 2, + 7, + 6, + 1, + 7, + 6, + 7, + 8, + 6, + 4, + 17, + 2, + 2, + 1, + 7, + 5, + 3, + 7, + 7, + 2, + 2, + 8, + 1, + 7, + 12, + 4, + 15, + 1, + 6, + 6, + 4, + 5, + 3, + 12, + 8, + 13, + 3, + 16, + 19, + 11, + 8, + 4, + 5, + 2, + 5, + 18, + 4, + 2, + 19, + 6, + 6, + 3, + 5, + 2, + 1, + 5, + 4, + 2, + 1, + 5, + 3, + 7, + 3, + 2, + 4, + 5, + 3, + 8, + 1, + 14, + 2, + 14, + 6, + 6, + 1, + 3, + 5, + 2, + 2, + 8, + 1, + 3, + 1, + 3, + 1, + 10, + 1, + 1, + 11, + 2, + 3, + 8, + 1, + 13, + 1, + 5, + 6, + 8, + 6, + 11, + 6, + 13, + 9, + 1, + 1, + 3, + 2, + 1, + 10, + 1, + 2, + 3, + 2, + 7, + 5, + 1, + 6, + 2, + 3, + 2, + 7, + 2, + 2, + 1, + 2, + 3, + 2, + 13, + 1, + 13, + 2, + 1, + 9, + 9, + 11, + 16, + 5, + 2, + 2, + 7, + 7, + 1, + 2, + 1, + 1, + 1, + 5, + 1, + 11, + 3, + 5, + 2, + 2, + 3, + 9, + 4, + 11, + 1, + 2, + 9, + 8, + 1, + 7, + 1, + 5, + 3, + 7, + 6, + 2, + 1, + 5, + 3, + 1, + 2, + 7, + 12, + 3, + 4, + 4, + 3, + 3, + 1, + 4, + 7, + 18, + 1, + 2, + 1, + 13, + 2, + 2, + 1, + 1, + 1, + 6, + 2, + 15, + 8, + 3, + 1, + 1, + 3, + 4, + 9, + 2, + 3, + 9, + 1, + 1, + 2, + 5, + 14, + 6, + 5, + 8, + 3, + 2, + 4, + 5, + 2, + 4, + 1, + 2, + 1, + 1, + 6, + 3, + 1, + 4, + 5, + 9, + 2, + 9, + 1, + 1, + 2, + 5, + 5, + 4, + 15, + 7, + 4, + 6, + 2, + 9, + 1, + 2, + 1, + 2, + 2, + 3, + 4, + 6, + 1, + 8, + 4, + 22, + 1, + 8, + 2, + 2, + 6, + 6, + 5, + 8, + 11, + 3, + 3, + 8, + 4, + 7, + 3, + 1, + 1, + 1, + 6, + 6, + 7, + 2, + 2, + 5, + 1, + 15, + 2, + 3, + 11, + 2, + 7, + 2, + 12, + 7, + 3, + 1, + 1, + 2, + 3, + 3, + 7, + 2, + 5, + 2, + 5, + 1, + 5, + 10, + 1, + 2, + 2, + 2, + 2, + 3, + 13, + 3, + 10, + 9, + 5, + 4, + 1, + 2, + 1, + 6, + 2, + 1, + 9, + 15, + 1, + 8, + 1, + 10, + 2, + 5, + 15, + 3, + 2, + 1, + 4, + 4, + 1, + 2, + 1, + 18, + 5, + 15, + 3, + 11, + 12, + 8, + 2, + 1, + 3, + 1, + 4, + 1, + 9, + 5, + 1, + 9, + 8, + 4, 
+ 14, + 4, + 4, + 1, + 5, + 3, + 2, + 1, + 3, + 13, + 6, + 2, + 4, + 3, + 13, + 2, + 11, + 2, + 1, + 2, + 12, + 4, + 1, + 5, + 6, + 2, + 4, + 2, + 1, + 4, + 2, + 5, + 8, + 2, + 4, + 1, + 1, + 11, + 2, + 3, + 12, + 1, + 4, + 1, + 8, + 1, + 7, + 2, + 2, + 2, + 5, + 3, + 1, + 3, + 11, + 1, + 1, + 1, + 1, + 2, + 16, + 1, + 3, + 2, + 1, + 11, + 1, + 6, + 9, + 6, + 7, + 13, + 7, + 3, + 8, + 2, + 7, + 3, + 6, + 10, + 6, + 2, + 3, + 1, + 5, + 8, + 2, + 4, + 2, + 1, + 2, + 1, + 5, + 4, + 7, + 1, + 1, + 1, + 4, + 1, + 2, + 5, + 4, + 6, + 4, + 3, + 3, + 1, + 1, + 3, + 3, + 21, + 9, + 6, + 1, + 1, + 4, + 1, + 14, + 6, + 3, + 1, + 4, + 1, + 1, + 16, + 12, + 1, + 9, + 7, + 1, + 1, + 3, + 1, + 2, + 6, + 18, + 11, + 17, + 1, + 5, + 3, + 15, + 3, + 3, + 14, + 5, + 2, + 2, + 5, + 4, + 1, + 8, + 8, + 6, + 2, + 1, + 7, + 4, + 1, + 8, + 6, + 10, + 9, + 1, + 7, + 1, + 1, + 6, + 1, + 16, + 1, + 4, + 2, + 2, + 2, + 1, + 2, + 3, + 1, + 8, + 3, + 3, + 12, + 4, + 1, + 5, + 6, + 4, + 16, + 9, + 3, + 8, + 9, + 7, + 9, + 5, + 2, + 2, + 5, + 3, + 16, + 8, + 1, + 2, + 2, + 2, + 1, + 2, + 17, + 5, + 2, + 2, + 2, + 3, + 2, + 4, + 6, + 11, + 13, + 1, + 6, + 1, + 1, + 5, + 1, + 2, + 2, + 2, + 4, + 7, + 10, + 3, + 8, + 6, + 2, + 8, + 4, + 2, + 10, + 1, + 5, + 2, + 6, + 14, + 17, + 19, + 1, + 1, + 8, + 3, + 9, + 3, + 8, + 1, + 3, + 1, + 4, + 1, + 7, + 5, + 19, + 3, + 2, + 4, + 17, + 1, + 10, + 10, + 1, + 1, + 6, + 5, + 6, + 4, + 1, + 6, + 1, + 2, + 2, + 6, + 5, + 10, + 12, + 3, + 11, + 1, + 6, + 19, + 4, + 6, + 3, + 1, + 2, + 4, + 2, + 2, + 4, + 2, + 3, + 3, + 1, + 15, + 22, + 8, + 1, + 2, + 1, + 1, + 16, + 1, + 3, + 2, + 1, + 17, + 3, + 1, + 9, + 2, + 5, + 9, + 8, + 2, + 8, + 6, + 2, + 8, + 2, + 3, + 4, + 1, + 4, + 12, + 3, + 3, + 1, + 2, + 2, + 5, + 26, + 8, + 2, + 3, + 7, + 12, + 6, + 9, + 6, + 8, + 1, + 4, + 2, + 3, + 4, + 4, + 5, + 4, + 4, + 5, + 4, + 1, + 5, + 13, + 8, + 5, + 2, + 9, + 5, + 13, + 5, + 2, + 9, + 1, + 1, + 3, + 8, + 7, + 1, + 9, + 15, + 8, + 4, + 14, + 1, + 16, + 1, + 12, + 2, + 4, 
+ 1, + 1, + 3, + 25, + 3, + 1, + 1, + 4, + 1, + 8, + 1, + 4, + 3, + 2, + 1, + 4, + 3, + 2, + 3, + 7, + 2, + 1, + 5, + 2, + 5, + 5, + 8, + 21, + 2, + 2, + 5, + 1, + 2, + 1, + 5, + 10, + 4, + 13, + 7, + 8, + 4, + 2, + 6, + 7, + 1, + 1, + 4, + 16, + 18, + 11, + 3, + 4, + 13, + 3, + 17, + 12, + 7, + 4, + 1, + 14, + 5, + 4, + 11, + 7, + 2, + 6, + 1, + 4, + 1, + 1, + 5, + 1, + 1, + 6, + 5, + 4, + 2, + 14, + 8, + 5, + 1, + 9, + 1, + 1, + 4, + 4, + 6, + 2, + 3, + 5, + 2, + 4, + 3, + 1, + 2, + 1, + 12, + 1, + 1, + 11, + 2, + 3, + 20, + 2, + 18, + 1, + 1, + 4, + 2, + 9, + 3, + 4, + 4, + 3, + 2, + 2, + 1, + 4, + 1, + 4, + 4, + 2, + 1, + 2, + 2, + 3, + 3, + 1, + 1, + 10, + 3, + 3, + 2, + 16, + 3, + 2, + 2, + 3, + 1, + 2, + 25, + 6, + 5, + 16, + 7, + 1, + 2, + 2, + 5, + 3, + 7, + 16, + 10, + 2, + 5, + 7, + 2, + 3, + 9, + 6, + 1, + 1, + 1, + 9, + 2, + 3, + 6, + 5, + 3, + 7, + 4, + 5, + 6, + 2, + 4, + 1, + 3, + 3, + 6, + 9, + 2, + 1, + 1, + 3, + 4, + 9, + 6, + 1, + 1, + 2, + 2, + 4, + 1, + 3, + 4, + 3, + 4, + 10, + 7, + 6, + 9, + 4, + 1, + 1, + 4, + 1, + 11, + 4, + 7, + 1, + 6, + 5, + 6, + 3, + 2, + 2, + 2, + 3, + 17, + 1, + 4, + 2, + 4, + 4, + 1, + 4, + 1, + 1, + 3, + 20, + 12, + 2, + 2, + 1, + 3, + 5, + 3, + 2, + 3, + 5, + 3, + 2, + 5, + 4, + 11, + 4, + 2, + 2, + 1, + 9, + 5, + 3, + 2, + 3, + 2, + 1, + 23, + 1, + 4, + 1, + 20, + 3, + 3, + 7, + 14, + 10, + 5, + 1, + 1, + 3, + 4, + 3, + 7, + 7, + 4, + 1, + 25, + 1, + 3, + 1, + 11, + 6, + 1, + 4, + 4, + 1, + 5, + 6, + 10, + 4, + 2, + 1, + 6, + 10, + 1, + 2, + 21, + 1, + 13, + 3, + 8, + 3, + 2, + 3, + 1, + 11, + 4, + 12, + 2, + 2, + 4, + 8, + 5, + 2, + 4, + 2, + 2, + 1, + 2, + 4, + 1, + 5, + 6, + 5, + 7, + 1, + 3, + 10, + 3, + 1, + 3, + 1, + 12, + 6, + 1, + 1, + 8, + 1, + 3, + 4, + 2, + 2, + 6, + 2, + 1, + 2, + 3, + 5, + 15, + 4, + 3, + 3, + 3, + 2, + 8, + 20, + 10, + 10, + 1, + 11, + 4, + 1, + 20, + 6, + 20, + 10, + 4, + 5, + 5, + 3, + 3, + 15, + 10, + 4, + 1, + 8, + 6, + 5, + 1, + 8, + 11, + 3, + 2, + 13, + 2, + 6, + 11, + 4, + 
4, + 14, + 5, + 3, + 2, + 1, + 6, + 2, + 2, + 7, + 7, + 5, + 3, + 1, + 8, + 1, + 3, + 3, + 2, + 1, + 1, + 1, + 16, + 6, + 10, + 2, + 10, + 16, + 7, + 7, + 8, + 3, + 7, + 3, + 5, + 2, + 1, + 17, + 4, + 1, + 7, + 3, + 3, + 3, + 1, + 1, + 3, + 2, + 10, + 1, + 4, + 4, + 2, + 5, + 3, + 2, + 2, + 1, + 9, + 6, + 5, + 1, + 2, + 23, + 6, + 3, + 1, + 6, + 10, + 3, + 19, + 3, + 6, + 11, + 5, + 13, + 6, + 13, + 7, + 3, + 2, + 6, + 18, + 10, + 9, + 16, + 10, + 3, + 4, + 8, + 6, + 8, + 3, + 7, + 1, + 1, + 2, + 1, + 1, + 4, + 5, + 2, + 1, + 3, + 4, + 5, + 6, + 4, + 3, + 1, + 1, + 3, + 9, + 3, + 6, + 3, + 4, + 4, + 4, + 2, + 5, + 2, + 9, + 1, + 4, + 6, + 3 + ], + "xaxis": "x", + "yaxis": "y" + } + ], + "layout": { + "annotations": [ + { + "font": {}, + "showarrow": false, + "text": "jezyk=ang", + "textangle": 90, + "x": 0.98, + "xanchor": "left", + "xref": "paper", + "y": 0.15666666666666665, + "yanchor": "middle", + "yref": "paper" + }, + { + "font": {}, + "showarrow": false, + "text": "jezyk=hiszp", + "textangle": 90, + "x": 0.98, + "xanchor": "left", + "xref": "paper", + "y": 0.4999999999999999, + "yanchor": "middle", + "yref": "paper" + }, + { + "font": {}, + "showarrow": false, + "text": "jezyk=polski", + "textangle": 90, + "x": 0.98, + "xanchor": "left", + "xref": "paper", + "y": 0.8433333333333332, + "yanchor": "middle", + "yref": "paper" + } + ], + "barmode": "relative", + "legend": { + "tracegroupgap": 0 + }, + "margin": { + "t": 60 + }, + "template": { + "data": { + "bar": [ + { + "error_x": { + "color": "#2a3f5f" + }, + "error_y": { + "color": "#2a3f5f" + }, + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "bar" + } + ], + "barpolar": [ + { + "marker": { + "line": { + "color": "#E5ECF6", + "width": 0.5 + }, + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "barpolar" + } + ], + "carpet": [ + { + "aaxis": { + 
"endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "baxis": { + "endlinecolor": "#2a3f5f", + "gridcolor": "white", + "linecolor": "white", + "minorgridcolor": "white", + "startlinecolor": "#2a3f5f" + }, + "type": "carpet" + } + ], + "choropleth": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "choropleth" + } + ], + "contour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "contour" + } + ], + "contourcarpet": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "contourcarpet" + } + ], + "heatmap": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmap" + } + ], + "heatmapgl": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + 
"#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "heatmapgl" + } + ], + "histogram": [ + { + "marker": { + "pattern": { + "fillmode": "overlay", + "size": 10, + "solidity": 0.2 + } + }, + "type": "histogram" + } + ], + "histogram2d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2d" + } + ], + "histogram2dcontour": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "histogram2dcontour" + } + ], + "mesh3d": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "type": "mesh3d" + } + ], + "parcoords": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "parcoords" + } + ], + "pie": [ + { + "automargin": true, + "type": "pie" + } + ], + "scatter": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter" + } + ], + "scatter3d": [ + { + "line": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatter3d" + } + ], + "scattercarpet": [ + { + "marker": { + 
"colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattercarpet" + } + ], + "scattergeo": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergeo" + } + ], + "scattergl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattergl" + } + ], + "scattermapbox": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scattermapbox" + } + ], + "scatterpolar": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolar" + } + ], + "scatterpolargl": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterpolargl" + } + ], + "scatterternary": [ + { + "marker": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "type": "scatterternary" + } + ], + "surface": [ + { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + }, + "colorscale": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "type": "surface" + } + ], + "table": [ + { + "cells": { + "fill": { + "color": "#EBF0F8" + }, + "line": { + "color": "white" + } + }, + "header": { + "fill": { + "color": "#C8D4E3" + }, + "line": { + "color": "white" + } + }, + "type": "table" + } + ] + }, + "layout": { + "annotationdefaults": { + "arrowcolor": "#2a3f5f", + "arrowhead": 0, + "arrowwidth": 1 + }, + "autotypenumbers": "strict", + "coloraxis": { + "colorbar": { + "outlinewidth": 0, + "ticks": "" + } + }, + "colorscale": { + "diverging": [ + [ + 0, + "#8e0152" + ], + [ + 0.1, + "#c51b7d" + ], + [ + 0.2, + "#de77ae" + ], + [ + 0.3, + "#f1b6da" + ], 
+ [ + 0.4, + "#fde0ef" + ], + [ + 0.5, + "#f7f7f7" + ], + [ + 0.6, + "#e6f5d0" + ], + [ + 0.7, + "#b8e186" + ], + [ + 0.8, + "#7fbc41" + ], + [ + 0.9, + "#4d9221" + ], + [ + 1, + "#276419" + ] + ], + "sequential": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ], + "sequentialminus": [ + [ + 0, + "#0d0887" + ], + [ + 0.1111111111111111, + "#46039f" + ], + [ + 0.2222222222222222, + "#7201a8" + ], + [ + 0.3333333333333333, + "#9c179e" + ], + [ + 0.4444444444444444, + "#bd3786" + ], + [ + 0.5555555555555556, + "#d8576b" + ], + [ + 0.6666666666666666, + "#ed7953" + ], + [ + 0.7777777777777778, + "#fb9f3a" + ], + [ + 0.8888888888888888, + "#fdca26" + ], + [ + 1, + "#f0f921" + ] + ] + }, + "colorway": [ + "#636efa", + "#EF553B", + "#00cc96", + "#ab63fa", + "#FFA15A", + "#19d3f3", + "#FF6692", + "#B6E880", + "#FF97FF", + "#FECB52" + ], + "font": { + "color": "#2a3f5f" + }, + "geo": { + "bgcolor": "white", + "lakecolor": "white", + "landcolor": "#E5ECF6", + "showlakes": true, + "showland": true, + "subunitcolor": "white" + }, + "hoverlabel": { + "align": "left" + }, + "hovermode": "closest", + "mapbox": { + "style": "light" + }, + "paper_bgcolor": "white", + "plot_bgcolor": "#E5ECF6", + "polar": { + "angularaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "radialaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "scene": { + "xaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "yaxis": { + "backgroundcolor": 
"#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + }, + "zaxis": { + "backgroundcolor": "#E5ECF6", + "gridcolor": "white", + "gridwidth": 2, + "linecolor": "white", + "showbackground": true, + "ticks": "", + "zerolinecolor": "white" + } + }, + "shapedefaults": { + "line": { + "color": "#2a3f5f" + } + }, + "ternary": { + "aaxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "baxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + }, + "bgcolor": "#E5ECF6", + "caxis": { + "gridcolor": "white", + "linecolor": "white", + "ticks": "" + } + }, + "title": { + "x": 0.05 + }, + "xaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + }, + "yaxis": { + "automargin": true, + "gridcolor": "white", + "linecolor": "white", + "ticks": "", + "title": { + "standoff": 15 + }, + "zerolinecolor": "white", + "zerolinewidth": 2 + } + } + }, + "xaxis": { + "anchor": "y", + "domain": [ + 0, + 0.98 + ], + "title": { + "text": "dlugosc" + } + }, + "xaxis2": { + "anchor": "y2", + "domain": [ + 0, + 0.98 + ], + "matches": "x", + "showticklabels": false + }, + "xaxis3": { + "anchor": "y3", + "domain": [ + 0, + 0.98 + ], + "matches": "x", + "showticklabels": false + }, + "yaxis": { + "anchor": "x", + "domain": [ + 0, + 0.3133333333333333 + ], + "title": { + "text": "count" + } + }, + "yaxis2": { + "anchor": "x2", + "domain": [ + 0.34333333333333327, + 0.6566666666666665 + ], + "matches": "y", + "title": { + "text": "count" + } + }, + "yaxis3": { + "anchor": "x3", + "domain": [ + 0.6866666666666665, + 0.9999999999999998 + ], + "matches": "y", + "title": { + "text": "count" + } + } + } + }, + "text/html": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "df = pd.DataFrame([[random.choice(['ang','polski','hiszp']), np.random.geometric(0.2)] for i in range(5000) ], columns=['jezyk', 'dlugosc'])\n", + "fig = px.histogram(df, x=\"dlugosc\",facet_row='jezyk',nbins=50, hover_data=df.columns)\n", + "fig.show()" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "metadata": {}, + "outputs": [], + "source": [ + "?px.histogram" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ZADANIE 1 \n", + "\n", + "ZNAJDŹ PRZYKŁAD TEKSTÓW Z TEJ SAMEJ DOMENY 1_000_000 słów (20 punktów):\n", + "- język angielski \n", + "- język polski\n", + "- język z rodziny romańskich\n", + "\n", + "Narzędzia:\n", + "- nltk, plotly express\n", + "\n", + "\n", + "Dla każdego z języków:\n", + "- policz ilość unikalnych lowercase słów (ze stemmingiem i bez)\n", + "- policz ilość znaków\n", + "- policz ilość unikalnych znaków\n", + "- policz ilość zdań\n", + "- policz ilość unikalnych zdań\n", + "- podaj min, max, średnią oraz medianę ilości znaków w słowie \n", + "- podaj min, max, średnią oraz medianę ilości słów w zdaniu\n", + "- wygeneruj word cloud (normalnie i po usunięciu stopwordów)\n", + "- wypisz 20 najbardziej popularnych słów (normalnie i po usunięciu stopwordów) (lowercase)\n", + "- wypisz 20 najbardziej popularnych bigramów (normalnie i po usunięciu stopwordów)\n", + "- narysuj wykres częstotliwości słów (histogram) w taki sposób, żeby był maksymalnie czytelny, wypróbuj skalę logarytmiczną x, y, usuwanie słów poniżej limitu wystąpień itp. 
\n", + "- dla próbki 10000 zdań sprawdź, jak często langdetect https://pypi.org/project/langdetect/ się myli i jakie języki odgaduje \n", + "- zilustruj prawo Zipfa (px.line z zaznaczonymi punktami)\n", + "- napisz wnioski (20-50 zdań)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### START ZADANIA" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### KONIEC ZADANIA" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ZADANIE\n", + "\n", + "Weź teksty w języku polskim (każdy rodzaj ma się składać z 5 osobnych dokumentów):\n", + "- tekst prawny\n", + "- tekst naukowy\n", + "- tekst z polskiej powieści (np. wolne lektury)\n", + "- tekst z polskiego internetu (reddit, wykop, komentarze)\n", + "- transkrypcja tekstu mówionego\n", + "\n", + "\n", + "- zilustruj gunning_fog INDEX z https://pypi.org/project/textstat/ (oś y) i średnią długość zdania (oś x) dla wszystkich tekstów na jednym wykresie, domeny oznacz kolorami (px.scatter)\n", + "- zilustruj prawo Heapsa dla wszystkich tekstów na jednym wykresie, domeny oznacz kolorami (px.scatter)\n", + "- napisz wnioski (20-50 zdań)\n", + "\n", + "NAPISZ WNIOSKI\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### START ZADANIA" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### KONIEC ZADANIA" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## WYKONANIE ZADAŃ\n", + "Zgodnie z instrukcją 01_Kodowanie_tekstu.ipynb" + ] + } + ], + "metadata": { + "author": "Jakub Pokrywka", + "email": "kubapok@wmi.amu.edu.pl", + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "lang": "pl", + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": 
"ipython3", + "version": "3.8.3" + }, + "subtitle": "0.Informacje na temat przedmiotu[ćwiczenia]", + "title": "Ekstrakcja informacji", + "year": "2021" + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/cw/02_Język.ipynb b/cw/02_Język.ipynb deleted file mode 100644 index f5cd2e4..0000000 --- a/cw/02_Język.ipynb +++ /dev/null @@ -1,117 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n", - "
\n", - "

Ekstrakcja informacji

\n", - "

0. Język [ćwiczenia]

\n", - "

Jakub Pokrywka (2022)

\n", - "
\n", - "\n", - "![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)" - ] - }, - { - "cell_type": "code", - "execution_count": 278, - "metadata": {}, - "outputs": [], - "source": [ - "NR_INDEKSU = 375985" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## ZNAJDŹ PRZYKŁAD TEKSTÓW Z TEJ SAMEJ DOMENY 1_000_000 słów:\n", - "- język angielski \n", - "- język polski\n", - "- język z rodziny romańskich\n", - "\n", - "Narzędzia:\n", - "- spacy\n", - "- nltk\n", - "\n", - "\n", - "\n", - "Dla każdego z języków:\n", - "- policz ilosć unikalnych słów (ze stemmingiem i bez)\n", - "- policz ilosć unikalnych znaków\n", - "- policz ilosć unikalnych zdań\n", - "- podaj ilość unikalnych \n", - "- podaj min, max, średnią oraz medianę ilości znaków w słowie\n", - "- podaj min, max, średnią oraz medianę ilości słów w zdaniu\n", - "- wygeneruj word cloud (normalnie i po usunięciu stopwordów)\n", - "- wypisz 20 najbardziej popularnych słów (normalnie i po usunięciu stopwordów)\n", - "- wypisz 20 najbardziej popularnych bigramów (normalnie i po usunięciu stopwordów)\n", - "- narysuj wykres częstotliwości słów w taki sposób żeby był maksymalnie czytelny, wypróbuj skali logarytmicznej x, y, usuwanie słów poniżej limitu wystąpień itp. 
\n", - "- dla próbki 10000 zdań sprawdź jak często langdetect https://pypi.org/project/langdetect/ się myli i jakie języki odgaduje \n", - "\n", - "\n", - "NAPISZ WNIOSKI" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## ZADANIE\n", - "\n", - "Weź teksty w języku polskim:\n", - "- tekst prawny\n", - "- tekst z polskiego naukowy\n", - "- tekst z polskiego z powieści (wolne lektury)\n", - "- tekst z polskiego gg\n", - "- transkrypcja tekstu mówionego\n", - "\n", - "\n", - "- gunning_fog INDEX ( https://pypi.org/project/textstat/ ) \n", - "- średnia długość zdania\n", - "- narysuj na jednym wykresie te wartości\n", - "\n", - "\n", - "\n", - "\n", - "NAPISZ WNIOSKI\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "author": "Jakub Pokrywka", - "email": "kubapok@wmi.amu.edu.pl", - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "lang": "pl", - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.8.3" - }, - "subtitle": "0.Informacje na temat przedmiotu[ćwiczenia]", - "title": "Ekstrakcja informacji", - "year": "2021" - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/cw/03_statystyczny_model_językowy.ipynb b/cw/03_statystyczny_model_językowy.ipynb new file mode 100644 index 0000000..9ce2cd4 --- /dev/null +++ b/cw/03_statystyczny_model_językowy.ipynb @@ -0,0 +1,176 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n", + "
\n", + "

Ekstrakcja informacji

\n", + "

3. Statystyczny model językowy [ćwiczenia]

\n", + "

Jakub Pokrywka (2022)

\n", + "
\n", + "\n", + "![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)" + ] + }, + { + "cell_type": "code", + "execution_count": 278, + "metadata": {}, + "outputs": [], + "source": [ + "NR_INDEKSU = 375985" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "https://web.stanford.edu/~jurafsky/slp3/3.pdf" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [], + "source": [ + "class Model():\n", + " \n", + " def __init__(self, vocab_size=30_000, UNK_token= ''):\n", + " pass\n", + " \n", + " def train(corpus:list) -> None:\n", + " pass\n", + " \n", + " def get_conditional_prob_for_word(text: list, word: str) -> float:\n", + " pass\n", + " \n", + " def get_prob_for_text(text: list) -> float:\n", + " pass\n", + " \n", + " def most_probable_next_word(text:list) -> str:\n", + " 'nie powinien zwracań nigdy '\n", + " pass\n", + " \n", + " def high_probable_next_word(text:list) -> str:\n", + " 'nie powinien zwracań nigdy '\n", + " pass\n", + " \n", + " def generate_text(text_beggining:list, length: int, greedy: bool) -> list:\n", + " 'nie powinien zwracań nigdy '\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "metadata": {}, + "outputs": [], + "source": [ + "def get_ppl(text: list) -> float:\n", + " pass" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [], + "source": [ + "def get_entropy(text: list) -> float:\n", + " pass" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- wybierz tekst w dowolnym języku (10_000_000 słów)\n", + "- podziel zbiór na train/test w proporcji 90/10\n", + "- stwórz unigramowy model językowy\n", + "- stwórz bigramowy model językowy\n", + "- stwórz trigramowy model językowy\n", + "- wymyśl 5 krótkich zdań. 
Policz ich prawdopodobieństwo\n", + "- napisz własnoręcznie funkcję, która liczy perplexity na korpusie i policz perplexity na każdym z modeli dla train i test\n", + "- wygeneruj tekst, zaczynając od wymyślonych 5 początków. Postaraj się, żeby dla obu funkcji, a przynajmniej dla high_probable_next_word, teksty były oryginalne. Czy wynik będzie się różnił dla tekstów np.\n", + "`We sketch how Loomis–Whitney follows from this: Indeed, let X be a uniformly distributed random variable with values` oraz `random variable with values`?\n", + "- stwórz model dla korpusu z ZADANIE 1 i policz perplexity dla każdego z tekstów (zrób split 90/10) dla train i test\n", + "\n", + "- klasyfikacja za pomocą modelu językowego\n", + "- wygładzanie metodą Laplace'a" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### START ZADANIA" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### KONIEC ZADANIA" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "- znajdź duży zbiór danych dla klasyfikacji binarnej, wytrenuj osobne modele dla każdej z klas i użyj ich do klasyfikacji. 
Warunkiem zaliczenia jest uzyskanie wyniku większego niż baseline (zwracanie zawsze bardziej licznej klasy)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## WYKONANIE ZADAŃ\n", + "Zgodnie z instrukcją 01_Kodowanie_tekstu.ipynb" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Teoria informacji" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Wygładzanie modeli językowych" + ] + } + ], + "metadata": { + "author": "Jakub Pokrywka", + "email": "kubapok@wmi.amu.edu.pl", + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "lang": "pl", + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.3" + }, + "subtitle": "0.Informacje na temat przedmiotu[ćwiczenia]", + "title": "Ekstrakcja informacji", + "year": "2021" + }, + "nbformat": 4, + "nbformat_minor": 4 +}