forked from filipg/aitech-eks-pub
Merge git.wmi.amu.edu.pl:filipg/aitech-eks
This commit is contained in:
commit
0f34dcdeb4
49
add-metadata.py
Executable file
49
add-metadata.py
Executable file
@ -0,0 +1,49 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
# Procedure for overwriting .ipynb files (generates the header cell and the metadata)
|
||||||
|
import json
|
||||||
|
import sys
|
||||||
|
import re
|
||||||
|
|
||||||
|
def modjup(filen,numer,tytul,typ,author,email,lang,title,year):
    """Stamp a Jupyter notebook with the course header cell and metadata.

    The notebook file is rewritten in place. A Markdown header cell (two logo
    images plus an HTML info box) becomes the first cell, and the
    ``author``, ``email``, ``lang``, ``subtitle``, ``title`` and ``year``
    keys of the notebook-level metadata are set.

    Running the function again on an already stamped notebook replaces the
    existing header cell instead of adding a second one. A header cell is
    recognised by its first source line being the "Logo 1" image line.

    Args:
        filen: Path of the ``.ipynb`` file to modify.
        numer: Unit number, shown in the ``<h2>`` line and in the subtitle.
        tytul: Unit title, shown in italics in the ``<h2>`` line and in the
            subtitle.
        typ: Class type label, shown in square brackets after the unit title.
        author: Author name, shown in the ``<h3>`` line and stored in metadata.
        email: Author e-mail, stored in metadata only.
        lang: Language code, stored in metadata only.
        title: Course title, shown in the ``<h1>`` line and stored in metadata.
        year: Year, shown in parentheses after the author and stored in
            metadata.

    Raises:
        OSError: If the file cannot be opened for reading and writing.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    zerocell = [
        '![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n',
        '<div class="alert alert-block alert-info">\n',
        '<h1> %s </h1>\n'%(title),
        '<h2> %s. <i>%s</i> [%s]</h2> \n'%(numer,tytul,typ),
        '<h3> %s (%s)</h3>\n'%(author,year),
        '</div>\n',
        '\n',
        '![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)',
    ]
    zerodict = {'cell_type': 'markdown','metadata': {'collapsed': False},'source': zerocell}

    with open(filen, 'r+', encoding='utf-8') as f:
        ll = json.load(f)

        # Tolerate notebooks that lack the top-level containers.
        metadata = ll.setdefault("metadata", {})
        metadata["author"] = author
        metadata["email"] = email
        metadata["lang"] = lang
        metadata["subtitle"] = "%s.%s[%s]"%(numer,tytul,typ)
        metadata["title"] = title
        metadata["year"] = year

        cells = ll.setdefault('cells', [])
        # An empty notebook or an empty first cell has no header to replace.
        first_source = cells[0]['source'] if cells else None
        has_header = bool(first_source) and first_source[0] == zerocell[0]
        if has_header:
            cells[0] = zerodict
        else:
            cells.insert(0, zerodict)

        f.seek(0)
        json.dump(ll, f, indent=4)
        # Without this, a result shorter than the old file would leave stale
        # bytes after the new JSON document and corrupt the notebook.
        f.truncate()
|
||||||
|
|
||||||
|
# Usage: add-metadata.py NOTEBOOK.ipynb UNIT_TITLE
#
# Edit the constants below to match the course before running.
if len(sys.argv) < 3:
    sys.exit("usage: add-metadata.py NOTEBOOK.ipynb UNIT_TITLE")

filen=sys.argv[1]

# The unit number is read from the start of the path: an optional run of
# non-digit characters ending in "/" is skipped, leading zeros are dropped,
# and the following digits are captured (e.g. "cw/03a_tfidf.ipynb" -> "3").
numer_match=re.match(r'^(?:\D+/)?0*(\d+)', filen)
if numer_match is None:
    sys.exit("cannot find a unit number at the start of %r" % (filen,))
numer=numer_match.group(1)
tytul=sys.argv[2]
typ="wykład"

author="Filip Graliński"
email="filipg@amu.edu.pl"
lang= "pl"
title="Ekstrakcja informacji"
year="2021"

# Run the procedure.
modjup(filen,numer,tytul,typ,author,email,lang,title,year)
|
7
convert_ipynb_to_md.sh
Normal file
7
convert_ipynb_to_md.sh
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
# Convert a Jupyter notebook to Markdown.
#
# Usage: convert_ipynb_to_md.sh NOTEBOOK.ipynb
#
# The notebook is first exported with `jupyter nbconvert --to script`; the
# exported file (expected next to the notebook with a .py extension) is then
# passed to convert_python_to_markdown.py, which writes NOTEBOOK.md.
set -ex

FILEIPYNB="$1"

# Quote the path so that names with spaces or glob characters survive.
jupyter nbconvert --to script "$FILEIPYNB"

# The dot is escaped so that only a literal ".ipynb" suffix is rewritten.
FILEPY=$(printf '%s\n' "$FILEIPYNB" | sed 's/\.ipynb$/.py/')
FILEMD=$(printf '%s\n' "$FILEIPYNB" | sed 's/\.ipynb$/.md/')

python convert_python_to_markdown.py "$FILEPY" "$FILEMD"
|
13
convert_python_to_markdown.py
Normal file
13
convert_python_to_markdown.py
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
import sys
|
||||||
|
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from markdown import markdown
|
||||||
|
|
||||||
|
# Usage: convert_python_to_markdown.py INPUT.py OUTPUT.md
#
# Copies the comment lines of INPUT.py to OUTPUT.md with the leading "# "
# removed; every other line is dropped. convert_ipynb_to_md.sh feeds this
# script the output of `jupyter nbconvert --to script`.
#
# Both files are opened as UTF-8 explicitly so that the result does not depend
# on the locale's default encoding.
with open(sys.argv[1], encoding='utf-8') as f_in, \
        open(sys.argv[2], 'w', encoding='utf-8') as f_out:
    for i, line in enumerate(f_in):
        # The second and third input lines (indices 1 and 2) are always skipped.
        # NOTE(review): presumably the header nbconvert emits - confirm.
        if i in (1, 2):
            continue
        # Keep "# text" comment lines, but not the "# In[...]" cell markers.
        if line.startswith("# ") and not line.startswith("# In["):
            f_out.write(line[2:])
|
||||||
|
|
@ -1,90 +1,112 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
"source": [
|
"collapsed": false
|
||||||
"# Informacje ogólne"
|
},
|
||||||
]
|
"source": [
|
||||||
},
|
"![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n",
|
||||||
{
|
"<div class=\"alert alert-block alert-info\">\n",
|
||||||
"cell_type": "markdown",
|
"<h1> Ekstrakcja informacji </h1>\n",
|
||||||
"metadata": {},
|
"<h2> 0. <i>Informacje na temat przedmiotu</i> [\u0107wiczenia]</h2> \n",
|
||||||
"source": [
|
"<h3> Jakub Pokrywka (2021)</h3>\n",
|
||||||
"## Kontakt z prowadzącym\n",
|
"</div>\n",
|
||||||
"\n",
|
"\n",
|
||||||
"prowadzący: mgr inż. Jakub Pokrywka\n",
|
"![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)"
|
||||||
"\n",
|
]
|
||||||
"Najlepiej kontaktowąć się ze mną przez MS TEAMS na grupie kanału (ogólne sprawy) lub w prywatnych wiadomościach. Odpisuję co 2-3 dni. Można też umówić się na zdzwonko w godzinach dyżuru (wt 12.00-13.00) lub umówić się w innym terminie.\n",
|
},
|
||||||
"\n",
|
{
|
||||||
"\n",
|
"cell_type": "markdown",
|
||||||
"## Literatura\n",
|
"metadata": {},
|
||||||
"Polecana literatura do przedmiotu:\n",
|
"source": [
|
||||||
"\n",
|
"# Informacje og\u00f3lne"
|
||||||
"\n",
|
]
|
||||||
"- https://www.manning.com/books/relevant-search#toc (darmowa) Polecam chociaż przejrzeć.\n",
|
},
|
||||||
"- Marie-Francine Moens. 2006. Information Extraction: Algorithms and Prospects in a Retrieval Context. Springer. (polecam mniej, jest trochę nieaktualna)\n",
|
{
|
||||||
"- Alex Graves. 2012. Supervised sequence labelling. Studies in Computational Intelligence, vol 385. Springer. Berlin, Heidelberg. \n",
|
"cell_type": "markdown",
|
||||||
"\n",
|
"metadata": {},
|
||||||
"- Jacob Devlin, Ming-Wei Chang, Kenton Lee, Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. North American Association for Computational Linguistics (NAACL). \n",
|
"source": [
|
||||||
"\n",
|
"## Kontakt z prowadz\u0105cym\n",
|
||||||
"- Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, Peter J. Liu. 2020. Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer. Journal of Machine Learning Research vol 21, number 140, pages 1-67. \n",
|
"\n",
|
||||||
"\n",
|
"prowadz\u0105cy: mgr in\u017c. Jakub Pokrywka\n",
|
||||||
"- Flip Graliński, Tomasz Stanisławek, Anna Wróblewska, Dawid Lipiński, Agnieszka Kaliska, Paulina Rosalska, Bartosz Topolski, Przemysław Biecek. 2020. Kleister: A novel task for information extraction involving long documents with complex layout. URL https://arxiv.org/abs/2003.02356 \n",
|
"\n",
|
||||||
"\n",
|
"Najlepiej kontaktow\u0105\u0107 si\u0119 ze mn\u0105 przez MS TEAMS na grupie kana\u0142u (og\u00f3lne sprawy) lub w prywatnych wiadomo\u015bciach. Odpisuj\u0119 co 2-3 dni. Mo\u017cna te\u017c um\u00f3wi\u0107 si\u0119 na zdzwonko w godzinach dy\u017curu (wt 12.00-13.00) lub um\u00f3wi\u0107 si\u0119 w innym terminie.\n",
|
||||||
"- Łukasz Garncarek, Rafał Powalski, Tomasz Stanisławek, Bartosz Topolski, Piotr Halama, Filip Graliński. 2020. LAMBERT: Layout-Aware (Language) Modeling using BERT. URL https://arxiv.org/pdf/2002.08087 \n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"## Zaliczenie\n",
|
"## Literatura\n",
|
||||||
"\n",
|
"Polecana literatura do przedmiotu:\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Do zdobycia będzie conajmniej 600 punktów.\n",
|
"- https://www.manning.com/books/relevant-search#toc (darmowa) Polecam chocia\u017c przejrze\u0107.\n",
|
||||||
"\n",
|
"- Marie-Francine Moens. 2006. Information Extraction: Algorithms and Prospects in a Retrieval Context. Springer. (polecam mniej, jest troch\u0119 nieaktualna)\n",
|
||||||
"Ocena:\n",
|
"- Alex Graves. 2012. Supervised sequence labelling. Studies in Computational Intelligence, vol 385. Springer. Berlin, Heidelberg. \n",
|
||||||
"\n",
|
"\n",
|
||||||
"- -299 — 2\n",
|
"- Jacob Devlin, Ming-Wei Chang, Kenton Lee, Kristina Toutanova. 2019. BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding. North American Association for Computational Linguistics (NAACL). \n",
|
||||||
"\n",
|
"\n",
|
||||||
"- 300-349 — 3\n",
|
"- Colin Raffel, Noam Shazeer, Adam Roberts, Katherine Lee, Sharan Narang, Michael Matena, Yanqi Zhou, Wei Li, Peter J. Liu. 2020. Exploring the Limits of Transfer Learning with a Unified Text-to-Text Transformer. Journal of Machine Learning Research vol 21, number 140, pages 1-67. \n",
|
||||||
"\n",
|
"\n",
|
||||||
"- 350-399 — 3+\n",
|
"- Flip Grali\u0144ski, Tomasz Stanis\u0142awek, Anna Wr\u00f3blewska, Dawid Lipi\u0144ski, Agnieszka Kaliska, Paulina Rosalska, Bartosz Topolski, Przemys\u0142aw Biecek. 2020. Kleister: A novel task for information extraction involving long documents with complex layout. URL https://arxiv.org/abs/2003.02356 \n",
|
||||||
"\n",
|
"\n",
|
||||||
"- 400-449 — 4\n",
|
"- \u0141ukasz Garncarek, Rafa\u0142 Powalski, Tomasz Stanis\u0142awek, Bartosz Topolski, Piotr Halama, Filip Grali\u0144ski. 2020. LAMBERT: Layout-Aware (Language) Modeling using BERT. URL https://arxiv.org/pdf/2002.08087 \n",
|
||||||
"\n",
|
"\n",
|
||||||
"- 450—499 — 4+\n",
|
"## Zaliczenie\n",
|
||||||
"\n",
|
"\n",
|
||||||
"- 500- — 5\n",
|
"\n",
|
||||||
"\n",
|
"\n",
|
||||||
"\n",
|
"Do zdobycia b\u0119dzie conajmniej 600 punkt\u00f3w.\n",
|
||||||
"**Żeby zaliczyć przedmiot należy pojawiać się na laboratoriach. Maksymalna liczba nieobecności to 3. Obecność będę sprawdzał poprzez panel MS TEAMS, czyli będę sprawdzał czy ktoś jest wdzwoniony na ćwiczenia. Jeżeli kogoś nie będzie więcej niż 3 razy, to nie będzie miał zaliczonego przedmiotu** \n"
|
"\n",
|
||||||
]
|
"Ocena:\n",
|
||||||
},
|
"\n",
|
||||||
{
|
"- -299 \u2014 2\n",
|
||||||
"cell_type": "code",
|
"\n",
|
||||||
"execution_count": null,
|
"- 300-349 \u2014 3\n",
|
||||||
"metadata": {},
|
"\n",
|
||||||
"outputs": [],
|
"- 350-399 \u2014 3+\n",
|
||||||
"source": []
|
"\n",
|
||||||
}
|
"- 400-449 \u2014 4\n",
|
||||||
],
|
"\n",
|
||||||
"metadata": {
|
"- 450\u2014499 \u2014 4+\n",
|
||||||
"kernelspec": {
|
"\n",
|
||||||
"display_name": "Python 3",
|
"- 500- \u2014 5\n",
|
||||||
"language": "python",
|
"\n",
|
||||||
"name": "python3"
|
"\n",
|
||||||
},
|
"**\u017beby zaliczy\u0107 przedmiot nale\u017cy pojawia\u0107 si\u0119 na laboratoriach. Maksymalna liczba nieobecno\u015bci to 3. Obecno\u015b\u0107 b\u0119d\u0119 sprawdza\u0142 poprzez panel MS TEAMS, czyli b\u0119d\u0119 sprawdza\u0142 czy kto\u015b jest wdzwoniony na \u0107wiczenia. Je\u017celi kogo\u015b nie b\u0119dzie wi\u0119cej ni\u017c 3 razy, to nie b\u0119dzie mia\u0142 zaliczonego przedmiotu** \n"
|
||||||
"language_info": {
|
]
|
||||||
"codemirror_mode": {
|
},
|
||||||
"name": "ipython",
|
{
|
||||||
"version": 3
|
"cell_type": "code",
|
||||||
},
|
"execution_count": null,
|
||||||
"file_extension": ".py",
|
"metadata": {},
|
||||||
"mimetype": "text/x-python",
|
"outputs": [],
|
||||||
"name": "python",
|
"source": []
|
||||||
"nbconvert_exporter": "python",
|
}
|
||||||
"pygments_lexer": "ipython3",
|
],
|
||||||
"version": "3.8.3"
|
"metadata": {
|
||||||
}
|
"kernelspec": {
|
||||||
},
|
"display_name": "Python 3",
|
||||||
"nbformat": 4,
|
"language": "python",
|
||||||
"nbformat_minor": 4
|
"name": "python3"
|
||||||
}
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.8.3"
|
||||||
|
},
|
||||||
|
"author": "Jakub Pokrywka",
|
||||||
|
"email": "kubapok@wmi.amu.edu.pl",
|
||||||
|
"lang": "pl",
|
||||||
|
"subtitle": "0.Informacje na temat przedmiotu[\u0107wiczenia]",
|
||||||
|
"title": "Ekstrakcja informacji",
|
||||||
|
"year": "2021"
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
@ -1,181 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Opracować w języku Haskell wyspecjalizowanego robota pobierającego dane z konkretnego serwisu.\n",
|
|
||||||
"\n",
|
|
||||||
"Punkty: 80 (domyślnie - niektóre zadanie są trudniejsze, wówczas podaję osobno liczbę punktów)\n",
|
|
||||||
"\n",
|
|
||||||
"Ogólne zasady:\n",
|
|
||||||
"\n",
|
|
||||||
"* pobieramy informacje (metadane) o plikach PDF, DjVU, JPG itp, ale nie same pliki,\n",
|
|
||||||
"* nie pobierajmy całego serwisu, tylko tyle, ile trzeba, by pobrać metadane o interesujących nas zasobach,\n",
|
|
||||||
"* interesują nas tylko teksty polskie, jeśli nie jest to trudne, należy odfiltrować publikacje obcojęzyczne,\n",
|
|
||||||
"* staramy się ustalać datę z możliwie dużą dokładnością.\n",
|
|
||||||
"\n",
|
|
||||||
"Sposób pracy:\n",
|
|
||||||
"\n",
|
|
||||||
"0. Pobrać Haskell Stack\n",
|
|
||||||
"\n",
|
|
||||||
"~~~\n",
|
|
||||||
"curl -sSL https://get.haskellstack.org/ | sh -s - -d ~/bin\n",
|
|
||||||
"~~~\n",
|
|
||||||
"\n",
|
|
||||||
"Na fizycznych komputerach wydziałowych są błędnie ustawione prawa dostępu na dyskach sieciowych, Haskell Stack musi działać na fizycznym dysku:\n",
|
|
||||||
"\n",
|
|
||||||
"~~~\n",
|
|
||||||
"rm -rf /mnt/poligon/.stack\n",
|
|
||||||
"mkdir /mnt/poligon/.stack\n",
|
|
||||||
"mv ~/.stack ~/.stack-bak # gdyby już był... proszę się nie przejmować błędem\n",
|
|
||||||
"ln -s /mnt/poligon/.stack ~/.stack\n",
|
|
||||||
"~~~\n",
|
|
||||||
"\n",
|
|
||||||
"1. Pobrać repozytorium:\n",
|
|
||||||
"\n",
|
|
||||||
"~~~\n",
|
|
||||||
"git clone https://git.wmi.amu.edu.pl/filipg/twilight-library.git\n",
|
|
||||||
"~~~\n",
|
|
||||||
"\n",
|
|
||||||
"2. Wypchnąć na początek do swojego repozytorium (trzeba sobie najpierw założyć to repozytorium na <https://git.wmi.amu.edu.pl>)\n",
|
|
||||||
"\n",
|
|
||||||
"~~~\n",
|
|
||||||
"cd twilight-library\n",
|
|
||||||
"git remote set-url origin git@git.wmi.amu.edu.pl:YOURID/twilight-library\n",
|
|
||||||
"git push origin master\n",
|
|
||||||
"git remote add mother git://gonito.net/twilight-library\n",
|
|
||||||
"~~~\n",
|
|
||||||
"\n",
|
|
||||||
"3. Zobacz, czy przykładowy robot dla strony z „Alamanachem Muszyny” działa:\n",
|
|
||||||
"\n",
|
|
||||||
"~~~\n",
|
|
||||||
"~/bin/stack install # może trwać długo za pierwszym razem\n",
|
|
||||||
"~/bin/stack exec almanachmuszyny\n",
|
|
||||||
"~~~\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"W razie problemów z instalacją:\n",
|
|
||||||
"\n",
|
|
||||||
"~~~\n",
|
|
||||||
"sudo apt install libpcre3 libpcre3-dev\n",
|
|
||||||
"~~~\n",
|
|
||||||
"\n",
|
|
||||||
"3. Opracuj swojego robota wzorując się na pliku `almanachmuszyny.hs`.\n",
|
|
||||||
" (Ale dodaj swój plik, nie zmieniaj `almanachmuszyny.hs`!)\n",
|
|
||||||
"\n",
|
|
||||||
"4. Dopisz specyfikację swojego robota do `shadow-library.cabal`.\n",
|
|
||||||
"\n",
|
|
||||||
"5. Pracuj nad swoim robotem, uruchamiaj go w następujący sposób:\n",
|
|
||||||
"\n",
|
|
||||||
"~~~\n",
|
|
||||||
"~/bin/stack install\n",
|
|
||||||
"~/bin/stack exec mojrobot\n",
|
|
||||||
"~~~\n",
|
|
||||||
"\n",
|
|
||||||
"(Tzn. nie nazywaj go „mojrobot”, tylko użyj jakieś sensownej nazwy.)\n",
|
|
||||||
"\n",
|
|
||||||
"6. Jeśli publikacja (np. pojedynczy numer gazety) składa się z wielu plików, powinien zostać wygenerowany jeden\n",
|
|
||||||
"rekord, w `finalUrl` powinny znaleźć się URL do poszczególnych stron (np. plików JPR) oddzielone ` // `.\n",
|
|
||||||
"\n",
|
|
||||||
"7. Po zakończeniu prac prześlij mejla do prowadzącego zajęcia z URL-em do swojego repozytorium.\n",
|
|
||||||
"\n",
|
|
||||||
"Lista serwisów do wyboru (na każdy serwis 1 osoba):\n",
|
|
||||||
"\n",
|
|
||||||
"1. [Teksty Drugie](http://tekstydrugie.pl)\n",
|
|
||||||
"2. [Archiwum Inspektora Pracy](https://www.pip.gov.pl/pl/inspektor-pracy/66546,archiwum-inspektora-pracy-.html)\n",
|
|
||||||
"3. [Medycyna Weterynaryjna](http://www.medycynawet.edu.pl/archives) — również historyczne zasoby od 1945 roku, **120 punktów**\n",
|
|
||||||
"4. [Polskie Towarzystwo Botaniczne](https://pbsociety.org.pl/default/dzialalnosc-wydawnicza/) — wszystkie dostępne zdigitalizowane publikacje!, **130 punktow**\n",
|
|
||||||
"5. [Wieści Pepowa](http://archiwum2019.pepowo.pl/news/c-10/gazeta) — nie pominąć strony nr 2 z wynikami, **110 punktów**\n",
|
|
||||||
"6. [Czasopismo Kosmos](http://kosmos.icm.edu.pl/)\n",
|
|
||||||
"7. [Czasopismo Wszechświat](http://www.ptpk.org/archiwum.html)\n",
|
|
||||||
"8. [Czasopisma polonijne we Francji](https://argonnaute.parisnanterre.fr/ark:/14707/a011403267917yQQFAS) — najlepiej w postaci PDF-ów, jak np. [https://argonnaute.parisnanterre.fr/medias/customer_3/periodique/immi_pol_lotmz1_pdf/BDIC_GFP_2929_1945_039.pdf](), **220 punktów**\n",
|
|
||||||
"9. [Muzeum Sztuki — czasopisma](https://zasoby.msl.org.pl/mobjects/show), **220 punktów**, publikacje, teksty, czasopisma, wycinki\n",
|
|
||||||
"10. [Wiadomości Urzędu Patentowego](https://grab.uprp.pl/sites/Wydawnictwa/WydawnictwaArchiwum/WydawnictwaArchiwum/Forms/AllItems.aspx)\n",
|
|
||||||
"11. [Czas, czasopismo polonijne](https://digitalcollections.lib.umanitoba.ca/islandora/object/uofm:2222545), **140 punktów** S.G.\n",
|
|
||||||
"12. [Stenogramy Okrągłego Stołu](http://okragly-stol.pl/stenogramy/), **110 punktów**\n",
|
|
||||||
"13. [Nasze Popowice](https://smpopowice.pl/index.php/numery-archiwalne)\n",
|
|
||||||
"14. [Czasopisma entomologiczne](http://pte.au.poznan.pl/)\n",
|
|
||||||
"15. [Wiadomości matematyczne](https://wydawnictwa.ptm.org.pl/index.php/wiadomosci-matematyczne/issue/archive?issuesPage=2), **120 punktow**\n",
|
|
||||||
"16. [Alkoholizm i Narkomania](http://www.ain.ipin.edu.pl/archiwum-starsze.html)\n",
|
|
||||||
"17. [Czasopismo Etyka](https://etyka.uw.edu.pl/tag/etyka-562018/), O.K.\n",
|
|
||||||
"18. [Skup makulatury](https://chomikuj.pl/skup.makulatury.prl), **250 punktów**\n",
|
|
||||||
"19. [Hermes](https://chomikuj.pl/hermes50-1) i https://chomikuj.pl/hermes50-2, **250 punktów**\n",
|
|
||||||
"20. [E-dziennik Województwa Mazowieckiego](https://edziennik.mazowieckie.pl/actbymonths) **150 punktów**\n",
|
|
||||||
"21. [Czasopismo Węgiel Brunatny](http://www.ppwb.org.pl/wegiel_brunatny)\n",
|
|
||||||
"22. [Gazeta GUM](https://gazeta.gumed.edu.pl/61323.html)\n",
|
|
||||||
"23. [Nowiny Andrychowskie](https://radioandrychow.pl/nowiny/)\n",
|
|
||||||
"24. [Kawęczyniak](http://bip.kaweczyn.pl/kaweczyn/pl/dla-mieszkanca/publikacje/archiwalne-numery-kaweczyniaka-rok-1995-2005/kaweczyniaki-rok-1997.html)\n",
|
|
||||||
"25. [Zbór Chrześcijański w Bielawia](http://zborbielawa.pl/archiwum/)\n",
|
|
||||||
"26. [Gazeta Rytwiańska](http://www.rytwiany.com.pl/index.php?sid=5)\n",
|
|
||||||
"27. [Nasze Popowice](https://smpopowice.pl/gazeta/2005_12_nasze-popowice-nr_01.pdf)\n",
|
|
||||||
"28. [Echo Chełmka](http://moksir.chelmek.pl/o-nas/echo-chelmka)\n",
|
|
||||||
"29. [Głos Świdnika](http://s.bibliotekaswidnik.pl/index.php/archwium/116-glos-swidnika) **100 punktów**\n",
|
|
||||||
"30. [Aneks](https://aneks.kulturaliberalna.pl/archiwum-aneksu/) **90 punktów**\n",
|
|
||||||
"31. [Teatr Lalel](http://polunima.pl/teatr-lalek)\n",
|
|
||||||
"32. [Biuletyn Bezpieczna Chemia](https://www.pipc.org.pl/publikacje/biuletyn-bezpieczna-chemia)\n",
|
|
||||||
"33. [Głos Maszynisty](https://zzm.org.pl/glos-maszynisty/)\n",
|
|
||||||
"34. [Kultura Paryska](https://www.kulturaparyska.com/pl/index), całe archiwum z książkami i innymi czasopismami, **180 punktów**\n",
|
|
||||||
"35. [Gazeta Fabryczna - Kraśnik](https://80lat.flt.krasnik.pl/index.php/gazeta-fabryczna/) **120 punktów**\n",
|
|
||||||
"36. [Artykuły o Jujutsu](http://www.kobudo.pl/artykuly_jujutsu.html)\n",
|
|
||||||
"37. [Wycinki o Taekwon-Do](https://www2.pztkd.lublin.pl/archpras.html#z1996)\n",
|
|
||||||
"38. [Materiały o kolejnictwie](https://enkol.pl/Strona_g%C5%82%C3%B3wna) **180 punktów**\n",
|
|
||||||
"39. [Centralny Instytut Ochrony Pracy](http://archiwum.ciop.pl/), znaleźć wszystkie publikacje typu <http://archiwum.ciop.pl/44938>, wymaga trochę sprytu **130 punktów**\n",
|
|
||||||
"40. [Biblioteka Sejmowa - Zasoby Cyfrowe](https://biblioteka.sejm.gov.pl/zasoby_cyfrowe/), **200 punktów**\n",
|
|
||||||
"41. [Elektronika Praktyczna](https://ep.com.pl/archiwum), te numery, które dostępne w otwarty sposób, np. rok 1993\n",
|
|
||||||
"42. [Litewska Akademia Nauk](http://www.mab.lt/), tylko materiały w jęz. polskim, takie jak np.\n",
|
|
||||||
" <https://elibrary.mab.lt/handle/1/840>, **170 punktów**\n",
|
|
||||||
"43. [Litewska Biblioteka Cyfrowa](https://www.epaveldas.lt), wyłuskać tylko materiały w jęz. polskim, **190 punktów**\n",
|
|
||||||
"44. [Czasopisma Geologiczne](https://geojournals.pgi.gov.pl), **120 punktów**\n",
|
|
||||||
"45. [Czasopisma PTTK](https://www.czasopisma.centralnabibliotekapttk.pl/index.php?i3), **120 punktów**\n",
|
|
||||||
"46. [Czasopisma Polskiego Towarzystwa Dendrologicznego](https://www.ptd.pl/?page_id=7), **100 punktów**\n",
|
|
||||||
"47. [Kilka przedwojennych książek](https://dziemiela.com/documents.htm)\n",
|
|
||||||
"48. [Historia polskiej informatyki](http://klio.spit.iq.pl/a4-wyroby-polskiej-informatyki/a4-2-sprzet/) - wyjątkowo bez datowania\n",
|
|
||||||
"49. [Zeszyty Formacyjne Katolickiego Stowarzyszenia „Civitas Christania”](http://podkarpacki.civitaschristiana.pl/formacja/zeszyty-formacyjne/), tylko niektóre pliki można zdatować\n",
|
|
||||||
"50. [Józef Piłsudski Institute of America](https://archiwa.pilsudski.org/) - **220 punktów**\n",
|
|
||||||
"51. [Prasa podziemna — Częstochowa](http://www.podziemie.com.pl), również ulotki i inne materiały skanowane - **180 punktów**\n",
|
|
||||||
"52. [Tajemnica Atari](http://krap.pl/mirrorz/atari/horror.mirage.com.pl/pixel/), plik ZIP z DjVu\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"### F.A.Q.\n",
|
|
||||||
"\n",
|
|
||||||
"**P: Nie działają strony z protokołem https, co zrobić?**\n",
|
|
||||||
"\n",
|
|
||||||
"O: Trzeba użyć modułu opartego na bibliotece curl. Paczka Ubuntu została zainstalowana na komputerach wydziałowych. Na\n",
|
|
||||||
"swoim komputerze możemy zainstalować paczkę libcurl4-openssl-dev, a\n",
|
|
||||||
"następnie można sobie ściągnąć wersję twilight-library opartą na libcurl:\n",
|
|
||||||
"\n",
|
|
||||||
" git fetch git://gonito.net/twilight-library withcurl\n",
|
|
||||||
" git merge FETCH_HEAD\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.9.1"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 4
|
|
||||||
}
|
|
@ -1,5 +1,19 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n",
|
||||||
|
"<div class=\"alert alert-block alert-info\">\n",
|
||||||
|
"<h1> Ekstrakcja informacji </h1>\n",
|
||||||
|
"<h2> 1. <i>Wyszukiwarki wprowadzenie</i> [ćwiczenia]</h2> \n",
|
||||||
|
"<h3> Jakub Pokrywka (2021)</h3>\n",
|
||||||
|
"</div>\n",
|
||||||
|
"\n",
|
||||||
|
"![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@ -234,11 +248,14 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"author": "Jakub Pokrywka",
|
||||||
|
"email": "kubapok@wmi.amu.edu.pl",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3",
|
"display_name": "Python 3",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
|
"lang": "pl",
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
"name": "ipython",
|
"name": "ipython",
|
||||||
@ -249,8 +266,11 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.8.5"
|
"version": "3.8.3"
|
||||||
}
|
},
|
||||||
|
"subtitle": "1.Wyszukiwarki wprowadzenie[ćwiczenia]",
|
||||||
|
"title": "Ekstrakcja informacji",
|
||||||
|
"year": "2021"
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 4
|
"nbformat_minor": 4
|
||||||
|
@ -1,5 +1,19 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n",
|
||||||
|
"<div class=\"alert alert-block alert-info\">\n",
|
||||||
|
"<h1> Ekstrakcja informacji </h1>\n",
|
||||||
|
"<h2> 2. <i>Wyszukiwarki roboty</i> [ćwiczenia]</h2> \n",
|
||||||
|
"<h3> Jakub Pokrywka (2021)</h3>\n",
|
||||||
|
"</div>\n",
|
||||||
|
"\n",
|
||||||
|
"![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@ -272,11 +286,14 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"author": "Jakub Pokrywka",
|
||||||
|
"email": "kubapok@wmi.amu.edu.pl",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3",
|
"display_name": "Python 3",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
|
"lang": "pl",
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
"name": "ipython",
|
"name": "ipython",
|
||||||
@ -287,8 +304,11 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.9.1"
|
"version": "3.8.3"
|
||||||
}
|
},
|
||||||
|
"subtitle": "2.Wyszukiwarki roboty[ćwiczenia]",
|
||||||
|
"title": "Ekstrakcja informacji",
|
||||||
|
"year": "2021"
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 4
|
"nbformat_minor": 4
|
||||||
|
1120
cw/03a_tfidf.ipynb
Normal file
1120
cw/03a_tfidf.ipynb
Normal file
File diff suppressed because it is too large
Load Diff
91
cw/03a_tfidf_ODPOWIEDZI.ipynb
Normal file
91
cw/03a_tfidf_ODPOWIEDZI.ipynb
Normal file
@ -0,0 +1,91 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {
|
||||||
|
"collapsed": false
|
||||||
|
},
|
||||||
|
"source": [
|
||||||
|
"![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n",
|
||||||
|
"<div class=\"alert alert-block alert-info\">\n",
|
||||||
|
"<h1> Ekstrakcja informacji </h1>\n",
|
||||||
|
"<h2> 3. <i>tfidf (1)</i> [\u0107wiczenia]</h2> \n",
|
||||||
|
"<h3> Jakub Pokrywka (2021)</h3>\n",
|
||||||
|
"</div>\n",
|
||||||
|
"\n",
|
||||||
|
"![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def word_to_index(word):\n",
|
||||||
|
" vec = np.zeros(len(vocabulary))\n",
|
||||||
|
" if word in vocabulary:\n",
|
||||||
|
" idx = vocabulary.index(word)\n",
|
||||||
|
" vec[idx] = 1\n",
|
||||||
|
" else:\n",
|
||||||
|
" vec[-1] = 1\n",
|
||||||
|
" return vec"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def tf(document):\n",
|
||||||
|
" document_vector = None\n",
|
||||||
|
" for word in document:\n",
|
||||||
|
" if document_vector is None:\n",
|
||||||
|
" document_vector = word_to_index(word)\n",
|
||||||
|
" else:\n",
|
||||||
|
" document_vector += word_to_index(word)\n",
|
||||||
|
" return document_vector"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def similarity(query, document):\n",
|
||||||
|
" numerator = np.sum(query * document)\n",
|
||||||
|
" denominator = np.sqrt(np.sum(query*query)) * np.sqrt(np.sum(document*document)) \n",
|
||||||
|
" return numerator / denominator"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.8.3"
|
||||||
|
},
|
||||||
|
"author": "Jakub Pokrywka",
|
||||||
|
"email": "kubapok@wmi.amu.edu.pl",
|
||||||
|
"lang": "pl",
|
||||||
|
"subtitle": "3.tfidf (1)[\u0107wiczenia]",
|
||||||
|
"title": "Ekstrakcja informacji",
|
||||||
|
"year": "2021"
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
File diff suppressed because it is too large
Load Diff
@ -1,69 +0,0 @@
|
|||||||
{
|
|
||||||
"cells": [
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 1,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"def word_to_index(word):\n",
|
|
||||||
" vec = np.zeros(len(vocabulary))\n",
|
|
||||||
" if word in vocabulary:\n",
|
|
||||||
" idx = vocabulary.index(word)\n",
|
|
||||||
" vec[idx] = 1\n",
|
|
||||||
" else:\n",
|
|
||||||
" vec[-1] = 1\n",
|
|
||||||
" return vec"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"def tf(document):\n",
|
|
||||||
" document_vector = None\n",
|
|
||||||
" for word in document:\n",
|
|
||||||
" if document_vector is None:\n",
|
|
||||||
" document_vector = word_to_index(word)\n",
|
|
||||||
" else:\n",
|
|
||||||
" document_vector += word_to_index(word)\n",
|
|
||||||
" return document_vector"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"def similarity(query, document):\n",
|
|
||||||
" numerator = np.sum(query * document)\n",
|
|
||||||
" denominator = np.sqrt(np.sum(query*query)) * np.sqrt(np.sum(document*document)) \n",
|
|
||||||
" return numerator / denominator"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.8.3"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 4
|
|
||||||
}
|
|
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,19 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n",
|
||||||
|
"<div class=\"alert alert-block alert-info\">\n",
|
||||||
|
"<h1> Ekstrakcja informacji </h1>\n",
|
||||||
|
"<h2> 4. <i>Wyszukiwarki</i> [ćwiczenia]</h2> \n",
|
||||||
|
"<h3> Jakub Pokrywka (2021)</h3>\n",
|
||||||
|
"</div>\n",
|
||||||
|
"\n",
|
||||||
|
"![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@ -81,11 +95,14 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"author": "Jakub Pokrywka",
|
||||||
|
"email": "kubapok@wmi.amu.edu.pl",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3",
|
"display_name": "Python 3",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
|
"lang": "pl",
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
"name": "ipython",
|
"name": "ipython",
|
||||||
@ -96,8 +113,11 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.9.1"
|
"version": "3.8.3"
|
||||||
}
|
},
|
||||||
|
"subtitle": "4.Wyszukiwarki[ćwiczenia]",
|
||||||
|
"title": "Ekstrakcja informacji",
|
||||||
|
"year": "2021"
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 4
|
"nbformat_minor": 4
|
||||||
|
@ -1,5 +1,19 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n",
|
||||||
|
"<div class=\"alert alert-block alert-info\">\n",
|
||||||
|
"<h1> Ekstrakcja informacji </h1>\n",
|
||||||
|
"<h2> 5. <i>Ekstrakcja informacji z dokumentów</i> [ćwiczenia]</h2> \n",
|
||||||
|
"<h3> Jakub Pokrywka (2021)</h3>\n",
|
||||||
|
"</div>\n",
|
||||||
|
"\n",
|
||||||
|
"![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@ -213,11 +227,14 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"author": "Jakub Pokrywka",
|
||||||
|
"email": "kubapok@wmi.amu.edu.pl",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3",
|
"display_name": "Python 3",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
|
"lang": "pl",
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
"name": "ipython",
|
"name": "ipython",
|
||||||
@ -229,7 +246,10 @@
|
|||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.8.3"
|
"version": "3.8.3"
|
||||||
}
|
},
|
||||||
|
"subtitle": "5.Ekstrakcja informacji z dokumentów[ćwiczenia]",
|
||||||
|
"title": "Ekstrakcja informacji",
|
||||||
|
"year": "2021"
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 4
|
"nbformat_minor": 4
|
||||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,19 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n",
|
||||||
|
"<div class=\"alert alert-block alert-info\">\n",
|
||||||
|
"<h1> Ekstrakcja informacji </h1>\n",
|
||||||
|
"<h2> 7. <i>Regresja liniowa</i> [ćwiczenia]</h2> \n",
|
||||||
|
"<h3> Jakub Pokrywka (2021)</h3>\n",
|
||||||
|
"</div>\n",
|
||||||
|
"\n",
|
||||||
|
"![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@ -1046,11 +1060,14 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"author": "Jakub Pokrywka",
|
||||||
|
"email": "kubapok@wmi.amu.edu.pl",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3",
|
"display_name": "Python 3",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
|
"lang": "pl",
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
"name": "ipython",
|
"name": "ipython",
|
||||||
@ -1061,8 +1078,11 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.8.5"
|
"version": "3.8.3"
|
||||||
}
|
},
|
||||||
|
"subtitle": "7.Regresja liniowa[ćwiczenia]",
|
||||||
|
"title": "Ekstrakcja informacji",
|
||||||
|
"year": "2021"
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 4
|
"nbformat_minor": 4
|
||||||
|
@ -1,5 +1,19 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n",
|
||||||
|
"<div class=\"alert alert-block alert-info\">\n",
|
||||||
|
"<h1> Ekstrakcja informacji </h1>\n",
|
||||||
|
"<h2> 7. <i>Regresja liniowa</i> [ćwiczenia]</h2> \n",
|
||||||
|
"<h3> Jakub Pokrywka (2021)</h3>\n",
|
||||||
|
"</div>\n",
|
||||||
|
"\n",
|
||||||
|
"![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@ -1354,11 +1368,14 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"author": "Jakub Pokrywka",
|
||||||
|
"email": "kubapok@wmi.amu.edu.pl",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3",
|
"display_name": "Python 3",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
|
"lang": "pl",
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
"name": "ipython",
|
"name": "ipython",
|
||||||
@ -1369,8 +1386,11 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.8.5"
|
"version": "3.8.3"
|
||||||
}
|
},
|
||||||
|
"subtitle": "7.Regresja liniowa[ćwiczenia]",
|
||||||
|
"title": "Ekstrakcja informacji",
|
||||||
|
"year": "2021"
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 4
|
"nbformat_minor": 4
|
||||||
|
@ -1,5 +1,19 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n",
|
||||||
|
"<div class=\"alert alert-block alert-info\">\n",
|
||||||
|
"<h1> Ekstrakcja informacji </h1>\n",
|
||||||
|
"<h2> 8. <i>Regresja logistyczna</i> [ćwiczenia]</h2> \n",
|
||||||
|
"<h3> Jakub Pokrywka (2021)</h3>\n",
|
||||||
|
"</div>\n",
|
||||||
|
"\n",
|
||||||
|
"![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@ -1024,11 +1038,14 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"author": "Jakub Pokrywka",
|
||||||
|
"email": "kubapok@wmi.amu.edu.pl",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3",
|
"display_name": "Python 3",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
|
"lang": "pl",
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
"name": "ipython",
|
"name": "ipython",
|
||||||
@ -1039,8 +1056,11 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.8.5"
|
"version": "3.8.3"
|
||||||
}
|
},
|
||||||
|
"subtitle": "8.Regresja logistyczna[ćwiczenia]",
|
||||||
|
"title": "Ekstrakcja informacji",
|
||||||
|
"year": "2021"
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 4
|
"nbformat_minor": 4
|
||||||
|
@ -1,5 +1,19 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n",
|
||||||
|
"<div class=\"alert alert-block alert-info\">\n",
|
||||||
|
"<h1> Ekstrakcja informacji </h1>\n",
|
||||||
|
"<h2> 8. <i>Regresja logistyczna</i> [ćwiczenia]</h2> \n",
|
||||||
|
"<h3> Jakub Pokrywka (2021)</h3>\n",
|
||||||
|
"</div>\n",
|
||||||
|
"\n",
|
||||||
|
"![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@ -1216,11 +1230,14 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"author": "Jakub Pokrywka",
|
||||||
|
"email": "kubapok@wmi.amu.edu.pl",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3",
|
"display_name": "Python 3",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
|
"lang": "pl",
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
"name": "ipython",
|
"name": "ipython",
|
||||||
@ -1231,8 +1248,11 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.8.5"
|
"version": "3.8.3"
|
||||||
}
|
},
|
||||||
|
"subtitle": "8.Regresja logistyczna[ćwiczenia]",
|
||||||
|
"title": "Ekstrakcja informacji",
|
||||||
|
"year": "2021"
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 4
|
"nbformat_minor": 4
|
||||||
|
@ -1,5 +1,19 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n",
|
||||||
|
"<div class=\"alert alert-block alert-info\">\n",
|
||||||
|
"<h1> Ekstrakcja informacji </h1>\n",
|
||||||
|
"<h2> 9. <i>Sequence labeling</i> [ćwiczenia]</h2> \n",
|
||||||
|
"<h3> Jakub Pokrywka (2021)</h3>\n",
|
||||||
|
"</div>\n",
|
||||||
|
"\n",
|
||||||
|
"![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@ -12357,11 +12371,14 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"author": "Jakub Pokrywka",
|
||||||
|
"email": "kubapok@wmi.amu.edu.pl",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3",
|
"display_name": "Python 3",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
|
"lang": "pl",
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
"name": "ipython",
|
"name": "ipython",
|
||||||
@ -12372,8 +12389,11 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.8.5"
|
"version": "3.8.3"
|
||||||
}
|
},
|
||||||
|
"subtitle": "9.Sequence labeling[ćwiczenia]",
|
||||||
|
"title": "Ekstrakcja informacji",
|
||||||
|
"year": "2021"
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 4
|
"nbformat_minor": 4
|
||||||
|
@ -1,5 +1,19 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n",
|
||||||
|
"<div class=\"alert alert-block alert-info\">\n",
|
||||||
|
"<h1> Ekstrakcja informacji </h1>\n",
|
||||||
|
"<h2> 9. <i>Sequence labeling</i> [ćwiczenia]</h2> \n",
|
||||||
|
"<h3> Jakub Pokrywka (2021)</h3>\n",
|
||||||
|
"</div>\n",
|
||||||
|
"\n",
|
||||||
|
"![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@ -908,11 +922,14 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"author": "Jakub Pokrywka",
|
||||||
|
"email": "kubapok@wmi.amu.edu.pl",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3",
|
"display_name": "Python 3",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
|
"lang": "pl",
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
"name": "ipython",
|
"name": "ipython",
|
||||||
@ -923,8 +940,11 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.8.5"
|
"version": "3.8.3"
|
||||||
}
|
},
|
||||||
|
"subtitle": "9.Sequence labeling[ćwiczenia]",
|
||||||
|
"title": "Ekstrakcja informacji",
|
||||||
|
"year": "2021"
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 4
|
"nbformat_minor": 4
|
||||||
|
@ -1,5 +1,19 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n",
|
||||||
|
"<div class=\"alert alert-block alert-info\">\n",
|
||||||
|
"<h1> Ekstrakcja informacji </h1>\n",
|
||||||
|
"<h2> 10. <i>CRF</i> [ćwiczenia]</h2> \n",
|
||||||
|
"<h3> Jakub Pokrywka (2021)</h3>\n",
|
||||||
|
"</div>\n",
|
||||||
|
"\n",
|
||||||
|
"![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@ -404,11 +418,14 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"author": "Jakub Pokrywka",
|
||||||
|
"email": "kubapok@wmi.amu.edu.pl",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3",
|
"display_name": "Python 3",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
|
"lang": "pl",
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
"name": "ipython",
|
"name": "ipython",
|
||||||
@ -419,8 +436,11 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.8.5"
|
"version": "3.8.3"
|
||||||
}
|
},
|
||||||
|
"subtitle": "10.CRF[ćwiczenia]",
|
||||||
|
"title": "Ekstrakcja informacji",
|
||||||
|
"year": "2021"
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 4
|
"nbformat_minor": 4
|
||||||
|
@ -1,5 +1,19 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n",
|
||||||
|
"<div class=\"alert alert-block alert-info\">\n",
|
||||||
|
"<h1> Ekstrakcja informacji </h1>\n",
|
||||||
|
"<h2> 11. <i>NER RNN</i> [ćwiczenia]</h2> \n",
|
||||||
|
"<h3> Jakub Pokrywka (2021)</h3>\n",
|
||||||
|
"</div>\n",
|
||||||
|
"\n",
|
||||||
|
"![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@ -805,11 +819,14 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"author": "Jakub Pokrywka",
|
||||||
|
"email": "kubapok@wmi.amu.edu.pl",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3",
|
"display_name": "Python 3",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
|
"lang": "pl",
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
"name": "ipython",
|
"name": "ipython",
|
||||||
@ -820,8 +837,11 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.8.5"
|
"version": "3.8.3"
|
||||||
}
|
},
|
||||||
|
"subtitle": "11.NER RNN[ćwiczenia]",
|
||||||
|
"title": "Ekstrakcja informacji",
|
||||||
|
"year": "2021"
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 4
|
"nbformat_minor": 4
|
||||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@ -1,271 +1,293 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
"source": [
|
"collapsed": false
|
||||||
"### SIMILARITY SEARCH\n",
|
},
|
||||||
"1. zainstaluj faiss i zrób tutorial: https://github.com/facebookresearch/faiss\n",
|
"source": [
|
||||||
"2. wczytaj treści artykułów z BBC News Train.csv\n",
|
"![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n",
|
||||||
"3. Użyj któregoś z transformerów (możesz użyć biblioteki sentence-transformers) do stworzenia embeddingów dokumentów\n",
|
"<div class=\"alert alert-block alert-info\">\n",
|
||||||
"4. wczytaj embeddingi do bazy danych faiss\n",
|
"<h1> Ekstrakcja informacji </h1>\n",
|
||||||
"5. wyszukaj query 'consumer electronics market'"
|
"<h2> 14. <i>Ekstrakcja informacji seq2seq</i> [\u0107wiczenia]</h2> \n",
|
||||||
]
|
"<h3> Jakub Pokrywka (2021)</h3>\n",
|
||||||
},
|
"</div>\n",
|
||||||
{
|
"\n",
|
||||||
"cell_type": "markdown",
|
"![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)"
|
||||||
"metadata": {},
|
]
|
||||||
"source": [
|
},
|
||||||
"https://www.kaggle.com/avishi/bbc-news-train-data"
|
{
|
||||||
]
|
"cell_type": "markdown",
|
||||||
},
|
"metadata": {},
|
||||||
{
|
"source": [
|
||||||
"cell_type": "code",
|
"### SIMILARITY SEARCH\n",
|
||||||
"execution_count": 25,
|
"1. zainstaluj faiss i zr\u00f3b tutorial: https://github.com/facebookresearch/faiss\n",
|
||||||
"metadata": {},
|
"2. wczytaj tre\u015bci artyku\u0142\u00f3w z BBC News Train.csv\n",
|
||||||
"outputs": [],
|
"3. U\u017cyj kt\u00f3rego\u015b z transformer\u00f3w (mo\u017cesz u\u017cy\u0107 biblioteki sentence-transformers) do stworzenia embedding\u00f3w dokument\u00f3w\n",
|
||||||
"source": [
|
"4. wczytaj embeddingi do bazy danych faiss\n",
|
||||||
"import pandas as pd\n",
|
"5. wyszukaj query 'consumer electronics market'"
|
||||||
"import pickle\n",
|
]
|
||||||
"import numpy as np\n",
|
},
|
||||||
"import faiss\n",
|
{
|
||||||
"from sklearn.metrics import ndcg_score, dcg_score, average_precision_score"
|
"cell_type": "markdown",
|
||||||
]
|
"metadata": {},
|
||||||
},
|
"source": [
|
||||||
{
|
"https://www.kaggle.com/avishi/bbc-news-train-data"
|
||||||
"cell_type": "code",
|
]
|
||||||
"execution_count": 26,
|
},
|
||||||
"metadata": {
|
{
|
||||||
"scrolled": true
|
"cell_type": "code",
|
||||||
},
|
"execution_count": 25,
|
||||||
"outputs": [
|
"metadata": {},
|
||||||
{
|
"outputs": [],
|
||||||
"name": "stdout",
|
"source": [
|
||||||
"output_type": "stream",
|
"import pandas as pd\n",
|
||||||
"text": [
|
"import pickle\n",
|
||||||
"Requirement already satisfied: sentence-transformers in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (1.2.0)\n",
|
"import numpy as np\n",
|
||||||
"Requirement already satisfied: sentencepiece in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from sentence-transformers) (0.1.91)\n",
|
"import faiss\n",
|
||||||
"Requirement already satisfied: torchvision in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from sentence-transformers) (0.6.0)\n",
|
"from sklearn.metrics import ndcg_score, dcg_score, average_precision_score"
|
||||||
"Requirement already satisfied: scipy in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from sentence-transformers) (1.4.1)\n",
|
]
|
||||||
"Requirement already satisfied: torch>=1.6.0 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from sentence-transformers) (1.8.1)\n",
|
},
|
||||||
"Requirement already satisfied: tqdm in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from sentence-transformers) (4.48.2)\n",
|
{
|
||||||
"Requirement already satisfied: scikit-learn in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from sentence-transformers) (0.23.2)\n",
|
"cell_type": "code",
|
||||||
"Requirement already satisfied: nltk in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from sentence-transformers) (3.5)\n",
|
"execution_count": 26,
|
||||||
"Requirement already satisfied: transformers<5.0.0,>=3.1.0 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from sentence-transformers) (4.4.2)\n",
|
"metadata": {
|
||||||
"Requirement already satisfied: numpy in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from sentence-transformers) (1.20.3)\n",
|
"scrolled": true
|
||||||
"Requirement already satisfied: pillow>=4.1.1 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from torchvision->sentence-transformers) (8.0.1)\n",
|
},
|
||||||
"Requirement already satisfied: typing-extensions in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from torch>=1.6.0->sentence-transformers) (3.7.4.3)\n",
|
"outputs": [
|
||||||
"Requirement already satisfied: joblib>=0.11 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from scikit-learn->sentence-transformers) (0.16.0)\n",
|
{
|
||||||
"Requirement already satisfied: threadpoolctl>=2.0.0 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from scikit-learn->sentence-transformers) (2.1.0)\n",
|
"name": "stdout",
|
||||||
"Requirement already satisfied: click in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from nltk->sentence-transformers) (7.1.2)\n",
|
"output_type": "stream",
|
||||||
"Requirement already satisfied: regex in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from nltk->sentence-transformers) (2020.7.14)\n",
|
"text": [
|
||||||
"Requirement already satisfied: sacremoses in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from transformers<5.0.0,>=3.1.0->sentence-transformers) (0.0.43)\n",
|
"Requirement already satisfied: sentence-transformers in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (1.2.0)\n",
|
||||||
"Requirement already satisfied: packaging in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from transformers<5.0.0,>=3.1.0->sentence-transformers) (20.4)\n",
|
"Requirement already satisfied: sentencepiece in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from sentence-transformers) (0.1.91)\n",
|
||||||
"Requirement already satisfied: filelock in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from transformers<5.0.0,>=3.1.0->sentence-transformers) (3.0.12)\n",
|
"Requirement already satisfied: torchvision in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from sentence-transformers) (0.6.0)\n",
|
||||||
"Requirement already satisfied: tokenizers<0.11,>=0.10.1 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from transformers<5.0.0,>=3.1.0->sentence-transformers) (0.10.1)\n",
|
"Requirement already satisfied: scipy in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from sentence-transformers) (1.4.1)\n",
|
||||||
"Requirement already satisfied: requests in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from transformers<5.0.0,>=3.1.0->sentence-transformers) (2.24.0)\n",
|
"Requirement already satisfied: torch>=1.6.0 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from sentence-transformers) (1.8.1)\n",
|
||||||
"Requirement already satisfied: six in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from sacremoses->transformers<5.0.0,>=3.1.0->sentence-transformers) (1.15.0)\n",
|
"Requirement already satisfied: tqdm in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from sentence-transformers) (4.48.2)\n",
|
||||||
"Requirement already satisfied: pyparsing>=2.0.2 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from packaging->transformers<5.0.0,>=3.1.0->sentence-transformers) (2.4.7)\n",
|
"Requirement already satisfied: scikit-learn in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from sentence-transformers) (0.23.2)\n",
|
||||||
"Requirement already satisfied: certifi>=2017.4.17 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from requests->transformers<5.0.0,>=3.1.0->sentence-transformers) (2020.6.20)\n",
|
"Requirement already satisfied: nltk in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from sentence-transformers) (3.5)\n",
|
||||||
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from requests->transformers<5.0.0,>=3.1.0->sentence-transformers) (1.25.10)\n",
|
"Requirement already satisfied: transformers<5.0.0,>=3.1.0 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from sentence-transformers) (4.4.2)\n",
|
||||||
"Requirement already satisfied: idna<3,>=2.5 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from requests->transformers<5.0.0,>=3.1.0->sentence-transformers) (2.10)\n",
|
"Requirement already satisfied: numpy in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from sentence-transformers) (1.20.3)\n",
|
||||||
"Requirement already satisfied: chardet<4,>=3.0.2 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from requests->transformers<5.0.0,>=3.1.0->sentence-transformers) (3.0.4)\n"
|
"Requirement already satisfied: pillow>=4.1.1 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from torchvision->sentence-transformers) (8.0.1)\n",
|
||||||
]
|
"Requirement already satisfied: typing-extensions in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from torch>=1.6.0->sentence-transformers) (3.7.4.3)\n",
|
||||||
}
|
"Requirement already satisfied: joblib>=0.11 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from scikit-learn->sentence-transformers) (0.16.0)\n",
|
||||||
],
|
"Requirement already satisfied: threadpoolctl>=2.0.0 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from scikit-learn->sentence-transformers) (2.1.0)\n",
|
||||||
"source": [
|
"Requirement already satisfied: click in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from nltk->sentence-transformers) (7.1.2)\n",
|
||||||
"!pip install sentence-transformers"
|
"Requirement already satisfied: regex in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from nltk->sentence-transformers) (2020.7.14)\n",
|
||||||
]
|
"Requirement already satisfied: sacremoses in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from transformers<5.0.0,>=3.1.0->sentence-transformers) (0.0.43)\n",
|
||||||
},
|
"Requirement already satisfied: packaging in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from transformers<5.0.0,>=3.1.0->sentence-transformers) (20.4)\n",
|
||||||
{
|
"Requirement already satisfied: filelock in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from transformers<5.0.0,>=3.1.0->sentence-transformers) (3.0.12)\n",
|
||||||
"cell_type": "code",
|
"Requirement already satisfied: tokenizers<0.11,>=0.10.1 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from transformers<5.0.0,>=3.1.0->sentence-transformers) (0.10.1)\n",
|
||||||
"execution_count": 27,
|
"Requirement already satisfied: requests in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from transformers<5.0.0,>=3.1.0->sentence-transformers) (2.24.0)\n",
|
||||||
"metadata": {},
|
"Requirement already satisfied: six in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from sacremoses->transformers<5.0.0,>=3.1.0->sentence-transformers) (1.15.0)\n",
|
||||||
"outputs": [
|
"Requirement already satisfied: pyparsing>=2.0.2 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from packaging->transformers<5.0.0,>=3.1.0->sentence-transformers) (2.4.7)\n",
|
||||||
{
|
"Requirement already satisfied: certifi>=2017.4.17 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from requests->transformers<5.0.0,>=3.1.0->sentence-transformers) (2020.6.20)\n",
|
||||||
"name": "stdout",
|
"Requirement already satisfied: urllib3!=1.25.0,!=1.25.1,<1.26,>=1.21.1 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from requests->transformers<5.0.0,>=3.1.0->sentence-transformers) (1.25.10)\n",
|
||||||
"output_type": "stream",
|
"Requirement already satisfied: idna<3,>=2.5 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from requests->transformers<5.0.0,>=3.1.0->sentence-transformers) (2.10)\n",
|
||||||
"text": [
|
"Requirement already satisfied: chardet<4,>=3.0.2 in /media/kuba/ssdsam/anaconda3/lib/python3.8/site-packages (from requests->transformers<5.0.0,>=3.1.0->sentence-transformers) (3.0.4)\n"
|
||||||
"[[-0.07142266 -0.07716199 -0.03047761 ... 0.01356028 -0.04016104\n",
|
]
|
||||||
" -0.02446149]\n",
|
}
|
||||||
" [-0.06508802 -0.06923407 -0.03735013 ... 0.01013562 -0.04027328\n",
|
],
|
||||||
" -0.02171571]]\n"
|
"source": [
|
||||||
]
|
"!pip install sentence-transformers"
|
||||||
}
|
]
|
||||||
],
|
},
|
||||||
"source": [
|
{
|
||||||
"from sentence_transformers import SentenceTransformer\n",
|
"cell_type": "code",
|
||||||
"sentences = [\"Hello World\", \"Hallo Welt\"]\n",
|
"execution_count": 27,
|
||||||
"\n",
|
"metadata": {},
|
||||||
"model = SentenceTransformer('LaBSE')\n",
|
"outputs": [
|
||||||
"embeddings = model.encode(sentences)\n",
|
{
|
||||||
"print(embeddings)"
|
"name": "stdout",
|
||||||
]
|
"output_type": "stream",
|
||||||
},
|
"text": [
|
||||||
{
|
"[[-0.07142266 -0.07716199 -0.03047761 ... 0.01356028 -0.04016104\n",
|
||||||
"cell_type": "code",
|
" -0.02446149]\n",
|
||||||
"execution_count": 28,
|
" [-0.06508802 -0.06923407 -0.03735013 ... 0.01013562 -0.04027328\n",
|
||||||
"metadata": {
|
" -0.02171571]]\n"
|
||||||
"scrolled": true
|
]
|
||||||
},
|
}
|
||||||
"outputs": [],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"r = pd.read_csv('BBC News Train.csv')"
|
"from sentence_transformers import SentenceTransformer\n",
|
||||||
]
|
"sentences = [\"Hello World\", \"Hallo Welt\"]\n",
|
||||||
},
|
"\n",
|
||||||
{
|
"model = SentenceTransformer('LaBSE')\n",
|
||||||
"cell_type": "code",
|
"embeddings = model.encode(sentences)\n",
|
||||||
"execution_count": 29,
|
"print(embeddings)"
|
||||||
"metadata": {},
|
]
|
||||||
"outputs": [],
|
},
|
||||||
"source": [
|
{
|
||||||
"DOCUMENTS = list(r.Text)"
|
"cell_type": "code",
|
||||||
]
|
"execution_count": 28,
|
||||||
},
|
"metadata": {
|
||||||
{
|
"scrolled": true
|
||||||
"cell_type": "code",
|
},
|
||||||
"execution_count": 30,
|
"outputs": [],
|
||||||
"metadata": {},
|
"source": [
|
||||||
"outputs": [],
|
"r = pd.read_csv('BBC News Train.csv')"
|
||||||
"source": [
|
]
|
||||||
"embeddings = model.encode(DOCUMENTS)"
|
},
|
||||||
]
|
{
|
||||||
},
|
"cell_type": "code",
|
||||||
{
|
"execution_count": 29,
|
||||||
"cell_type": "code",
|
"metadata": {},
|
||||||
"execution_count": 31,
|
"outputs": [],
|
||||||
"metadata": {},
|
"source": [
|
||||||
"outputs": [],
|
"DOCUMENTS = list(r.Text)"
|
||||||
"source": [
|
]
|
||||||
"embeddings = model.encode(list(r.Text))"
|
},
|
||||||
]
|
{
|
||||||
},
|
"cell_type": "code",
|
||||||
{
|
"execution_count": 30,
|
||||||
"cell_type": "code",
|
"metadata": {},
|
||||||
"execution_count": 32,
|
"outputs": [],
|
||||||
"metadata": {},
|
"source": [
|
||||||
"outputs": [],
|
"embeddings = model.encode(DOCUMENTS)"
|
||||||
"source": [
|
]
|
||||||
"QUERY_STR = 'consumer electronics market'"
|
},
|
||||||
]
|
{
|
||||||
},
|
"cell_type": "code",
|
||||||
{
|
"execution_count": 31,
|
||||||
"cell_type": "code",
|
"metadata": {},
|
||||||
"execution_count": 33,
|
"outputs": [],
|
||||||
"metadata": {},
|
"source": [
|
||||||
"outputs": [],
|
"embeddings = model.encode(list(r.Text))"
|
||||||
"source": [
|
]
|
||||||
"query = model.encode([QUERY_STR])"
|
},
|
||||||
]
|
{
|
||||||
},
|
"cell_type": "code",
|
||||||
{
|
"execution_count": 32,
|
||||||
"cell_type": "code",
|
"metadata": {},
|
||||||
"execution_count": 34,
|
"outputs": [],
|
||||||
"metadata": {},
|
"source": [
|
||||||
"outputs": [],
|
"QUERY_STR = 'consumer electronics market'"
|
||||||
"source": [
|
]
|
||||||
"index = faiss.IndexFlatL2(embeddings.shape[1]) "
|
},
|
||||||
]
|
{
|
||||||
},
|
"cell_type": "code",
|
||||||
{
|
"execution_count": 33,
|
||||||
"cell_type": "code",
|
"metadata": {},
|
||||||
"execution_count": 35,
|
"outputs": [],
|
||||||
"metadata": {},
|
"source": [
|
||||||
"outputs": [],
|
"query = model.encode([QUERY_STR])"
|
||||||
"source": [
|
]
|
||||||
"index.add(np.ascontiguousarray(embeddings))"
|
},
|
||||||
]
|
{
|
||||||
},
|
"cell_type": "code",
|
||||||
{
|
"execution_count": 34,
|
||||||
"cell_type": "code",
|
"metadata": {},
|
||||||
"execution_count": 36,
|
"outputs": [],
|
||||||
"metadata": {},
|
"source": [
|
||||||
"outputs": [],
|
"index = faiss.IndexFlatL2(embeddings.shape[1]) "
|
||||||
"source": [
|
]
|
||||||
"D, I = index.search(query, 5) "
|
},
|
||||||
]
|
{
|
||||||
},
|
"cell_type": "code",
|
||||||
{
|
"execution_count": 35,
|
||||||
"cell_type": "code",
|
"metadata": {},
|
||||||
"execution_count": 37,
|
"outputs": [],
|
||||||
"metadata": {},
|
"source": [
|
||||||
"outputs": [
|
"index.add(np.ascontiguousarray(embeddings))"
|
||||||
{
|
]
|
||||||
"data": {
|
},
|
||||||
"text/plain": [
|
{
|
||||||
"array([[1363, 1371, 898, 744, 292]])"
|
"cell_type": "code",
|
||||||
]
|
"execution_count": 36,
|
||||||
},
|
"metadata": {},
|
||||||
"execution_count": 37,
|
"outputs": [],
|
||||||
"metadata": {},
|
"source": [
|
||||||
"output_type": "execute_result"
|
"D, I = index.search(query, 5) "
|
||||||
}
|
]
|
||||||
],
|
},
|
||||||
"source": [
|
{
|
||||||
"I"
|
"cell_type": "code",
|
||||||
]
|
"execution_count": 37,
|
||||||
},
|
"metadata": {},
|
||||||
{
|
"outputs": [
|
||||||
"cell_type": "code",
|
{
|
||||||
"execution_count": 38,
|
"data": {
|
||||||
"metadata": {},
|
"text/plain": [
|
||||||
"outputs": [
|
"array([[1363, 1371, 898, 744, 292]])"
|
||||||
{
|
]
|
||||||
"data": {
|
},
|
||||||
"text/plain": [
|
"execution_count": 37,
|
||||||
"array([[1.3110979, 1.4027181, 1.4045265, 1.4421673, 1.4421673]],\n",
|
"metadata": {},
|
||||||
" dtype=float32)"
|
"output_type": "execute_result"
|
||||||
]
|
}
|
||||||
},
|
],
|
||||||
"execution_count": 38,
|
"source": [
|
||||||
"metadata": {},
|
"I"
|
||||||
"output_type": "execute_result"
|
]
|
||||||
}
|
},
|
||||||
],
|
{
|
||||||
"source": [
|
"cell_type": "code",
|
||||||
"D"
|
"execution_count": 38,
|
||||||
]
|
"metadata": {},
|
||||||
},
|
"outputs": [
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"data": {
|
||||||
"execution_count": 39,
|
"text/plain": [
|
||||||
"metadata": {},
|
"array([[1.3110979, 1.4027181, 1.4045265, 1.4421673, 1.4421673]],\n",
|
||||||
"outputs": [
|
" dtype=float32)"
|
||||||
{
|
]
|
||||||
"data": {
|
},
|
||||||
"text/plain": [
|
"execution_count": 38,
|
||||||
"'internet boom for gift shopping cyberspace is becoming a very popular destination for christmas shoppers. forecasts predict that british people will spend £4bn buying gifts online during the festive season an increase of 64% on 2003. surveys also show that the average amount that people are spending is rising as is the range of goods that they are happy to buy online. savvy shoppers are also using the net to find the hot presents that are all but sold out in high street stores. almost half of the uk population now shop online according to figures collected by the interactive media in retail group which represents web retailers. about 85% of this group 18m people expect to do a lot of their christmas gift buying online this year reports the industry group. on average each shopper will spend £220 and britons lead europe in their affection for online shopping. almost a third of all the money spent online this christmas will come out of british wallets and purses compared to 29% from german shoppers and only 4% from italian gift buyers. james roper director of the imrg said shoppers were now much happier to buy so-called big ticket items such as lcd television sets and digital cameras. mr roper added that many retailers were working hard to reassure consumers that online shopping was safe and that goods ordered as presents would arrive in time for christmas. he advised consumers to give shops a little more time than usual to fulfil orders given that online buying is proving so popular. a survey by hostway suggests that many men prefer to shop online to avoid the embarrassment of buying some types of presents such as lingerie for wives and girlfriends. much of this online shopping is likely to be done during work time according to research carried out by security firm saint bernard software. the research reveals that up to two working days will be lost by staff who do their shopping via their work computer. 
worst offenders will be those in the 18-35 age bracket suggests the research who will spend up to five hours per week in december browsing and buying at online shops. iggy fanlo chief revenue officer at shopping.com said that the growing numbers of people using broadband was driving interest in online shopping. when you consider narrowband and broadband the conversion to sale is two times higher he said. higher speeds meant that everything happened much faster he said which let people spend time browsing and finding out about products before they buy. the behaviour of online shoppers was also changing he said. the single biggest reason people went online before this year was price he said. the number one reason now is convenience. very few consumers click on the lowest price he said. they are looking for good prices and merchant reliability. consumer comments and reviews were also proving popular with shoppers keen to find out who had the most reliable customer service. data collected by ebay suggests that some smart shoppers are getting round the shortages of hot presents by buying them direct through the auction site. according to ebay uk there are now more than 150 robosapiens remote control robots for sale via the site. the robosapiens toy is almost impossible to find in online and offline stores. similarly many shoppers are turning to ebay to help them get hold of the hard-to-find slimline playstation 2 which many retailers are only selling as part of an expensive bundle. the high demand for the playstation 2 has meant that prices for it are being driven up. in shops the ps2 is supposed to sell for £104.99. in some ebay uk auctions the price has risen to more than double this figure. many people are also using ebay to get hold of gadgets not even released in this country. the portable version of the playstation has only just gone on sale in japan yet some enterprising ebay users are selling the device to uk gadget fans.'"
|
"metadata": {},
|
||||||
]
|
"output_type": "execute_result"
|
||||||
},
|
}
|
||||||
"execution_count": 39,
|
],
|
||||||
"metadata": {},
|
"source": [
|
||||||
"output_type": "execute_result"
|
"D"
|
||||||
}
|
]
|
||||||
],
|
},
|
||||||
"source": [
|
{
|
||||||
"DOCUMENTS[1363]"
|
"cell_type": "code",
|
||||||
]
|
"execution_count": 39,
|
||||||
}
|
"metadata": {},
|
||||||
],
|
"outputs": [
|
||||||
"metadata": {
|
{
|
||||||
"kernelspec": {
|
"data": {
|
||||||
"display_name": "Python 3",
|
"text/plain": [
|
||||||
"language": "python",
|
"'internet boom for gift shopping cyberspace is becoming a very popular destination for christmas shoppers. forecasts predict that british people will spend \u00a34bn buying gifts online during the festive season an increase of 64% on 2003. surveys also show that the average amount that people are spending is rising as is the range of goods that they are happy to buy online. savvy shoppers are also using the net to find the hot presents that are all but sold out in high street stores. almost half of the uk population now shop online according to figures collected by the interactive media in retail group which represents web retailers. about 85% of this group 18m people expect to do a lot of their christmas gift buying online this year reports the industry group. on average each shopper will spend \u00a3220 and britons lead europe in their affection for online shopping. almost a third of all the money spent online this christmas will come out of british wallets and purses compared to 29% from german shoppers and only 4% from italian gift buyers. james roper director of the imrg said shoppers were now much happier to buy so-called big ticket items such as lcd television sets and digital cameras. mr roper added that many retailers were working hard to reassure consumers that online shopping was safe and that goods ordered as presents would arrive in time for christmas. he advised consumers to give shops a little more time than usual to fulfil orders given that online buying is proving so popular. a survey by hostway suggests that many men prefer to shop online to avoid the embarrassment of buying some types of presents such as lingerie for wives and girlfriends. much of this online shopping is likely to be done during work time according to research carried out by security firm saint bernard software. the research reveals that up to two working days will be lost by staff who do their shopping via their work computer. 
worst offenders will be those in the 18-35 age bracket suggests the research who will spend up to five hours per week in december browsing and buying at online shops. iggy fanlo chief revenue officer at shopping.com said that the growing numbers of people using broadband was driving interest in online shopping. when you consider narrowband and broadband the conversion to sale is two times higher he said. higher speeds meant that everything happened much faster he said which let people spend time browsing and finding out about products before they buy. the behaviour of online shoppers was also changing he said. the single biggest reason people went online before this year was price he said. the number one reason now is convenience. very few consumers click on the lowest price he said. they are looking for good prices and merchant reliability. consumer comments and reviews were also proving popular with shoppers keen to find out who had the most reliable customer service. data collected by ebay suggests that some smart shoppers are getting round the shortages of hot presents by buying them direct through the auction site. according to ebay uk there are now more than 150 robosapiens remote control robots for sale via the site. the robosapiens toy is almost impossible to find in online and offline stores. similarly many shoppers are turning to ebay to help them get hold of the hard-to-find slimline playstation 2 which many retailers are only selling as part of an expensive bundle. the high demand for the playstation 2 has meant that prices for it are being driven up. in shops the ps2 is supposed to sell for \u00a3104.99. in some ebay uk auctions the price has risen to more than double this figure. many people are also using ebay to get hold of gadgets not even released in this country. the portable version of the playstation has only just gone on sale in japan yet some enterprising ebay users are selling the device to uk gadget fans.'"
|
||||||
"name": "python3"
|
]
|
||||||
},
|
},
|
||||||
"language_info": {
|
"execution_count": 39,
|
||||||
"codemirror_mode": {
|
"metadata": {},
|
||||||
"name": "ipython",
|
"output_type": "execute_result"
|
||||||
"version": 3
|
}
|
||||||
},
|
],
|
||||||
"file_extension": ".py",
|
"source": [
|
||||||
"mimetype": "text/x-python",
|
"DOCUMENTS[1363]"
|
||||||
"name": "python",
|
]
|
||||||
"nbconvert_exporter": "python",
|
}
|
||||||
"pygments_lexer": "ipython3",
|
],
|
||||||
"version": "3.8.3"
|
"metadata": {
|
||||||
}
|
"kernelspec": {
|
||||||
},
|
"display_name": "Python 3",
|
||||||
"nbformat": 4,
|
"language": "python",
|
||||||
"nbformat_minor": 4
|
"name": "python3"
|
||||||
}
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.8.3"
|
||||||
|
},
|
||||||
|
"author": "Jakub Pokrywka",
|
||||||
|
"email": "kubapok@wmi.amu.edu.pl",
|
||||||
|
"lang": "pl",
|
||||||
|
"subtitle": "14.Ekstrakcja informacji seq2seq[\u0107wiczenia]",
|
||||||
|
"title": "Ekstrakcja informacji",
|
||||||
|
"year": "2021"
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
File diff suppressed because one or more lines are too long
5
run_conversion.sh
Normal file
5
run_conversion.sh
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
for i in {cw,wyk}/*.ipynb;
|
||||||
|
do
|
||||||
|
bash convert_ipynb_to_md.sh $i
|
||||||
|
echo $i done
|
||||||
|
done
|
@ -3,6 +3,22 @@
|
|||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n",
|
||||||
|
"<div class=\"alert alert-block alert-info\">\n",
|
||||||
|
"<h1> Ekstrakcja informacji </h1>\n",
|
||||||
|
"<h2> 1. <i>Wyszukiwarki — wprowadzenie</i> [wykład]</h2> \n",
|
||||||
|
"<h3> Filip Graliński (2021)</h3>\n",
|
||||||
|
"</div>\n",
|
||||||
|
"\n",
|
||||||
|
"![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Wyszukiwarki - wprowadzenie\n",
|
"# Wyszukiwarki - wprowadzenie\n",
|
||||||
"\n",
|
"\n",
|
||||||
@ -13,7 +29,10 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"jp-MarkdownHeadingCollapsed": true,
|
||||||
|
"tags": []
|
||||||
|
},
|
||||||
"source": [
|
"source": [
|
||||||
"## Wyszukiwarki\n",
|
"## Wyszukiwarki\n",
|
||||||
"\n",
|
"\n",
|
||||||
@ -817,823 +836,7 @@
|
|||||||
"User-agent: *\n",
|
"User-agent: *\n",
|
||||||
"Disallow: /*/wyszukaj/\n",
|
"Disallow: /*/wyszukaj/\n",
|
||||||
"Disallow: /*servlet\n",
|
"Disallow: /*servlet\n",
|
||||||
"Disallow: /reloadwww?\n",
|
"...\n",
|
||||||
"Disallow: /dfptools/adview/\n",
|
|
||||||
"Disallow: /pub/ips/*\n",
|
|
||||||
"Disallow: /ods?\n",
|
|
||||||
"Disallow: /getFile.servlet*\n",
|
|
||||||
"Disallow: /aliasy/blad.jsp\n",
|
|
||||||
"Disallow: /znajdz.do\n",
|
|
||||||
"Disallow: /portalSearch.do\n",
|
|
||||||
"Disallow: /im/ab/b4/10/z17515435Q.jpg\n",
|
|
||||||
"Disallow: /75224259/\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Googlebot-News\n",
|
|
||||||
"Disallow: /nowy/\n",
|
|
||||||
"Disallow: /mapa_strony\n",
|
|
||||||
"Disallow: /*/wyszukaj/\n",
|
|
||||||
"Disallow: /*/51,\n",
|
|
||||||
"Disallow: /*/55,\n",
|
|
||||||
"Disallow: /*/2,\n",
|
|
||||||
"Disallow: /*order=\n",
|
|
||||||
"Disallow: /*obxx=\n",
|
|
||||||
"Disallow: /*tag=\n",
|
|
||||||
"Disallow: /reloadwww?\n",
|
|
||||||
"Disallow: /ods?\n",
|
|
||||||
"Disallow: /*servlet\n",
|
|
||||||
"Disallow: /dfptools/adview/\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Yandex\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-Agent: bingbot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: 008\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: 010\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: 360Spider\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: 80legs\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Aboundex\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: accelobot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Add\\ Catalog\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: AhrefsBot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: aiHitBot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Alexibot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Aqua_Products\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: AskJeeves\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: asterias\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: awcheckBot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: b2w/0.1\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: BackDoorBot/1.0\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: BacklinkCrawler\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Baiduspider\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: BecomeBot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: BLEXBot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: BlowFish/1.0\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Bookmark search tool\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: BotALot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: brandwatch.net\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: BuiltBotTough\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Bullseye/1.0\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: BunnySlippers\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Butterfly\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: CatchBot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Charlotte\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: CheeseBot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: CherryPicker\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: CherryPickerElite/1.0\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: CherryPickerSE/1.0\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: CLIPish\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Cliqzbot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: COMODO\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Comodo-Certificates-Spider\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: CompSpyBot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Copernic\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: CopyRightCheck\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: cosmos\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: crawler\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Crescent\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Crescent Internet ToolPak HTTP OLE Control v.1.0\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Curious\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: curl\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: dataprovider\\.com\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: DinoPing\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: discoverybot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: DittoSpyder\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: DomainCrawler\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: DomainCrawler\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: dotbot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: dotnetdotcom\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Dow\\ Jones\\ Searchbot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: dumbot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: EasouSpider\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: EmailCollector\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: EmailSiphon\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: EmailWolf\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Enterprise_Search\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Enterprise_Search/1.0\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: EroCrawler\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: es\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Exabot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: ExtractorPro\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: EzineArticlesLinkScanner\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Ezooms\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: FairAd Client\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Flaming AttackBot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Foobot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: FreeFind\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: FTRF\\:\\ Friendly\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Gaisbot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: GetRight/4.2\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: gigabot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: grub\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: grub-client\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Harvest/1.5\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Hatena Antenna\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: hloader\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: http://www.SearchEngineWorld.com bot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: http://www.WebmasterWorld.com bot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: HTTP_Request\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: HTTP_Request2\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: httplib\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: humanlinks\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: ia_archiver\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: ia_archiver\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: ia_archiver/1.6\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Indy\\ Library\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: InfoNaviRobot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: ip\\-web\\-crawler\\.com\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Iron33/1.0.2\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Jakarta\\ Commons-HttpClient\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Jeeves\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: JennyBot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Jetbot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Jetbot/1.0\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: JikeSpider\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Kenjin Spider\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Keyword Density/0.9\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: larbin\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: LexiBot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: libWeb/clsHTTP\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: libwww-perl\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: lindex\\.com\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: linkdex\\.com\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: linkdexbot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: LinkextractorPro\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: LinkScan/8.1a Unix\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: LinkWalker\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: lipperhey\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: LNSpiderguy\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: looksmart\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: ltbot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: lwp-trivial\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: lwp-trivial/1.34\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Lynx\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: magpie\\-crawler\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Mata Hari\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Microsoft URL Control\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Microsoft URL Control - 5.01.4511\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Microsoft URL Control - 6.00.8169\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: MIIxpc\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: MIIxpc/4.2\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Mister PiX\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: MJ12bot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: moget\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: moget/2.1\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Mozilla/4.0 (compatible; BullsEye; Windows 95)\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: MSIE\\ or\\ Firefox\\ mutant\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: MSIECrawler\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: naver\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: NCBot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: NetAnts\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: NetcraftSurveyAgent\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: netEstate\\ NE\\ Crawler\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: NetMechanic\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Netseer\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: NextGenSearchBot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: NICErsPRO\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Nutch\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Nutch\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Ocelli\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Offline Explorer\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: OmniExplorer_Bot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Openbot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Openfind\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Openfind\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Openfind data gathere\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: OpenWebIndex\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Oracle Ultra Search\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: PagesInventory\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: PEAR\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: PeoplePal\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: PerMan\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: ProCogSEOBot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: ProPowerBot/2.14\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: ProWebWalker\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: proximic\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: psbot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: purebot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: QueryN Metasearch\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: QuerySeekerSpider\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Radiation Retriever 1.1\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: RepoMonkey\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: RepoMonkey Bait & Tackle/v1.01\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Riddler\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: RMA\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: rojerbot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: RyteBot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: scooter\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: ScoutJet\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Scrapy\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: ScreenerBot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: searchmetrics\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: searchpreview\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: SemrushBot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: sentibot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: SEO-CRAWLING\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: SEOENGWorldBot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: SEOkicks-Robot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: ShopWiki\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: sistrix\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: sitebot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: SiteSnagger\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Snoopy\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: SocialSearcher\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Sogou\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: SolomonoBot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: sootle\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Sosospider\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: SpankBot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: spanner\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: spbot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Speedy\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Stanford\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Stanford Comp Sci\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: SurveyBot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: suzuran\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Szukacz/1.4\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Szukacz/1.4\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Teleport\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: TeleportPro\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Telesoft\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Teoma\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: The Intraformant\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: The\\ Incutio\\ XML-RPC\\ PHP\\ Library\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: TheNomad\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: toCrawl/UrlDispatcher\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: True_Robot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: True_Robot/1.0\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: turingos\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: TurnitinBot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: uCrawler\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: URL Control\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: URL_Spider_Pro\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: URLy Warning\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: VCI\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: VCI WebViewer VCI WebViewer Win32\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: visaduhoc\\.info\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: WBSearchBot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Web Image Collector\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: WebAuto\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: WebBandit\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: WebBandit/3.50\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: WebCapture\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: WebCopier\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: WebEnhancer\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: WebInDetail\\.com\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: WebmasterWorld Extractor\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: WebmasterWorldForumBot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: WebSauger\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Website Quester\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: WEBSITEtheWEB\\.COM\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Webster Pro\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: WebStripper\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: WebVac\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: WebZip\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: WebZip/4.0\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Wget\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Wget/1.5.3\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Wget/1.6\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Wotbot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: www\\.integromedb\\.org\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: WWW-Collector-E\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Xenu's\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Xenu's Link Sleuth 1.1c\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: xpymep\\.exe\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: YamanaLab-Robot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: YisouSpider\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: YodaoBot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: YoudaoBot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Zend_Http_Client\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Zeus\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Zeus 32297 Webster Pro V2.9 Win32\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Zeus Link Scout\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: ZmEu\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: ZumBot\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
|
||||||
"User-agent: Linguee\n",
|
|
||||||
"Disallow: /\n",
|
|
||||||
"\n",
|
"\n",
|
||||||
"User-agent: sogou\n",
|
"User-agent: sogou\n",
|
||||||
"Disallow: /\n"
|
"Disallow: /\n"
|
||||||
@ -1675,11 +878,14 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"author": "Filip Graliński",
|
||||||
|
"email": "filipg@amu.edu.pl",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3",
|
"display_name": "Python 3 (ipykernel)",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
|
"lang": "pl",
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
"name": "ipython",
|
"name": "ipython",
|
||||||
@ -1690,8 +896,11 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.9.1"
|
"version": "3.9.6"
|
||||||
}
|
},
|
||||||
|
"subtitle": "2.Wyszukiwarki — wprowadzenie[wykład]",
|
||||||
|
"title": "Ekstrakcja informacji",
|
||||||
|
"year": "2021"
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 4
|
"nbformat_minor": 4
|
||||||
|
@ -1,5 +1,19 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n",
|
||||||
|
"<div class=\"alert alert-block alert-info\">\n",
|
||||||
|
"<h1> Ekstrakcja informacji </h1>\n",
|
||||||
|
"<h2> 2. <i>Wyszukiwarki — roboty</i> [wykład]</h2> \n",
|
||||||
|
"<h3> Filip Graliński (2021)</h3>\n",
|
||||||
|
"</div>\n",
|
||||||
|
"\n",
|
||||||
|
"![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@ -494,11 +508,14 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"author": "Filip Graliński",
|
||||||
|
"email": "filipg@amu.edu.pl",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3",
|
"display_name": "Python 3 (ipykernel)",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
|
"lang": "pl",
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
"name": "ipython",
|
"name": "ipython",
|
||||||
@ -509,8 +526,11 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.9.1"
|
"version": "3.9.6"
|
||||||
}
|
},
|
||||||
|
"subtitle": "2.Wyszukiwarki — roboty[wykład]",
|
||||||
|
"title": "Ekstrakcja informacji",
|
||||||
|
"year": "2021"
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 4
|
"nbformat_minor": 4
|
||||||
|
9120
wyk/03_Tfidf.ipynb
9120
wyk/03_Tfidf.ipynb
File diff suppressed because one or more lines are too long
File diff suppressed because it is too large
Load Diff
@ -1,5 +1,19 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n",
|
||||||
|
"<div class=\"alert alert-block alert-info\">\n",
|
||||||
|
"<h1> Ekstrakcja informacji </h1>\n",
|
||||||
|
"<h2> 6. <i>Wyzwania uczenia maszynowego</i> [wykład]</h2> \n",
|
||||||
|
"<h3> Filip Graliński (2021)</h3>\n",
|
||||||
|
"</div>\n",
|
||||||
|
"\n",
|
||||||
|
"![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@ -367,11 +381,14 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"author": "Filip Graliński",
|
||||||
|
"email": "filipg@amu.edu.pl",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3",
|
"display_name": "Python 3 (ipykernel)",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
|
"lang": "pl",
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
"name": "ipython",
|
"name": "ipython",
|
||||||
@ -382,8 +399,11 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.9.2"
|
"version": "3.9.6"
|
||||||
}
|
},
|
||||||
|
"subtitle": "6.Wyzwania uczenia maszynowego[wykład]",
|
||||||
|
"title": "Ekstrakcja informacji",
|
||||||
|
"year": "2021"
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 4
|
"nbformat_minor": 4
|
||||||
|
@ -1,5 +1,20 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "45264aad",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n",
|
||||||
|
"<div class=\"alert alert-block alert-info\">\n",
|
||||||
|
"<h1> Ekstrakcja informacji </h1>\n",
|
||||||
|
"<h2> 7. <i>Naiwny klasyfikator bayesowski w ekstrakcji informacji</i> [wykład]</h2> \n",
|
||||||
|
"<h3> Filip Graliński (2021)</h3>\n",
|
||||||
|
"</div>\n",
|
||||||
|
"\n",
|
||||||
|
"![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "moderate-array",
|
"id": "moderate-array",
|
||||||
@ -347,11 +362,14 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"author": "Filip Graliński",
|
||||||
|
"email": "filipg@amu.edu.pl",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3",
|
"display_name": "Python 3 (ipykernel)",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
|
"lang": "pl",
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
"name": "ipython",
|
"name": "ipython",
|
||||||
@ -362,8 +380,11 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.9.2"
|
"version": "3.9.6"
|
||||||
}
|
},
|
||||||
|
"subtitle": "7.Naiwny klasyfikator bayesowski w ekstrakcji informacji[wykład]",
|
||||||
|
"title": "Ekstrakcja informacji",
|
||||||
|
"year": "2021"
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 5
|
"nbformat_minor": 5
|
||||||
|
@ -1,5 +1,20 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"id": "35c19016",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n",
|
||||||
|
"<div class=\"alert alert-block alert-info\">\n",
|
||||||
|
"<h1> Ekstrakcja informacji </h1>\n",
|
||||||
|
"<h2> 8. <i>Regresja liniowa</i> [wykład]</h2> \n",
|
||||||
|
"<h3> Filip Graliński (2021)</h3>\n",
|
||||||
|
"</div>\n",
|
||||||
|
"\n",
|
||||||
|
"![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "cathedral-newark",
|
"id": "cathedral-newark",
|
||||||
@ -141,6 +156,8 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"![Morskie Oko - Krzysztof Dudzik](08_files/morskieoko.jpg)\n",
|
"![Morskie Oko - Krzysztof Dudzik](08_files/morskieoko.jpg)\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
"(Źródło: https://pl.wikipedia.org/wiki/Morskie_Oko#/media/Plik:Morskie_Oko_ze_szlaku_przez_%C5%9Awist%C3%B3wk%C4%99.jpg, licencja CC BY 3.0)\n",
|
||||||
|
"\n",
|
||||||
"Schodź wzdłuż lokalnego spadku funkcji błędu.\n",
|
"Schodź wzdłuż lokalnego spadku funkcji błędu.\n",
|
||||||
"\n",
|
"\n",
|
||||||
"Tak więc w praktyce zamiast podstawiać do wzoru lepiej się uczyć iteracyjnie -\n",
|
"Tak więc w praktyce zamiast podstawiać do wzoru lepiej się uczyć iteracyjnie -\n",
|
||||||
@ -279,11 +296,14 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"author": "Filip Graliński",
|
||||||
|
"email": "filipg@amu.edu.pl",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3",
|
"display_name": "Python 3 (ipykernel)",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
|
"lang": "pl",
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
"name": "ipython",
|
"name": "ipython",
|
||||||
@ -294,8 +314,11 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.9.2"
|
"version": "3.9.6"
|
||||||
}
|
},
|
||||||
|
"subtitle": "8.Regresja liniowa[wykład]",
|
||||||
|
"title": "Ekstrakcja informacji",
|
||||||
|
"year": "2021"
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 5
|
"nbformat_minor": 5
|
||||||
|
@ -1,5 +1,19 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n",
|
||||||
|
"<div class=\"alert alert-block alert-info\">\n",
|
||||||
|
"<h1> Ekstrakcja informacji </h1>\n",
|
||||||
|
"<h2> 9. <i>Przegląd składowych sieci neuronowych</i> [wykład]</h2> \n",
|
||||||
|
"<h3> Filip Graliński (2021)</h3>\n",
|
||||||
|
"</div>\n",
|
||||||
|
"\n",
|
||||||
|
"![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@ -1423,229 +1437,9 @@
|
|||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"0.04162006452679634 2.081003189086914 0 0 tensor([[0.1248, 0.1249, 0.1252, 0.1248, 0.1248, 0.1253, 0.1251, 0.1251]],\n",
|
"0.04162006452679634 2.081003189086914 0 0 tensor([[0.1248, 0.1249, 0.1252, 0.1248, 0.1248, 0.1253, 0.1251, 0.1251]],\n",
|
||||||
" grad_fn=<ExpBackward>) MŚ w hokeju: mocny początek Finów w Danii. Francja podniosła się po laniu od Rosjan Reprezentacja Finlandii po niepowodzeniach na ostatnich igrzyskach olimpijskich rozpoczęła dobrze tegoroczny turniej mistrzostw świata elity od pewnej wygranej z Koreą Południową. Francuzi zdobyli pierwsze punkty po pokonaniu Białorusi.\n",
|
" grad_fn=<ExpBackward>) MŚ w hokeju: [...]\n",
|
||||||
"2.0791335105895996 2.0685672760009766 50 4 tensor([[0.1260, 0.1265, 0.1247, 0.1244, 0.1264, 0.1241, 0.1239, 0.1241]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Fotorelacja: Ważenie przed galą KSW 27 - Cage Time Prezentujemy fotorelację z oficjalnej ceremonii ważenia przed sobotnią galą KSW 27 - Cage Time.\n",
|
|
||||||
"2.069852828979492 2.1081838607788086 100 5 tensor([[0.1268, 0.1265, 0.1260, 0.1250, 0.1262, 0.1215, 0.1222, 0.1258]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Wisła podpisała swój najważniejszy kontrakt Gdyby nie firma Can-Pack S.A. nie byłoby w ostatnich latach wielkich sukcesów koszykarek z Krakowa. We wtorek ogłoszono przedłużenie umowy i koszykarki spod znaku Białej Gwiazdy nadal będą występować pod nazwą Wisła Can-Pack Kraków.\n",
|
|
||||||
"2.0650603771209717 2.0957107543945312 150 5 tensor([[0.1307, 0.1279, 0.1246, 0.1245, 0.1251, 0.1230, 0.1177, 0.1266]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Basket 90 znalazł nową podkoszową. W poprzednim sezonie grała w polskim klubie Nie trzeba było długo czekać na koszykarkę, która wypełni podkoszową lukę w Baskecie 90 Gdynia. Nową zawodniczką ekipy z Trójmiasta została Niemka Sonja Greinacher, która ostatni sezon spędziła w Wiśle CanPack Kraków.\n",
|
|
||||||
"2.058483362197876 2.0446863174438477 200 0 tensor([[0.1294, 0.1279, 0.1260, 0.1219, 0.1224, 0.1241, 0.1218, 0.1263]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Wicemistrzyni olimpijska zakończyła karierę Olga Wiłuchina podjęła decyzję o zakończeniu sportowej kariery. Największymi sukcesami rosyjskiej biathlonistki są dwa srebrne medale wywalczone na igrzyskach olimpijskich w Soczi.\n",
|
|
||||||
"2.048689126968384 2.0999209880828857 250 5 tensor([[0.1226, 0.1300, 0.1219, 0.1229, 0.1239, 0.1225, 0.1261, 0.1300]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Duże wzmocnienie reprezentacji Polski. Wraca Maciej Lampe Maciej Lampe dołączył już do reprezentacji Polski, która przygotowuje się do eliminacyjnych meczów z Litwą oraz Kosowem. 33-latek wraca do kadry po dwuletniej przerwie.\n",
|
|
||||||
"2.0371503829956055 2.0841071605682373 300 5 tensor([[0.1309, 0.1342, 0.1226, 0.1171, 0.1202, 0.1244, 0.1222, 0.1284]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Kibice Celtów mogą spać spokojnie. Kyrie Irving planuje zostać w Bostonie Rozgrywający Boston Celtics, Kyrie Irving, przyznał, że w następne lato zamierza przedłużyć swoją umowę z klubem, o ile ten nadal będzie go uwzględniał w swoich planach.\n",
|
|
||||||
"2.0365874767303467 2.0405309200286865 350 6 tensor([[0.1256, 0.1288, 0.1273, 0.1186, 0.1148, 0.1229, 0.1300, 0.1319]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) We włoskiej Serie A nie ma żartów. Kolejny trener stracił posadę Brak cierpliwości włodarzy włoskich klubów charakteryzuje tegoroczne rozgrywki Serie A. Ostatnio z rolą szkoleniowca Exprivia Molfetta pożegnał się Vincenzo Di Pinto. Nie jest on pierwszym trenerem, który po 7. kolejce rozgrywek stracił posadę.\n",
|
|
||||||
"2.0280144214630127 2.0845110416412354 400 3 tensor([[0.1276, 0.1223, 0.1311, 0.1244, 0.1142, 0.1192, 0.1308, 0.1304]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Polska B rozpoczyna turniej w Płocku Od piątku do niedzieli w płockiej Orlen Arenie odbędzie się turniej z udziałem reprezentacji Polski B. Wezmą w nim też udział druga reprezentacja Danii, a także pierwsze kadry Wysp Owczych i Estonii.\n",
|
|
||||||
"2.0075767040252686 2.0585439205169678 450 0 tensor([[0.1276, 0.1289, 0.1236, 0.1232, 0.1221, 0.1172, 0.1307, 0.1266]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Najpiękniesze polskie sportsmenki! Sprawdź kto znalazł się w zestawieniu! Przygotowaliśmy dla was zestawienie 20 najpiękniejszych polskich sportsmenek! Zgadzacie się z naszym wyborem? Swoje typy wpisujcie w komentarzach! Razem wybierzemy tę najładniejszą.\n",
|
|
||||||
"2.011418581008911 1.9737845659255981 500 0 tensor([[0.1389, 0.1302, 0.1213, 0.1188, 0.1176, 0.1193, 0.1243, 0.1295]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Aleksander Zniszczoł został ojcem. Urodziła mu się córka Piątek był niezwykle ważnym dniem w życiu Aleksandra Zniszczoła. Polskiemu skoczkowi narciarskiemu urodziła się córka. Pochwalił się jej zdjęciem na Instagramie.\n",
|
|
||||||
"2.0087714195251465 2.0379459857940674 550 1 tensor([[0.1339, 0.1303, 0.1195, 0.1186, 0.1221, 0.1194, 0.1287, 0.1274]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Rajd Azorów: czołówka miała problemy. Łukjaniuk się broni Aleksiej Łukjaniuk pod nieobecność Kajetana Kajetanowicza obronił pozycję lidera klasyfikacji generalnej Rajdu Azorów. Do czołowej dziesiątki przebił się Łukasz Habaj.\n",
|
|
||||||
"1.996700406074524 2.0396344661712646 600 4 tensor([[0.1274, 0.1270, 0.1201, 0.1230, 0.1301, 0.1189, 0.1227, 0.1307]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Andrzej Kostyra: Ciosy muszą się kumulować Podczas sobotniej gali Tomasz Adamek zmierzy się z Arturem Szpilką. Andrzej Kostyra większe szanse daje temu pierwszemu, ale zauważa też pewne mankamenty.\n",
|
|
||||||
"1.9674354791641235 2.064871072769165 650 3 tensor([[0.1315, 0.1254, 0.1290, 0.1268, 0.1149, 0.1162, 0.1210, 0.1350]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) IO 2016: skład Chorwacji na przygotowania do Rio. Na liście Lovro Mihić, Manuel Strlek i Ivan Cupić Chorwacki selekcjoner Żeljko Babić powołał kadrę na przygotowania do igrzysk olimpijskich w Rio de Janeiro. Na liście nazwisk nie zabrakło \"polskich\" akcentów. Na zgrupowanie pojadą Manuel Strlek, Lovro Mihić, Filip Ivić i Ivan Cupić.\n",
|
|
||||||
"1.9685375690460205 1.9720581769943237 700 2 tensor([[0.1150, 0.1331, 0.1392, 0.1129, 0.1123, 0.1159, 0.1247, 0.1469]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Andy Murray: Kostka boli, ale wszystko z nią w porządku W swoim felietonie dla BBC Andy Murray ocenił środowy pojedynek II rundy z Andriejem Rublowem i cieszył się z sukcesu Daniela Evansa w Australian Open 2017. Lider rankingu ATP przyznał, że z jego kostką jest wszystko w porządku.\n",
|
|
||||||
"1.9755648374557495 1.8178434371948242 750 1 tensor([[0.1203, 0.1624, 0.1207, 0.1146, 0.1109, 0.1177, 0.1230, 0.1304]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Daniel Ricciardo sugeruje Red Bullowi wybór silnika Daniel Ricciardo przyznał, że patrząc na obecny rozwój jednostki napędowej Renault, byłby gotowy zaufać francuskiemu producentowi także w kolejnym sezonie F1.\n",
|
|
||||||
"1.9461817741394043 2.115739107131958 800 3 tensor([[0.1252, 0.1380, 0.1297, 0.1205, 0.1064, 0.1249, 0.1235, 0.1317]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Kamil Mokrzki jako jedyny gracz Gwardii Opole dobił do granicy 100 goli 24-letni rozgrywający opolan był najlepszym strzelcem zespołu w sezonie 2015/2016. Drugiego w wewnętrznej klasyfikacji Antoniego Łangowskiego wyprzedził o 10 trafień.\n",
|
|
||||||
"1.976528286933899 2.018634080886841 850 4 tensor([[0.1226, 0.1330, 0.1265, 0.1109, 0.1328, 0.1160, 0.1143, 0.1438]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Deontay Wilder rzucił wyzwanie Anthony'emu Joshui. \"Aż krew się we mnie gotuje!\" - Anthony Joshua to facet, z którym chcę walczyć - mówi Deontay Wilder. Mistrz świata organizacji WBC wyzwał Anglika na pojedynek i liczy na to, że ten potraktuje jego propozycję na poważnie.\n",
|
|
||||||
"1.9369778633117676 2.0240039825439453 900 2 tensor([[0.1282, 0.1321, 0.1321, 0.1175, 0.1185, 0.1192, 0.1179, 0.1344]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Ronaldo, Bouchard, Bolt. Gwiazdy sportu wybierają stroje na Halloween Znani sportowcy wzięli udział w zabawie w wymyślaniu kostiumów na wieczór halloweenowych szaleństw. Kto zaprezentował najbardziej oryginalne przebranie?\n",
|
|
||||||
"1.938151240348816 1.998972773551941 950 4 tensor([[0.1241, 0.1263, 0.1215, 0.1199, 0.1355, 0.1184, 0.1261, 0.1283]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Andrzej Kostyra stworzył \"idealnego polskiego boksera\". Jest dużo cech Tomasza Adamka Andrzej Kostyra, ekspert bokserski, stworzył model \"idealnego polskiego pięściarza\". Wymienił najlepsze cechy poszczególnych bokserów. Najwięcej jest Tomasza Adamka.\n",
|
|
||||||
"1.928910732269287 1.9361062049865723 1000 1 tensor([[0.1222, 0.1443, 0.1320, 0.1216, 0.1117, 0.1137, 0.1200, 0.1346]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Rajd Niemiec: Andreas Mikkelsen i Jari-Matti Latvala najszybsi na shakedown W czwartek kierowcy mieli do pokonania odcinek testowy przed Rajdem Niemiec. Na mecie okazało się, że Andreas Mikkelsen i Jari-Matti Latvala uzyskali identyczny czas.\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"1.9247257709503174 1.9077305793762207 1050 4 tensor([[0.1264, 0.1246, 0.1286, 0.1161, 0.1484, 0.1108, 0.1174, 0.1276]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Była rywalka Joanny Jędrzejczyk na dopingu. Czeka ją zawieszenie Była pretendenta to tytułu mistrzyni UFC w wadze słomkowej, Jessica Penne (MMA 12-5) została zawieszona przez Amerykańską Agencję Antydopingową za stosowanie niedozwolonego środka. Amerykankę czeka 1,5-roczne zawieszenie.\n",
|
|
||||||
"1.9094451665878296 1.8653218746185303 1100 2 tensor([[0.1117, 0.1150, 0.1548, 0.1148, 0.1137, 0.1239, 0.1094, 0.1566]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Finał WTA Sydney: Radwańska - Konta na żywo. Transmisja TV, stream online W piątek Agnieszka Radwańska zmierzy się z Johanną Kontą w ramach finału WTA Sydney. Transmisja TV na antenie TVP 1 i TVP Sport. Stream online w sport.tvp.pl.\n",
|
|
||||||
"1.9157683849334717 1.9492340087890625 1150 7 tensor([[0.1213, 0.1256, 0.1152, 0.1315, 0.1243, 0.1176, 0.1222, 0.1424]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Piękne zachowanie piłkarza Borussii. Pomógł kibicowi Takim zachowaniem piłkarze zyskują ogromny szacunek u kibiców. Christian Pulisić uratował fana, którym podczas próby zrobienia wspólnego zdjęcia z zawodnikiem Borussii Dortmund zajęła się ochrona.\n",
|
|
||||||
"1.865821123123169 2.0228006839752197 1200 4 tensor([[0.1116, 0.1368, 0.1280, 0.1275, 0.1323, 0.1158, 0.1208, 0.1272]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) W przyszłym roku dojdzie do walki Joshua - Kliczko. \"Umowa jest dogadana\" Po fiasku wcześniejszych negocjacji wreszcie osiągnięto porozumienie. W przyszłym roku Anthony Joshua zmierzy się z Władimirem Kliczką, a w stawce będą dwa pasy mistrzowskie - informują menadżerowie obu pięściarzy. Został tylko jeden warunek.\n",
|
|
||||||
"1.8944953680038452 1.8922208547592163 1250 4 tensor([[0.1134, 0.1291, 0.1183, 0.1147, 0.1507, 0.1225, 0.1236, 0.1276]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) MMA: Bartosz Fabiński zasila powracającą Fighters Arenę Bartosz Fabiński wystąpi na gali Fighters Arena 9, która 8 czerwca odbędzie się w Józefowie. Dla zawodnika z Warszawy będzie to już czwarta walka w tym roku.\n",
|
|
||||||
"1.880069375038147 1.9415850639343262 1300 7 tensor([[0.1157, 0.1171, 0.1131, 0.1464, 0.1034, 0.1277, 0.1331, 0.1435]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Oficjalnie: Polski mecz w Serie A! Godzinę przed pierwszym gwizdkiem (20:45) meczu między Sampdorią, a Napoli potwierdziły się doniesienia włoskiej prasy. Po raz pierwszy w obecnym sezonie to samo spotkanie Serie A rozpocznie w wyjściowej jedenastce aż 4 polskich piłkarzy.\n",
|
|
||||||
"1.856698751449585 1.7814764976501465 1350 1 tensor([[0.1144, 0.1684, 0.1176, 0.1232, 0.1149, 0.1156, 0.1183, 0.1275]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Bernie Ecclestone: Ferrari ma lepszy bolid Zdaniem Berniego Ecclestone'a, sezon 2017 będzie należeć do Ferrari. Włoski zespół ma spore szanse na pierwszy mistrzowski tytuł wśród konstruktorów od wielu lat.\n",
|
|
||||||
"1.864432692527771 1.7103632688522339 1400 1 tensor([[0.1176, 0.1808, 0.1134, 0.1188, 0.1131, 0.1201, 0.1122, 0.1240]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Sauber potwierdza brak Pascala Wehrleina na testach. Kto go zastąpi? Po czwartkowych medialnych doniesieniach, w piątek zespół Sauber F1 Team oficjalnie potwierdził, iż Pascal Wehrlein opuści pierwszą turę przedsezonowych testów pod Barceloną.\n",
|
|
||||||
"1.8556106090545654 1.6862224340438843 1450 6 tensor([[0.1019, 0.1062, 0.1194, 0.1385, 0.0955, 0.1352, 0.1852, 0.1180]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Polski Cukier Muszynianka Enea - Giacomini Budowlani: przełamać pasmo porażek W 7. kolejce Orlen Ligi siatkarki Polskiego Cukru Muszynianki Enea Muszyna podejmą Giacomini Budowlani Toruń. Przyjezdne w czterech ostatnich meczach rozgrywek nie wygrały nawet seta i tą złą serię chcą przerwać w Małopolsce.\n",
|
|
||||||
"1.8279104232788086 1.714841604232788 1500 6 tensor([[0.1144, 0.1191, 0.1098, 0.1388, 0.0905, 0.1315, 0.1800, 0.1160]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Przyjmująca zostaje w Toruniu. Budowlani zamknęli skład W sezonie 2017/2018 Orlen Ligi w Budowlanych Toruń nadal będzie występować Marina Paulava. Ta siatkarka zamknęła skład zespołu.\n",
|
|
||||||
"1.8162095546722412 1.6665536165237427 1550 7 tensor([[0.1020, 0.1058, 0.1215, 0.1180, 0.1061, 0.1215, 0.1362, 0.1889]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Borussia Dortmund - Red Bull Salzburg na żywo. Transmisja TV, stream online W czwartek, w ramach 1/8 finału Ligi Europy, odbędzie się mecz Borussia Dortmund - Red Bull Salzburg. Transmisja TV na antenie Eurosport 1. Stream online na platformie Eurosport Player i Ipla TV. Relacja LIVE w WP SportoweFakty.\n",
|
|
||||||
"1.81099271774292 1.7798329591751099 1600 0 tensor([[0.1687, 0.1025, 0.1230, 0.1102, 0.1030, 0.1188, 0.1179, 0.1559]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) PŚ w Lahti: konkurs drużynowy na żywo. Transmisja TV, stream online za darmo W sobotę, w ramach Pucharu Świata w skokach narciarskich w Lahti odbędzie się konkurs drużynowy. Transmisja TV na antenie TVP 1 i Eurosport. Stream online za darmo w WP Pilot. Relacja LIVE w WP SportoweFakty.\n",
|
|
||||||
"1.8140941858291626 1.8500407934188843 1650 5 tensor([[0.0951, 0.1068, 0.1140, 0.1381, 0.1080, 0.1572, 0.1369, 0.1438]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Panathinaikos ma dość Euroligi. Wielki klub chce się wycofać z rozgrywek! Koszykarska Euroliga bez Panathinaikosu Ateny? To bardzo możliwy scenariusz. Właściciel klubu - Dimitrios Giannakopoulos - zapowiedział wycofanie drużyny ze stolicy Grecji z elitarnych rozgrywek.\n",
|
|
||||||
"1.792924404144287 1.8001683950424194 1700 4 tensor([[0.1270, 0.1361, 0.1167, 0.1184, 0.1653, 0.1104, 0.0993, 0.1269]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Tomasz Adamek wrócił na salę treningową. Zobacz, w jakiej formie jest \"Góral\" (wideo) Coraz więcej wskazuje na to, że Tomasz Adamek raz jeszcze powróci na ring. Były mistrz świata kategorii półciężkiej i junior ciężkiej regularnie pojawia się na sali treningowej. W jakiej formie jest 40-latek?\n",
|
|
||||||
"1.795984148979187 1.843177080154419 1750 0 tensor([[0.1583, 0.1008, 0.1063, 0.1622, 0.1017, 0.1220, 0.1319, 0.1169]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) 18 zawodników w finskiej kadrze. Aino-Kaisa Saarinen poza reprezentacją Finowie ogłosili skład reprezentacji na nadchodzący Puchar Świata w biegach narciarskich. W drużynie znalazło się 8 zawodniczek i 10 zawodników.\n",
|
|
||||||
"1.8073369264602661 1.7174080610275269 1800 4 tensor([[0.1065, 0.1124, 0.1060, 0.1050, 0.1795, 0.1232, 0.1324, 0.1350]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) FEN 19: karta walk skompletowana Znamy ostatnie, dziesiąte zestawienie na gali Fight Exclusive Night 19 \"Bitwa o Wrocław\". W kategorii do 70 kilogramów w formule K-1 zmierzą się ze sobą Marcin Stopka (2-2) i Krzysztof Kottas (0-0).\n",
|
|
||||||
"1.813085675239563 1.8584522008895874 1850 1 tensor([[0.1218, 0.1559, 0.1244, 0.1247, 0.1089, 0.1195, 0.1167, 0.1281]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Rajd Nadwiślański: Grzegorz Grzyb Liderem Grzegorz Grzyb i Robert Hundla zostali liderami Rajdu Nadwiślańskiego po przejechaniu dwóch sobotnich odcinków specjalnych.\n",
|
|
||||||
"1.8126273155212402 2.0152177810668945 1900 3 tensor([[0.1352, 0.1214, 0.1278, 0.1333, 0.1161, 0.1320, 0.1177, 0.1165]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Bundesliga: Berlińskie TGV. Kolejna stacja w Hanowerze Füchse Berlin w niedzielę powalczy w Hanowerze o dziewiąte kolejne zwycięstwo w sezonie. Takiego otwarcia „Lisy” nie miały jeszcze nigdy. Z kolei wieczorem polski pojedynek w Magdeburgu: Piotr Chrapkowski vs Andrzej Rojewski. Oba mecze w Sportklubie.\n",
|
|
||||||
"1.8037822246551514 1.9507031440734863 1950 4 tensor([[0.1258, 0.1343, 0.1078, 0.1292, 0.1422, 0.1202, 0.1165, 0.1241]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Utytułowany pięściarz zakończył karierę Czterokrotny obrońca tytułu mistrza świata kategorii super średniej Mikkel Kessler ogłosił zakończenie kariery pięściarskiej. To najbardziej utytułowany zawodnik w historii duńskiego boksu.\n",
|
|
||||||
"1.7356246709823608 1.938697099685669 2000 6 tensor([[0.1114, 0.0960, 0.1303, 0.1193, 0.1003, 0.1257, 0.1439, 0.1731]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) KMŚ 2017: ZAKSA - Sarmayeh Bank Teheran na żywo. Gdzie oglądać transmisję TV i online? We wtorek, ZAKSA Kędzierzyn-Koźle zmierzy się z Sarmayeh Bank Teheran w ramach Klubowych Mistrzostw Świata w siatkówce. Transmisja TV na antenie Polsat Sport. Stream online w Ipla TV. Relacja LIVE w WP SportoweFakty za darmo.\n",
|
|
||||||
"1.7901594638824463 1.9917528629302979 2050 1 tensor([[0.1212, 0.1365, 0.1351, 0.1287, 0.1104, 0.1252, 0.1179, 0.1250]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Wakacyjny freestyle Przygońskiego i Pawlusiaka na pustyni Pędzące po wydmach dakarowe MINI, specjalnie dostosowany snowboard, lina i dwóch utalentowanych sportowców - tak w skrócie można opisać projekt \"Przygoński & Pawlusiak Dune Freestyle\".\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"1.7326788902282715 1.8687950372695923 2100 5 tensor([[0.1091, 0.1428, 0.1050, 0.1267, 0.1092, 0.1543, 0.1100, 0.1429]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Martynas Sajus: Sobin jest bardziej doświadczonym graczem, ale w przyszłości będę od niego lepszy Pojedynek Josipa Sobina z Martynasem Sajusem może być jednym ze smaczków piątkowego spotkania Anwilu z Polpharmą. Który ze środkowych da więcej swojej ekipie? - On jest bardziej doświadczony, ale w przyszłości to ja będę lepszy - śmieje się Sajus.\n",
|
|
||||||
"1.7521668672561646 1.5104379653930664 2150 2 tensor([[0.0978, 0.1259, 0.2208, 0.1105, 0.1043, 0.1174, 0.1048, 0.1186]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Łukasz Iwanek: Każda tenisistka może być Williams, nie każda może zostać Radwańską (komentarz) W II rundzie Australian Open najlepsza polska tenisistka została stłamszona przez rywalkę uderzającą szybko i celnie. Każda tenisistka może w pojedynczym meczu zostać Sereną Williams, nie każda może być Agnieszką Radwańską.\n",
|
|
||||||
"1.7391993999481201 1.7570909261703491 2200 5 tensor([[0.1101, 0.0949, 0.1162, 0.1437, 0.0984, 0.1725, 0.1351, 0.1290]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Kolejny występ Przemysława Karnowskiego w Lidze Letniej NBA Kolejny występ w rozgrywkach Ligi Letniej NBA zanotował Przemysław Karnowski. Polak, który reprezentuje Charlotte Hornets, w przegranym meczu z Indianą Pacers (77:84) zdobył cztery punkty i miał trzy zbiórki.\n",
|
|
||||||
"1.6614245176315308 1.5924513339996338 2250 1 tensor([[0.1038, 0.2034, 0.1055, 0.1092, 0.1229, 0.1111, 0.1111, 0.1329]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Niebieskie flagi mogą zniknąć z F1 Formuła 1 rozważa, czy nie zrezygnować ze stosowania niebieskich flag podczas wyścigu. W ostatnich sezonach kierowcy często narzekali, iż rywale nie stosowali się do takiej sygnalizacji.\n",
|
|
||||||
"1.661994218826294 1.8809857368469238 2300 5 tensor([[0.1059, 0.1278, 0.1030, 0.1494, 0.1201, 0.1524, 0.1246, 0.1169]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Nemanja Jaramaz nowym zawodnikiem Anwilu Włocławek! Doskonale znany na polskich parkietach z występów w drużynie ze Zgorzelca Nemanja Jaramaz został nowym zawodnikiem Anwilu Włocławek. Kontrakt z Serbem będzie obowiązywał do końca bieżącego sezonu.\n",
|
|
||||||
"1.7554911375045776 1.7680193185806274 2350 5 tensor([[0.1027, 0.1128, 0.1075, 0.1352, 0.1200, 0.1707, 0.1245, 0.1267]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) TOP5 zagrań minionej nocy NBA (wideo) 3 mecze odbyły się w nocy z wtorku na środę w NBA, dlatego liga miała mniejszy wybór do zestawienia najlepszych akcji. Na czele listy TOP5 zagrań znalazły się rzuty z elektryzującej końcówki spotkania Dallas Mavericks-Portland Trail Blazers.\n",
|
|
||||||
"1.7369928359985352 1.9102388620376587 2400 2 tensor([[0.1144, 0.1583, 0.1480, 0.1184, 0.1097, 0.1227, 0.1076, 0.1208]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Roger Federer będzie występował w Bazylei aż do 2019 roku Roger Federer potwierdził, że na pewno do sezonu 2019 będzie występował w turnieju Swiss Indoors Basel, który jest organizowany pod koniec października w jego rodzinnej miejscowości.\n",
|
|
||||||
"1.671631932258606 1.793396234512329 2450 4 tensor([[0.1104, 0.1304, 0.1188, 0.1231, 0.1664, 0.1170, 0.1101, 0.1238]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Łukasz Wichowski przed DSF Kickboxing Challenge: Będzie ciężka walka i duże widowisko Już w sobotę odbędzie się gala DSF Kickboxing Challenge: Bitwa w Piasecznie. Walką wieczoru będzie starcie Łukasza Wichowskiego z Piotrem Kołakowskim. - To dodatkowa mobilizacja - mówi Wichowski.\n",
|
|
||||||
"1.6898339986801147 1.8184137344360352 2500 5 tensor([[0.1091, 0.1339, 0.1128, 0.1309, 0.1144, 0.1623, 0.1074, 0.1292]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Specjaliści od dzikich kart. Co GTK Gliwice może wnieśc do PLK? GTK Gliwice prawdopodobnie będzie 17. zespołem w ekstraklasie. Przybliżamy sylwetkę ekipy ze Śląska, dla której gra w PLK będzie absolutnym debiutem.\n",
|
|
||||||
"1.6765532493591309 1.7610383033752441 2550 7 tensor([[0.1266, 0.1095, 0.1140, 0.1418, 0.1051, 0.1029, 0.1283, 0.1719]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Agent Oezila przerywa milczenie i oskarża reprezentantów Niemiec Erkut Sogut, agent Mesuta Oezila przemówił na temat zakończenia kariery reprezentacyjnej przez pomocnika. Oberwało się trzem reprezentantom Niemiec.\n",
|
|
||||||
"1.6663236618041992 1.6887623071670532 2600 5 tensor([[0.0998, 0.1291, 0.0841, 0.1349, 0.1218, 0.1847, 0.1053, 0.1402]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) NBA: ci koszykarze nie mają jeszcze kontraktu Dobiega końca lipiec, a wciąż bez kontraktów na kolejny sezon pozostaje kilku zawodników o znanych nazwiskach. Najbardziej znany to oczywiście LeBron James, ale on akurat lada moment ma podpisać nową umowę z Cleveland Cavaliers.\n",
|
|
||||||
"1.660627841949463 1.0679386854171753 2650 2 tensor([[0.0997, 0.0816, 0.3437, 0.1018, 0.0988, 0.0842, 0.0996, 0.0905]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) ATP Toronto: Tuzin zwycięstw nad Gaelem Monfilsem. Novak Djoković zmierza po kolejne trofeum Novak Djoković powalczy w niedzielę z Keiem Nishikorim o triumf w turnieju ATP World Tour Masters 1000 na kortach twardych w Toronto. W sobotnim półfinale Serb pewnie rozprawił się z Gaelem Monfilsem, zwyciężając Francuza 6:3, 6:2.\n",
|
|
||||||
"1.6437948942184448 1.9171419143676758 2700 6 tensor([[0.1121, 0.1250, 0.0957, 0.1340, 0.1204, 0.1323, 0.1470, 0.1335]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Joanna Wołosz: Mamy prawo do małego dołka Chemik Police poniósł trzecią ligową porażkę, tym razem ze zdecydowanie niżej notowanym Atomem Trefl Sopot. Kryzys mistrza Polski? Joanna Wołosz uspokaja zaniepokojonych kibiców.\n",
|
|
||||||
"1.6391946077346802 2.0095250606536865 2750 4 tensor([[0.1318, 0.1551, 0.1222, 0.1035, 0.1341, 0.1162, 0.1034, 0.1338]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Legenda MMA czuje się jak wrak człowieka. Przeszedł 22 operacje Po raz pierwszy trafił na stół operacyjny jako dziecko. Antonio Rodrigo Nogueira wpadł pod koła ciężarówki, walczył o życie. Później musiał poddawać się zabiegom po kontuzjach odniesionych na treningach i w walkach. - Jestem cały rozbity - przyznaje.\n",
|
|
||||||
"1.6332921981811523 1.2052042484283447 2800 2 tensor([[0.1205, 0.1072, 0.2996, 0.1031, 0.0922, 0.0857, 0.0875, 0.1042]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Ostatni sprawdzian Kamila Majchrzaka przed Rolandem Garrosem. Polak zagra w Niemczech Kamil Majchrzak weźmie udział w turnieju ATP Challenger Tour na kortach ziemnych w niemieckim Heilbronn. Dla Polaka będzie to ostatni sprawdzian przed eliminacjami do wielkoszlemowego Rolanda Garrosa 2018.\n",
|
|
||||||
"1.6281371116638184 1.3348133563995361 2850 7 tensor([[0.1050, 0.0828, 0.1172, 0.1081, 0.0920, 0.1131, 0.1186, 0.2632]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Real Sociedad - Atletico Madryt na żywo. Gdzie oglądać transmisję TV i stream online? W czwartek, w ramach Primera Division, odbędzie się spotkanie Real Sociedad - Atletico Madryt. Transmisja TV na antenie Eleven Sports 1. Stream online w WP Pilot. Relacja LIVE w WP SportoweFakty.\n",
|
|
||||||
"1.6222891807556152 1.3981242179870605 2900 1 tensor([[0.1193, 0.2471, 0.1043, 0.0929, 0.0984, 0.1156, 0.1038, 0.1185]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Williams został w tyle za rywalami. \"Nie odrobiliśmy swojej pracy domowej\" Problemy Williamsa w tym sezonie zdają się nie mieć końca. Paddy Lowe jest zdania, że na sytuację wpływa zacięta rywalizacja w Formule 1. - Obecnie każdy z zespołów funkcjonuje na bardzo wysokim poziomie - twierdzi Brytyjczyk.\n",
|
|
||||||
"1.6525822877883911 1.6196324825286865 2950 5 tensor([[0.1021, 0.1232, 0.0984, 0.1353, 0.1010, 0.1980, 0.1131, 0.1289]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Basket 90 Gdynia zamknął \"zagraniczną\" kadrę na nowy sezon Basket 90 Gdynia zakończył poszukiwania zawodniczek zagranicznych na sezon 2016/2017. Ostatnią koszykarką spoza granic naszego kraju, która związała się z ekipą z Trójmiasta, jest Litwinka Monika Grigalauskyte.\n",
|
|
||||||
"1.6379656791687012 1.4863052368164062 3000 3 tensor([[0.0881, 0.0816, 0.1089, 0.2262, 0.0698, 0.1202, 0.1658, 0.1394]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Liga Mistrzów: Paris Saint-Germain HB kolejnym uczestnikiem Final Four Paris Saint-Germain HB zremisował z MOL-Pickiem Szeged 30:30 w rewanżowym meczu ćwierćfinałowym Ligi Mistrzów 2016/2017, tym samym zdobywając awans do turnieju finałowego w Kolonii.\n",
|
|
||||||
"1.620102047920227 1.955077886581421 3050 5 tensor([[0.0998, 0.1599, 0.1024, 0.1031, 0.1239, 0.1416, 0.1172, 0.1520]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Chewbacca ma nową twarz. Jak koszykarz z Finlandii trafił do \"Gwiezdnych Wojen\" Zbliżający się weekend będzie tym, w którym miliony fanów \"Gwiezdnych Wojen\" zaczną szturmować kina, by obejrzeć 8. część sagi. Wielu z nich nie wie, że za maską Chewbakki od niedawna skrywa się nowa twarz - fińskiego koszykarza, Joonasa Suotamo.\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"1.6508986949920654 1.7872048616409302 3100 7 tensor([[0.1113, 0.1329, 0.0890, 0.1126, 0.1327, 0.1295, 0.1246, 0.1674]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Ireneusz Mamrot liczy na przełamanie. \"Jest sportowa złość, która musi się przełożyć na naszą korzyść\" - Nie ma zdenerwowania, ale jest duża sportowa złość. To musi się przełożyć na naszą korzyść - mówi przed sobotnim pojedynkiem z Koroną Kielce trener Jagiellonii Białystok, Ireneusz Mamrot. - Nie można wiecznie mieć gorszego okresu - dodaje.\n",
|
|
||||||
"1.5091105699539185 1.5536433458328247 3150 2 tensor([[0.1030, 0.1194, 0.2115, 0.1183, 0.1021, 0.1098, 0.1085, 0.1274]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Roland Garros: bogaty program gier na środę. Matkowski czeka na dokończenie meczu z braćmi Bryanami Przez ostatnie dwa dni tenisiści niemal nie rywalizowali na kortach Rolanda Garrosa. Plan gier na 11. dzień turnieju jest naprawdę bogaty.\n",
|
|
||||||
"1.64374577999115 1.6283594369888306 3200 4 tensor([[0.0947, 0.1278, 0.1324, 0.1141, 0.1963, 0.1059, 0.1183, 0.1106]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Deontaya Wildera czekają dwie operacje. Na ring wróci w 2017 roku Deontay Wilder bez problemów pokonał Chrisa Arreolę w Birmingham i obronił pas mistrza świata federacji WBC. Podczas pojedynku \"Brązowy Bombardier\" nabawił się jednak dwóch kontuzji, które na dłuższy okres wykluczą go z walk.\n",
|
|
||||||
"1.4641040563583374 1.66716468334198 3250 5 tensor([[0.0888, 0.0941, 0.0920, 0.1388, 0.1155, 0.1888, 0.1545, 0.1276]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Sfrustrowany Tweety Carter: Powinienem zatrzymać Irelanda W ramach 20. kolejki PLK PGE Turów przegrał na własnym parkiecie z Treflem Sopot 79:81. - To bardzo, bardzo frustrująca porażka - mówi Tweety Carter, który zawiódł w ostatniej minucie tego starcia.\n",
|
|
||||||
"1.6305592060089111 1.6195734739303589 3300 1 tensor([[0.1346, 0.1980, 0.1101, 0.1144, 0.1032, 0.1227, 0.1072, 0.1099]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Rajd Estonii: Kajetanowicz utrzymuje podium Polska załoga Kajetan Kajetanowicz - Jarosław Baran jest coraz bliżej sięgnięcia po kolejne podium w wyścigach ERC. Po pewne zwycięstwo w Rajdzie Estonii zmierza Aleksiej Łukjaniuk.\n",
|
|
||||||
"1.5722557306289673 1.681894302368164 3350 5 tensor([[0.0974, 0.1164, 0.1069, 0.1205, 0.0956, 0.1860, 0.1081, 0.1691]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Basket Poznań wrócił na właściwe tory? \"Do stacji końcowej jeszcze daleka droga\" I-ligowy Biofarm Basket Poznań jest projektem, który skupia wokół siebie wielu młodych graczy. W zespole możemy znaleźć także dwóch doświadczonych zawodników, a jednym z nich jest Tomasz Smorawiński.\n",
|
|
||||||
"1.513804316520691 1.353588581085205 3400 4 tensor([[0.1068, 0.1058, 0.1009, 0.1070, 0.2583, 0.1113, 0.1052, 0.1048]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Łukasz Rambalski i Wojciech Wierzbicki w karcie walk gali FEN 10 Do ciekawej walki dojdzie na gali FEN 10, 9 stycznia w Lubinie. Jeden z najbardziej utytułowanych polskich zawodników kickboxingu, Łukasz Rambalski, zmierzy się z aktualnym zawodowym mistrzem Europy organizacji WAKO-PRO, Wojciechem Wierzbickim.\n",
|
|
||||||
"1.549415111541748 1.5709004402160645 3450 5 tensor([[0.0968, 0.0769, 0.1763, 0.1564, 0.0748, 0.2079, 0.1181, 0.0927]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Świetny mecz Marcina Sroki. Sokół z Pogonią w finale turnieju w Prudniku Marcin Sroka zdobył 25 punktów i poprowadził Max Elektro Sokół Łańcut do zwycięstwa 95:69 nad BK NH Ostrava. W drugim półfinale międzynarodowego koszykarskiego turnieju w Prudniku gospodarze pokonali Jamalex Polonię 1912 Leszno.\n",
|
|
||||||
"1.5036178827285767 1.6746760606765747 3500 1 tensor([[0.1181, 0.1874, 0.1078, 0.1015, 0.1282, 0.1220, 0.1091, 0.1260]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Niki Lauda wspomina Jamesa Hunta. \"Jego cząstka żyje we mnie\" Przed laty rywalizacja Nikiego Laudy i Jamesa Hunta emocjonowała kibiców Formuły 1. Austriaka i Brytyjczyka, choć na torze byli rywalami, połączyła specjalna więź. - Jakaś cząstka Jamesa żyje teraz we mnie - mówi Lauda.\n",
|
|
||||||
"1.458483099937439 1.312494158744812 3550 1 tensor([[0.1023, 0.2691, 0.1057, 0.0845, 0.1266, 0.0953, 0.0950, 0.1214]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Eksperci o słowach Felipe Massy. \"Sam wykluczył się z walki o miejsce w Williamsie\" Felipe Massa w ostrych słowach skomentował możliwy powrót Roberta Kubicy do Formuły 1. - Wygląda na to, że Brazylijczyk sam wykluczył się z walki o miejsce w Williamsie - twierdzi Tiff Needell, były prowadzący \"Top Gear\".\n",
|
|
||||||
"1.6343040466308594 1.5306463241577148 3600 3 tensor([[0.1112, 0.0902, 0.0923, 0.2164, 0.0924, 0.1184, 0.1386, 0.1404]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) AMŚ: pierwsza wygrana Biało-Czerwonych. Polska rozbiła Chińskie Tajpej Reprezentacja Polski piłkarzy ręcznych odniosła pierwsze zwycięstwo na Akademickich Mistrzostwach Świata. Drużyna Piotra Przybeckiego zgodnie z oczekiwaniami pokonała Chińskie Tajpej (35:20).\n",
|
|
||||||
"1.4308090209960938 0.6783717274665833 3650 2 tensor([[0.0740, 0.0568, 0.5074, 0.0749, 0.0623, 0.0691, 0.0896, 0.0659]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) ATP Tokio: Kei Nishikori wygrał japońskie starcie. W II rundzie także Milos Raonić i Denis Shapovalov Faworyt gospodarzy Kei Nishikori w dwóch setach pokonał Yuichiego Sugitę w I rundzie rozgrywanego na kortach twardych w hali turnieju ATP World Tour 500 w Tokio. Do 1/8 finału awansowali też Kanadyjczycy - Milos Raonić i Denis Shapovalov.\n",
|
|
||||||
"1.523067831993103 1.5078588724136353 3700 2 tensor([[0.1061, 0.1269, 0.2214, 0.0988, 0.0970, 0.1362, 0.1111, 0.1025]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) James Blake jak Tommy Haas czy Feliciano Lopez. Został dyrektorem turnieju w Miami Organizatorzy turnieju Miami Open ogłosili, że nowym dyrektorem tych kobiecych i męskich zawodów został były amerykański tenisista, James Blake. Tym samym potwierdziły się medialne doniesienia z ostatnich tygodni.\n",
|
|
||||||
"1.498782992362976 1.39162015914917 3750 7 tensor([[0.1257, 0.0678, 0.0980, 0.1364, 0.0640, 0.1027, 0.1567, 0.2487]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Mundial 2018. Historyczny wyczyn reprezentacji Rosji! Takiego pogromu w meczu otwarcia jeszcze nie było Na inaugurację MŚ 2018 Rosja pokonała Arabię Saudyjską aż 5:0 i ustanowiła nowy rekord mundialu - nigdy wcześniej w meczu otwarcia mistrzostw świata nie padł tak wysoki wynik.\n",
|
|
||||||
"1.494296669960022 1.5732029676437378 3800 3 tensor([[0.1084, 0.1078, 0.1186, 0.2074, 0.0942, 0.1287, 0.1335, 0.1015]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Wojciech Gumiński odnalazł się w Azotach. \"Start w nowym klubie bywa trudny\" Przeciętny początek rozgrywek i znacznie lepsza druga połowa sezonu. Wojciech Gumiński zaczyna spełniać oczekiwania w Azotach Puławy, stał się czołowym strzelcem brązowych medalistów PGNiG Superligi.\n",
|
|
||||||
"1.4970916509628296 1.9664472341537476 3850 2 tensor([[0.1420, 0.1164, 0.1400, 0.1578, 0.1105, 0.1187, 0.1133, 0.1014]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Rio 2016. To nie są igrzyska faworytów Tenisowe turnieje olimpijskie rządzą się swoimi prawami i wielkie niespodzianki są w nich na porządku dziennym, ale chyba mało kto przypuszczał, że w Rio de Janeiro dojdzie do aż tylu niespodziewanych rozstrzygnięć.\n",
|
|
||||||
"1.4386285543441772 1.7763383388519287 3900 5 tensor([[0.1042, 0.0926, 0.1017, 0.1574, 0.0956, 0.1693, 0.1595, 0.1197]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Niezawodny Klima, szalejący Obarek. Najlepsi gracze 18. kolejki I ligi Marcin Dymała oraz Maciej Klima to stali bywalce w naszym rankingu. Którzy zawodnicy znaleźli się jeszcze w najlepszej piątce 18. kolejki?\n",
|
|
||||||
"1.4847553968429565 1.4366589784622192 3950 4 tensor([[0.0837, 0.1276, 0.1146, 0.1064, 0.2377, 0.1054, 0.1089, 0.1156]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Czołowy brytyjski pięściarz zaproponował pojedynek Tomaszowi Adamkowi Tomasz Adamek otrzymał propozycję walki z Davidem Pricem. Jak poinformował portal worldboxingnews.net, obóz brytyjskiego pięściarza złożył \"Góralowi\" atrakcyjną ofertę.\n",
|
|
||||||
"1.4597876071929932 1.3940199613571167 4000 7 tensor([[0.0933, 0.1557, 0.0803, 0.0930, 0.1256, 0.1070, 0.0970, 0.2481]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Grzegorz Krychowiak na zakręcie. Mundial to ostatnia szansa Grzegorz Krychowiak znowu jest na zakręcie i musi szukać nowego klubu. Paris-Saint Germain chce się pozbyć Polaka na dobre. Mundial w Rosji to dla mistrzów Francji ostatnia szansa, żeby sprzedać go za godne pieniądze.\n",
|
|
||||||
"1.4579588174819946 1.5661852359771729 4050 6 tensor([[0.0991, 0.1113, 0.0903, 0.1400, 0.0902, 0.1380, 0.2088, 0.1223]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) ZAKSA Kędzierzyn-Koźle trenuje już niemal w komplecie Na początku tygodnia do kędzierzyńskiej drużyny dołączyli zawodnicy, którzy brali udział w mistrzostwach Europy. Wyjątkiem jest francuski rozgrywający Benjamin Toniutti.\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"1.524263858795166 1.2569677829742432 4100 1 tensor([[0.0736, 0.2845, 0.0688, 0.0741, 0.1107, 0.1046, 0.1125, 0.1710]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Krzysztof Hołowczyc trzyma kciuki za Kubicę. \"Ci, którzy nie chcą jego powrotu, po prostu się go boją\" Trwa walka Roberta Kubicy o powrót do Formuły 1. Polak jest jednym z kandydatów do reprezentowania w przyszłym sezonie barw zespołu Williams. Za Kubicę kciuki trzyma Krzysztof Hołowczyc.\n",
|
|
||||||
"1.4493881464004517 1.4371377229690552 4150 1 tensor([[0.1067, 0.2376, 0.1001, 0.0918, 0.1164, 0.1187, 0.1077, 0.1211]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Honda znów ma problem z silnikiem. \"Musimy znaleźć główną przyczynę niepowodzeń\" Honda po raz kolejny ma problemy ze swoim silnikiem. Japończycy uważają jednak, że w przypadku Brendona Hartleya we Francji doszło do innej usterki niż w jednostce napędowej Pierre'a Gasly'ego w Kanadzie.\n",
|
|
||||||
"1.5475350618362427 1.0887583494186401 4200 2 tensor([[0.0726, 0.0993, 0.3366, 0.0964, 0.0821, 0.1156, 0.1075, 0.0898]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Jelena Janković marzy o drugim tygodniu Wimbledonu. We wtorek Serbka zagra z Agnieszką Radwańską Jelena Janković chciałaby dojść do drugiego tygodnia Wimbledonu 2017. Serbka będzie rywalką Agnieszki Radwańskiej w I rundzie.\n",
|
|
||||||
"1.4706852436065674 1.9419291019439697 4250 4 tensor([[0.1107, 0.1165, 0.1035, 0.1279, 0.1434, 0.1301, 0.1392, 0.1287]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Kontrowersyjny klub MMA w Chinach. Walczą 12-letnie dzieci W Enbo Fight Club trenuje nawet 400 młodych ludzi. Część z nich stanowią osierocone dzieci. Działalność klubu wywołuje spore emocje w Chinach.\n",
|
|
||||||
"1.4309391975402832 0.8164663314819336 4300 1 tensor([[0.1202, 0.4420, 0.0652, 0.0702, 0.0816, 0.0791, 0.0730, 0.0687]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) MotoGP: Marc Marquez najlepszy w ostatnim treningu Marc Marquez wygrał ostatnią sesję treningową przed wyścigiem o Grand Prix Ameryk na torze w Austin. Kolejne pozycje zajęli Dani Pedrosa oraz Maverick Vinales.\n",
|
|
||||||
"1.421707034111023 1.5885950326919556 4350 3 tensor([[0.0740, 0.1260, 0.0926, 0.2042, 0.1067, 0.1426, 0.1267, 0.1271]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Rafał Przybylski odchodzi z Azotów Puławy. Kierunek - zagranica Rafał Przybylski po zakończeniu sezonu 2016/17 odejdzie z Azotów Puławy. 25-letni prawy rozgrywający wyjedzie z Polski. W jakim kierunku? Tego jeszcze nie wiadomo. Po Polaka zgłosiło się kilka klubów. Rozmowy trwają.\n",
|
|
||||||
"1.419579029083252 0.8860330581665039 4400 2 tensor([[0.0848, 0.0764, 0.4123, 0.0851, 0.0743, 0.0952, 0.0910, 0.0810]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) WTA Lugano: Stefanie Voegele wygrała dwudniowy mecz z Magdaleną Fręch. Polka jeszcze bez zwycięstwa w tourze Stefanie Vögele okazała się w dwóch setach lepsza od Magdaleny Fręch w I rundzie turnieju WTA International na kortach ziemnych w Lugano. Polska tenisistka musi jeszcze poczekać na premierową wygraną w głównym cyklu.\n",
|
|
||||||
"1.3413941860198975 1.3380266427993774 4450 0 tensor([[0.2624, 0.1457, 0.0719, 0.1039, 0.0734, 0.0958, 0.1211, 0.1259]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) MŚ w Lahti: Niemcy na czele klasyfikacji medalowej. To był ich piątek marzeń Niemcy znajdą się na prowadzeniu w klasyfikacji medalowej mistrzostw świata w Lahti po trzech dniach imprezy. W piątkowych konkurencjach reprezentanci tego kraju byli zdecydowanie najlepsi.\n",
|
|
||||||
"1.4630906581878662 1.8144276142120361 4500 0 tensor([[0.1629, 0.1532, 0.0742, 0.1222, 0.0932, 0.1240, 0.1055, 0.1647]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Pjongczang 2018. Austriacy zabiorą nam Horngachera? Trzeba jak najszybciej podpisać kontrakt Stefan Horngacher, jeden z autorów trzeciego złota Kamila Stocha, nie ma jeszcze podpisanego nowego kontraktu. PZN powinien uczynić to jak najszybciej, by sprzed nosa świetnego trenera nie zabrali polskiej kadrze Austriacy.\n",
|
|
||||||
"1.4926103353500366 0.9573735594749451 4550 1 tensor([[0.0927, 0.3839, 0.0863, 0.0762, 0.1012, 0.0837, 0.0782, 0.0977]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Wewnętrzna rywalizacja w Red Bull Racing. \"Powinienem wiedzieć, gdzie jest limit\" Wewnętrzna rywalizacja Daniela Ricciardo z Maxem Verstappenem korzystnie wpływa na formę kierowców Red Bull Racing. Australijczyk zdradził jednak, że w niektórych wyścigach przesadził z jazdą na limicie. - Zawsze byłem przed nim - odpowiada Holender.\n",
|
|
||||||
"1.493160367012024 1.1073386669158936 4600 6 tensor([[0.0816, 0.0615, 0.0737, 0.1037, 0.0659, 0.1300, 0.3304, 0.1531]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Puchar Polski: Skra Bełchatów - Asseco Resovia na żywo. Gdzie oglądać transmisję? W środę, w ramach Pucharu Polski w siatkówce odbędzie się mecz PGE Skra Bełchatów - Asseco Resovia Rzeszów. Tranmisja TV na antenie Polsat Sport. Stream online w Ipla TV. Relacja LIVE w WP SportoweFakty.\n",
|
|
||||||
"1.4486230611801147 1.1242272853851318 4650 1 tensor([[0.0861, 0.3249, 0.0828, 0.1040, 0.0873, 0.0995, 0.1066, 0.1088]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Mercedes z czwartym tytułem z rzędu jako czwarty zespół w historii Mercedes dzięki wygranej Lewisa Hamiltona w Austin zapewnił sobie kolejny z rzędu tytuł mistrzowski, który klasyfikuje niemiecki zespół wśród największych stajni w historii Formuły 1.\n",
|
|
||||||
"1.4269776344299316 1.2119630575180054 4700 6 tensor([[0.0843, 0.0775, 0.0978, 0.1276, 0.0649, 0.1554, 0.2976, 0.0949]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) PlusLiga: ta ostatnia, decydująca niedziela W niedzielę zostanie rozegrana ostatnia kolejka PlusLigi. Poznamy w niej odpowiedzi na pytania, które drużyny zagrają o medale, a która pożegna się z rozgrywkami. Czy Indykpol AZS, Asseco Resovia i Jastrzębski Węgiel wykorzystają potknięcie ONICO?\n",
|
|
||||||
"1.4327268600463867 0.915303111076355 4750 0 tensor([[0.4004, 0.0969, 0.0800, 0.0819, 0.0717, 0.0826, 0.0907, 0.0959]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Hula, Żyła i Kubacki szczęśliwi po MŚ w lotach. Już szykują się na konkurs drużynowy Polscy skoczkowie byli w bardzo dobrych nastrojach po indywidualnych MŚ w lotach narciarskich. Teraz wszyscy są myślami przy niedzielnej rywalizacji drużynowej.\n",
|
|
||||||
"1.4646613597869873 1.0098087787628174 4800 1 tensor([[0.1289, 0.3643, 0.0871, 0.0714, 0.0703, 0.0957, 0.0766, 0.1056]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Maverick Vinales myśli tylko o wygranej. \"Walka o podium mi nie wystarczy\" Upadek w Grand Prix Holandii sprawił, że Maverick Vinales stracił prowadzenie w klasyfikacji generalnej MotoGP. Hiszpan ma teraz jeden cel. Wygrać wyścig na Sachsenringu i wrócić na szczyt tabeli.\n",
|
|
||||||
"1.4902805089950562 1.4387915134429932 4850 5 tensor([[0.0798, 0.1133, 0.1002, 0.1091, 0.1097, 0.2372, 0.1071, 0.1435]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Zagrał na własne życzenie i... doznał kontuzji. Co dalej z Markiem Carterem? Marc Carter za wszelką cenę chciał pomóc drużynie w meczu z PGE Turowem Zgorzelec (93:78). Niestety Amerykanin swój występ przepłacił kontuzją ścięgna Achillesa. Na razie nie wiadomo, jak poważny jest uraz jednego z czołowych zawodników BM Slam Stal.\n",
|
|
||||||
"1.4317628145217896 1.2453986406326294 4900 7 tensor([[0.0937, 0.0673, 0.0873, 0.1039, 0.0762, 0.1123, 0.1714, 0.2878]],\n",
|
|
||||||
" grad_fn=<ExpBackward>) Anglia - Nigeria na żywo. Transmisja TV, stream online W sobotę, w ramach meczu towarzyskiego, odbędzie się starcie Anglia - Nigeria. Transmisja TV na antenie Eleven Sports 1. Stream online w WP Pilot. Relacja LIVE w WP SportoweFakty.\n",
|
|
||||||
"1.3499293327331543 1.4718239307403564 4950 5 tensor([[0.1089, 0.0784, 0.1235, 0.1255, 0.0868, 0.2295, 0.1285, 0.1189]],\n",
|
"1.3499293327331543 1.4718239307403564 4950 5 tensor([[0.1089, 0.0784, 0.1235, 0.1255, 0.0868, 0.2295, 0.1285, 0.1189]],\n",
|
||||||
" grad_fn=<ExpBackward>) Liga Letnia NBA: Zespół Ponitki w ćwierćfinale, 4 punkty Polaka Mateusz Ponitka zdobył cztery punkty dla Denver Nuggets, którzy pokonali Utah Jazz 80:60 i awansowali do ćwierćfinału Ligi Letniej NBA w Las Vegas.\n"
|
" grad_fn=<ExpBackward>) Liga Letnia NBA: [...].\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -2226,11 +2020,14 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"author": "Filip Graliński",
|
||||||
|
"email": "filipg@amu.edu.pl",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3",
|
"display_name": "Python 3 (ipykernel)",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
|
"lang": "pl",
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
"name": "ipython",
|
"name": "ipython",
|
||||||
@ -2241,10 +2038,13 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.9.2"
|
"version": "3.9.6"
|
||||||
},
|
},
|
||||||
"org": null
|
"org": null,
|
||||||
|
"subtitle": "9.Przegląd składowych sieci neuronowych[wykład]",
|
||||||
|
"title": "Ekstrakcja informacji",
|
||||||
|
"year": "2021"
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 1
|
"nbformat_minor": 4
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,19 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n",
|
||||||
|
"<div class=\"alert alert-block alert-info\">\n",
|
||||||
|
"<h1> Ekstrakcja informacji </h1>\n",
|
||||||
|
"<h2> 11. <i>Sieci rekurencyjne</i> [wykład]</h2> \n",
|
||||||
|
"<h3> Filip Graliński (2021)</h3>\n",
|
||||||
|
"</div>\n",
|
||||||
|
"\n",
|
||||||
|
"![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@ -498,11 +512,14 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"author": "Filip Graliński",
|
||||||
|
"email": "filipg@amu.edu.pl",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3",
|
"display_name": "Python 3 (ipykernel)",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
|
"lang": "pl",
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
"name": "ipython",
|
"name": "ipython",
|
||||||
@ -513,9 +530,12 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.9.2"
|
"version": "3.9.6"
|
||||||
},
|
},
|
||||||
"org": null
|
"org": null,
|
||||||
|
"subtitle": "11.Sieci rekurencyjne[wykład]",
|
||||||
|
"title": "Ekstrakcja informacji",
|
||||||
|
"year": "2021"
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 4
|
"nbformat_minor": 4
|
||||||
|
@ -1,5 +1,19 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n",
|
||||||
|
"<div class=\"alert alert-block alert-info\">\n",
|
||||||
|
"<h1> Ekstrakcja informacji </h1>\n",
|
||||||
|
"<h2> 12. <i>Kodowanie BPE</i> [wykład]</h2> \n",
|
||||||
|
"<h3> Filip Graliński (2021)</h3>\n",
|
||||||
|
"</div>\n",
|
||||||
|
"\n",
|
||||||
|
"![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@ -797,12 +811,12 @@
|
|||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"Cier@@ piałem na straszne la@@ gi kilkanaście sekund lub dłużej czarnego ekranu przy próbie przełą@@ czenia się uruchomienia prawie każdej aplikacji Dodatkowo telefon mi się wyłą@@ czał czasem bez powodu sam z siebie albo rese@@ tował Ostatnio nawet przeglądarka zaczęła się często zawie@@ szać i Android proponował wymu@@ szone zamknięcie Do tego te problemy z połączeniem do komputera przez USB "
|
"Cier@@ piałem na straszne la@@ gi [...]"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"! echo 'Cierpiałem na straszne lagi – kilkanaście sekund lub dłużej czarnego ekranu przy próbie przełączenia się / uruchomienia prawie każdej aplikacji. Dodatkowo telefon mi się wyłączał czasem bez powodu – sam z siebie, albo resetował. Ostatnio nawet przeglądarka zaczęła się często zawieszać i Android proponował wymuszone zamknięcie. Do tego te problemy z połączeniem do komputera przez USB.' | perl -C -ne 'print \"$& \" while/\\p{L}+/g;' | python -m subword_nmt.apply_bpe -c bpe_vocab.txt"
|
"! echo 'Cierpiałem na straszne lagi [...]' | perl -C -ne 'print \"$& \" while/\\p{L}+/g;' | python -m subword_nmt.apply_bpe -c bpe_vocab.txt"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -815,11 +829,14 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"author": "Filip Graliński",
|
||||||
|
"email": "filipg@amu.edu.pl",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3",
|
"display_name": "Python 3 (ipykernel)",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
|
"lang": "pl",
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
"name": "ipython",
|
"name": "ipython",
|
||||||
@ -830,9 +847,12 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.9.2"
|
"version": "3.9.6"
|
||||||
},
|
},
|
||||||
"org": null
|
"org": null,
|
||||||
|
"subtitle": "12.Kodowanie BPE[wykład]",
|
||||||
|
"title": "Ekstrakcja informacji",
|
||||||
|
"year": "2021"
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 4
|
"nbformat_minor": 4
|
||||||
|
@ -1,5 +1,19 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n",
|
||||||
|
"<div class=\"alert alert-block alert-info\">\n",
|
||||||
|
"<h1> Ekstrakcja informacji </h1>\n",
|
||||||
|
"<h2> 13. <i>Podejście generatywne w ekstrakcji informacji</i> [wykład]</h2> \n",
|
||||||
|
"<h3> Filip Graliński (2021)</h3>\n",
|
||||||
|
"</div>\n",
|
||||||
|
"\n",
|
||||||
|
"![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@ -89,11 +103,14 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"author": "Filip Graliński",
|
||||||
|
"email": "filipg@amu.edu.pl",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3",
|
"display_name": "Python 3 (ipykernel)",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
|
"lang": "pl",
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
"name": "ipython",
|
"name": "ipython",
|
||||||
@ -104,10 +121,13 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.9.2"
|
"version": "3.9.6"
|
||||||
},
|
},
|
||||||
"org": null
|
"org": null,
|
||||||
|
"subtitle": "13.Podejście generatywne w ekstrakcji informacji[wykład]",
|
||||||
|
"title": "Ekstrakcja informacji",
|
||||||
|
"year": "2021"
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 1
|
"nbformat_minor": 4
|
||||||
}
|
}
|
||||||
|
@ -1,338 +1,391 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
"source": [
|
"collapsed": false
|
||||||
"## Pretrenowanie modeli\n",
|
},
|
||||||
"\n"
|
"source": [
|
||||||
]
|
"![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n",
|
||||||
},
|
"<div class=\"alert alert-block alert-info\">\n",
|
||||||
{
|
"<h1> Ekstrakcja informacji </h1>\n",
|
||||||
"cell_type": "markdown",
|
"<h2> 14. <i>Pretrenowane modele j\u0119zyka</i> [wyk\u0142ad]</h2> \n",
|
||||||
"metadata": {},
|
"<h3> Filip Grali\u0144ski (2021)</h3>\n",
|
||||||
"source": [
|
"</div>\n",
|
||||||
"System AlphaZero uczy się grając sam ze sobą — wystarczy 24 godziny,\n",
|
"\n",
|
||||||
"by system nauczył się grać w szachy lub go na nadludzkim poziomie.\n",
|
"![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)"
|
||||||
"\n",
|
]
|
||||||
"**Pytanie**: Dlaczego granie samemu ze sobą nie jest dobrym sposobem\n",
|
},
|
||||||
" nauczenia się grania w szachy dla człowieka, a dla maszyny jest?\n",
|
{
|
||||||
"\n",
|
"cell_type": "markdown",
|
||||||
"Co jest odpowiednikiem grania samemu ze sobą w świecie przetwarzania tekstu?\n",
|
"metadata": {},
|
||||||
"Tzn. **pretrenowanie** (*pretraining*) na dużym korpusie tekstu. (Tekst jest tani!)\n",
|
"source": [
|
||||||
"\n",
|
"## Pretrenowanie modeli\n",
|
||||||
"Jest kilka sposobów na pretrenowanie modelu, w każdym razie sprowadza\n",
|
"\n"
|
||||||
"się do odgadywania następnego bądź zamaskowanego słowa.\n",
|
]
|
||||||
"W każdym razie zawsze stosujemy softmax (być może ze „sztuczkami” takimi jak\n",
|
},
|
||||||
"negatywne próbkowanie albo hierarchiczny softamx) na pewnej **representecji kontekstowej**:\n",
|
{
|
||||||
"\n",
|
"cell_type": "markdown",
|
||||||
"$$\\vec{p} = \\operatorname{softmax}(f(\\vec{c})).$$\n",
|
"metadata": {},
|
||||||
"\n",
|
"source": [
|
||||||
"Model jest karany używając funkcji log loss:\n",
|
"System AlphaZero uczy si\u0119 graj\u0105c sam ze sob\u0105 \u2014 wystarczy 24 godziny,\n",
|
||||||
"\n",
|
"by system nauczy\u0142 si\u0119 gra\u0107 w szachy lub go na nadludzkim poziomie.\n",
|
||||||
"$$-\\log(p_j),$$\n",
|
"\n",
|
||||||
"\n",
|
"**Pytanie**: Dlaczego granie samemu ze sob\u0105 nie jest dobrym sposobem\n",
|
||||||
"gdzie $w_j$ jest wyrazem, który pojawił się rzeczywiście w korpusie.\n",
|
" nauczenia si\u0119 grania w szachy dla cz\u0142owieka, a dla maszyny jest?\n",
|
||||||
"\n"
|
"\n",
|
||||||
]
|
"Co jest odpowiednikiem grania samemu ze sob\u0105 w \u015bwiecie przetwarzania tekstu?\n",
|
||||||
},
|
"Tzn. **pretrenowanie** (*pretraining*) na du\u017cym korpusie tekstu. (Tekst jest tani!)\n",
|
||||||
{
|
"\n",
|
||||||
"cell_type": "markdown",
|
"Jest kilka sposob\u00f3w na pretrenowanie modelu, w ka\u017cdym razie sprowadza\n",
|
||||||
"metadata": {},
|
"si\u0119 do odgadywania nast\u0119pnego b\u0105d\u017a zamaskowanego s\u0142owa.\n",
|
||||||
"source": [
|
"W ka\u017cdym razie zawsze stosujemy softmax (by\u0107 mo\u017ce ze \u201esztuczkami\u201d takimi jak\n",
|
||||||
"### Przewidywanie słowa (GPT-2)\n",
|
"negatywne pr\u00f3bkowanie albo hierarchiczny softmax) na pewnej **reprezentacji kontekstowej**:\n",
|
||||||
"\n"
|
"\n",
|
||||||
]
|
"$$\\vec{p} = \\operatorname{softmax}(f(\\vec{c})).$$\n",
|
||||||
},
|
"\n",
|
||||||
{
|
"Model jest karany u\u017cywaj\u0105c funkcji log loss:\n",
|
||||||
"cell_type": "markdown",
|
"\n",
|
||||||
"metadata": {},
|
"$$-\\log(p_j),$$\n",
|
||||||
"source": [
|
"\n",
|
||||||
"Jeden ze sposobów pretrenowania modelu to po prostu przewidywanie\n",
|
"gdzie $w_j$ jest wyrazem, kt\u00f3ry pojawi\u0142 si\u0119 rzeczywi\u015bcie w korpusie.\n",
|
||||||
"następnego słowa.\n",
|
"\n"
|
||||||
"\n",
|
]
|
||||||
"Zainstalujmy najpierw bibliotekę transformers.\n",
|
},
|
||||||
"\n"
|
{
|
||||||
]
|
"cell_type": "markdown",
|
||||||
},
|
"metadata": {},
|
||||||
{
|
"source": [
|
||||||
"cell_type": "code",
|
"### Przewidywanie s\u0142owa (GPT-2)\n",
|
||||||
"execution_count": 1,
|
"\n"
|
||||||
"metadata": {},
|
]
|
||||||
"outputs": [],
|
},
|
||||||
"source": [
|
{
|
||||||
"! pip install transformers"
|
"cell_type": "markdown",
|
||||||
]
|
"metadata": {},
|
||||||
},
|
"source": [
|
||||||
{
|
"Jeden ze sposob\u00f3w pretrenowania modelu to po prostu przewidywanie\n",
|
||||||
"cell_type": "code",
|
"nast\u0119pnego s\u0142owa.\n",
|
||||||
"execution_count": 5,
|
"\n",
|
||||||
"metadata": {},
|
"Zainstalujmy najpierw bibliotek\u0119 transformers.\n",
|
||||||
"outputs": [
|
"\n"
|
||||||
{
|
]
|
||||||
"name": "stdout",
|
},
|
||||||
"output_type": "stream",
|
{
|
||||||
"text": [
|
"cell_type": "code",
|
||||||
"50257\n"
|
"execution_count": 1,
|
||||||
]
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"! pip install transformers"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 17,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"50257\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"[('\u00c2\u0142', 0.6182783842086792),\n",
|
||||||
|
" ('\u00c8', 0.1154019758105278),\n",
|
||||||
|
" ('\u00d1\u0123', 0.026960616931319237),\n",
|
||||||
|
" ('_____', 0.024418892338871956),\n",
|
||||||
|
" ('________', 0.014962316490709782),\n",
|
||||||
|
" ('\u00c3\u0124', 0.010653386823832989),\n",
|
||||||
|
" ('\u00e4\u00b8\u0143', 0.008340531960129738),\n",
|
||||||
|
" ('\u00d1', 0.007557711564004421),\n",
|
||||||
|
" ('\u00ca', 0.007046067621558905),\n",
|
||||||
|
" ('\u00e3\u0122', 0.006875576451420784),\n",
|
||||||
|
" ('ile', 0.006685272324830294),\n",
|
||||||
|
" ('____', 0.006307446397840977),\n",
|
||||||
|
" ('\u00e2\u0122\u012d', 0.006306538358330727),\n",
|
||||||
|
" ('\u00d1\u0122', 0.006197483278810978),\n",
|
||||||
|
" ('\u0120Belarus', 0.006108700763434172),\n",
|
||||||
|
" ('\u00c6', 0.005720408633351326),\n",
|
||||||
|
" ('\u0120Poland', 0.0053678699769079685),\n",
|
||||||
|
" ('\u00e1\u00b9', 0.004606408067047596),\n",
|
||||||
|
" ('\u00ee\u0122', 0.004161055199801922),\n",
|
||||||
|
" ('????', 0.004056799225509167),\n",
|
||||||
|
" ('_______', 0.0038176667876541615),\n",
|
||||||
|
" ('\u00e4\u00b8', 0.0036082742735743523),\n",
|
||||||
|
" ('\u00cc', 0.003221835708245635),\n",
|
||||||
|
" ('urs', 0.003080119378864765),\n",
|
||||||
|
" ('________________', 0.0027312245219945908),\n",
|
||||||
|
" ('\u0120Lithuania', 0.0023860156070441008),\n",
|
||||||
|
" ('ich', 0.0021211160346865654),\n",
|
||||||
|
" ('iz', 0.002069818088784814),\n",
|
||||||
|
" ('vern', 0.002001357264816761),\n",
|
||||||
|
" ('\u00c5\u0124', 0.001717406208626926)]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 17,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import torch\n",
|
||||||
|
"from transformers import GPT2Tokenizer, GPT2LMHeadModel\n",
|
||||||
|
"tokenizer = GPT2Tokenizer.from_pretrained('gpt2-large')\n",
|
||||||
|
"model = GPT2LMHeadModel.from_pretrained('gpt2-large')\n",
|
||||||
|
"text = 'Warsaw is the capital city of'\n",
|
||||||
|
"encoded_input = tokenizer(text, return_tensors='pt')\n",
|
||||||
|
"output = model(**encoded_input)\n",
|
||||||
|
"next_token_probs = torch.softmax(output[0][:, -1, :][0], dim=0)\n",
|
||||||
|
"\n",
|
||||||
|
"nb_of_tokens = next_token_probs.size()[0]\n",
|
||||||
|
"print(nb_of_tokens)\n",
|
||||||
|
"\n",
|
||||||
|
"_, top_k_indices = torch.topk(next_token_probs, 30, sorted=True)\n",
|
||||||
|
"\n",
|
||||||
|
"words = tokenizer.convert_ids_to_tokens(top_k_indices)\n",
|
||||||
|
"\n",
|
||||||
|
"top_probs = []\n",
|
||||||
|
"\n",
|
||||||
|
"for ix in range(len(top_k_indices)):\n",
|
||||||
|
" top_probs.append((words[ix], next_token_probs[top_k_indices[ix]].item()))\n",
|
||||||
|
"\n",
|
||||||
|
"top_probs"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Zalety tego podej\u015bcia:\n",
|
||||||
|
"\n",
|
||||||
|
"- prostota,\n",
|
||||||
|
"- dobra podstawa do strojenia system\u00f3w generowania tekstu zw\u0142aszcza\n",
|
||||||
|
" \u201eotwartego\u201d (systemy dialogowe, generowanie (fake) news\u00f3w, streszczanie tekstu),\n",
|
||||||
|
" ale niekoniecznie t\u0142umaczenia maszynowego,\n",
|
||||||
|
"- zaskakuj\u0105ca skuteczno\u015b\u0107 przy uczeniu *few-shot* i *zero-shot*.\n",
|
||||||
|
"\n",
|
||||||
|
"Wady:\n",
|
||||||
|
"\n",
|
||||||
|
"- asymetryczno\u015b\u0107, przetwarzanie tylko z lewej do prawej, preferencja\n",
|
||||||
|
" dla lewego kontekstu,\n",
|
||||||
|
"- mniejsza skuteczno\u015b\u0107 przy dostrajaniu do zada\u0144 klasyfikacji i innych zada\u0144\n",
|
||||||
|
" niepolegaj\u0105cych na prostym generowaniu.\n",
|
||||||
|
"\n",
|
||||||
|
"Przyk\u0142ady modeli: GPT, GPT-2, GPT-3, DialoGPT.\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Maskowanie s\u0142\u00f3w (BERT)\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Inn\u0105 metod\u0105 jest maskowanie s\u0142\u00f3w (*Masked Language Modeling*, *MLM*).\n",
|
||||||
|
"\n",
|
||||||
|
"W tym podej\u015bciu losowe wybrane zast\u0119pujemy losowe s\u0142owa specjalnym\n",
|
||||||
|
"tokenem (`[MASK]`) i ka\u017cemy modelowi odgadywa\u0107 w ten spos\u00f3b\n",
|
||||||
|
"zamaskowane s\u0142owa (z uwzgl\u0119dnieniem r\u00f3wnie\u017c prawego kontekstu!).\n",
|
||||||
|
"\n",
|
||||||
|
"M\u00f3ci\u0105c \u015bci\u015ble, w jednym z pierwszych modeli tego typu (BERT)\n",
|
||||||
|
"zastosowano schemat, w kt\u00f3rym r\u00f3wnie\u017c niezamaskowane s\u0142owa s\u0105 odgadywane (!):\n",
|
||||||
|
"\n",
|
||||||
|
"- wybieramy losowe 15% wyraz\u00f3w do odgadni\u0119cia\n",
|
||||||
|
"- 80% z nich zast\u0119pujemy tokenem `[MASK]`,\n",
|
||||||
|
"- 10% zast\u0119pujemy innym losowym wyrazem,\n",
|
||||||
|
"- 10% pozostawiamy bez zmian.\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"/home/filipg/.local/lib/python3.9/site-packages/transformers/models/auto/modeling_auto.py:806: FutureWarning: The class `AutoModelWithLMHead` is deprecated and will be removed in a future version. Please use `AutoModelForCausalLM` for causal language models, `AutoModelForMaskedLM` for masked language models and `AutoModelForSeq2SeqLM` for encoder-decoder models.\n",
|
||||||
|
" warnings.warn(\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"W kt\u00f3rym pa\u0144stwie le\u017cy Bombaj? W USA. (score: 0.16715531051158905)\n",
|
||||||
|
"W kt\u00f3rym pa\u0144stwie le\u017cy Bombaj? W India. (score: 0.09912960231304169)\n",
|
||||||
|
"W kt\u00f3rym pa\u0144stwie le\u017cy Bombaj? W Indian. (score: 0.039642028510570526)\n",
|
||||||
|
"W kt\u00f3rym pa\u0144stwie le\u017cy Bombaj? W Nepal. (score: 0.027137665078043938)\n",
|
||||||
|
"W kt\u00f3rym pa\u0144stwie le\u017cy Bombaj? W Pakistan. (score: 0.027065709233283997)\n",
|
||||||
|
"W kt\u00f3rym pa\u0144stwie le\u017cy Bombaj? W Polsce. (score: 0.023737527430057526)\n",
|
||||||
|
"W kt\u00f3rym pa\u0144stwie le\u017cy Bombaj? W .... (score: 0.02306722290813923)\n",
|
||||||
|
"W kt\u00f3rym pa\u0144stwie le\u017cy Bombaj? W Bangladesh. (score: 0.022106658667325974)\n",
|
||||||
|
"W kt\u00f3rym pa\u0144stwie le\u017cy Bombaj? W .... (score: 0.01628892682492733)\n",
|
||||||
|
"W kt\u00f3rym pa\u0144stwie le\u017cy Bombaj? W Niemczech. (score: 0.014501162804663181)\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"from transformers import AutoModelWithLMHead, AutoTokenizer\n",
|
||||||
|
"import torch\n",
|
||||||
|
"\n",
|
||||||
|
"tokenizer = AutoTokenizer.from_pretrained(\"xlm-roberta-large\")\n",
|
||||||
|
"model = AutoModelWithLMHead.from_pretrained(\"xlm-roberta-large\")\n",
|
||||||
|
"\n",
|
||||||
|
"sequence = f'W kt\u00f3rym pa\u0144stwie le\u017cy Bombaj? W {tokenizer.mask_token}.'\n",
|
||||||
|
"\n",
|
||||||
|
"input_ids = tokenizer.encode(sequence, return_tensors=\"pt\")\n",
|
||||||
|
"mask_token_index = torch.where(input_ids == tokenizer.mask_token_id)[1]\n",
|
||||||
|
"\n",
|
||||||
|
"token_logits = model(input_ids)[0]\n",
|
||||||
|
"mask_token_logits = token_logits[0, mask_token_index, :]\n",
|
||||||
|
"mask_token_logits = torch.softmax(mask_token_logits, dim=1)\n",
|
||||||
|
"\n",
|
||||||
|
"top_10 = torch.topk(mask_token_logits, 10, dim=1)\n",
|
||||||
|
"top_10_tokens = zip(top_10.indices[0].tolist(), top_10.values[0].tolist())\n",
|
||||||
|
"\n",
|
||||||
|
"for token, score in top_10_tokens:\n",
|
||||||
|
" print(sequence.replace(tokenizer.mask_token, tokenizer.decode([token])), f\"(score: {score})\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"Przyk\u0142ady: BERT, RoBERTa (r\u00f3wnie\u017c Polish RoBERTa).\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"### Podej\u015bcie generatywne (koder-dekoder).\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"System ma wygenerowa\u0107 odpowied\u017a na r\u00f3\u017cne pytania (r\u00f3wnie\u017c\n",
|
||||||
|
"odpowiadaj\u0105ce zadaniu MLM), np.:\n",
|
||||||
|
"\n",
|
||||||
|
"- \"translate English to German: That is good.\" => \"Das ist gut.\"\n",
|
||||||
|
"- \"cola sentence: The course is jumping well.\" => \"not acceptable\"\n",
|
||||||
|
"- \"summarize: state authorities dispatched emergency crews tuesday to survey the damage after an onslaught of severe weather in mississippi…\"\n",
|
||||||
|
" => \"six people hospitalized after a storm in attala county\"\n",
|
||||||
|
"- \"Thank you for <X> me to your party <Y> week.\" => <X> for inviting <Y> last <Z>\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"['World War II ended in World War II.',\n",
|
||||||
|
" 'World War II ended in 1945..',\n",
|
||||||
|
" 'World War II ended in 1945.',\n",
|
||||||
|
" 'World War II ended in 1945.',\n",
|
||||||
|
" 'World War II ended in 1945.']"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"from transformers import T5Tokenizer, T5Config, T5ForConditionalGeneration\n",
|
||||||
|
"\n",
|
||||||
|
"T5_PATH = 't5-base'\n",
|
||||||
|
"\n",
|
||||||
|
"t5_tokenizer = T5Tokenizer.from_pretrained(T5_PATH)\n",
|
||||||
|
"t5_config = T5Config.from_pretrained(T5_PATH)\n",
|
||||||
|
"t5_mlm = T5ForConditionalGeneration.from_pretrained(T5_PATH, config=t5_config)\n",
|
||||||
|
"\n",
|
||||||
|
"slot = '<extra_id_0>'\n",
|
||||||
|
"\n",
|
||||||
|
"text = f'World War II ended in {slot}.'\n",
|
||||||
|
"\n",
|
||||||
|
"encoded = t5_tokenizer.encode_plus(text, add_special_tokens=True, return_tensors='pt')\n",
|
||||||
|
"input_ids = encoded['input_ids']\n",
|
||||||
|
"\n",
|
||||||
|
"outputs = t5_mlm.generate(input_ids=input_ids,\n",
|
||||||
|
" num_beams=200, num_return_sequences=5,\n",
|
||||||
|
" max_length=5)\n",
|
||||||
|
"\n",
|
||||||
|
"_0_index = text.index(slot)\n",
|
||||||
|
"_result_prefix = text[:_0_index]\n",
|
||||||
|
"_result_suffix = text[_0_index+len(slot):]\n",
|
||||||
|
"\n",
|
||||||
|
"def _filter(output, end_token='<extra_id_1>'):\n",
|
||||||
|
" _txt = t5_tokenizer.decode(output[2:], skip_special_tokens=False, clean_up_tokenization_spaces=False)\n",
|
||||||
|
" if end_token in _txt:\n",
|
||||||
|
" _end_token_index = _txt.index(end_token)\n",
|
||||||
|
" return _result_prefix + _txt[:_end_token_index] + _result_suffix\n",
|
||||||
|
" else:\n",
|
||||||
|
" return _result_prefix + _txt + _result_suffix\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"results = [_filter(out) for out in outputs]\n",
|
||||||
|
"results"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"(Zob. [https://arxiv.org/pdf/1910.10683.pdf](https://arxiv.org/pdf/1910.10683.pdf))\n",
|
||||||
|
"\n",
|
||||||
|
"Przyk\u0142ad: T5, mT5\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3 (ipykernel)",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.9.6"
|
||||||
|
},
|
||||||
|
"org": null,
|
||||||
|
"author": "Filip Grali\u0144ski",
|
||||||
|
"email": "filipg@amu.edu.pl",
|
||||||
|
"lang": "pl",
|
||||||
|
"subtitle": "14.Pretrenowane modele j\u0119zyka[wyk\u0142ad]",
|
||||||
|
"title": "Ekstrakcja informacji",
|
||||||
|
"year": "2021"
|
||||||
},
|
},
|
||||||
{
|
"nbformat": 4,
|
||||||
"data": {
|
"nbformat_minor": 4
|
||||||
"text/plain": [
|
}
|
||||||
"[('Ġon', 0.6786560416221619),\n",
|
|
||||||
" ('Ġupon', 0.04339785501360893),\n",
|
|
||||||
" ('Ġheavily', 0.02208443358540535),\n",
|
|
||||||
" ('Ġin', 0.021049050614237785),\n",
|
|
||||||
" (',', 0.020188499242067337),\n",
|
|
||||||
" ('Ġa', 0.01833895780146122),\n",
|
|
||||||
" ('Ġvery', 0.017935041338205338),\n",
|
|
||||||
" ('Ġentirely', 0.017528969794511795),\n",
|
|
||||||
" ('Ġlargely', 0.016769640147686005),\n",
|
|
||||||
" ('Ġto', 0.01009418722242117),\n",
|
|
||||||
" ('Ġgreatly', 0.010009866207838058),\n",
|
|
||||||
" ('Ġnot', 0.009016563184559345),\n",
|
|
||||||
" ('Ġmore', 0.005853226874023676),\n",
|
|
||||||
" ('Ġprimarily', 0.005203146021813154),\n",
|
|
||||||
" ('Ġstrongly', 0.0034501152113080025),\n",
|
|
||||||
" ('Ġpartly', 0.0033184229396283627),\n",
|
|
||||||
" ('Ġmuch', 0.0033095215912908316),\n",
|
|
||||||
" ('Ġmostly', 0.0032150144688785076),\n",
|
|
||||||
" ('Ġmainly', 0.0030899408739060163),\n",
|
|
||||||
" ('Ġfor', 0.003034428460523486),\n",
|
|
||||||
" ('.', 0.0028878094162791967),\n",
|
|
||||||
" ('Ġboth', 0.0028405177872627974),\n",
|
|
||||||
" ('Ġsomewhat', 0.0028194624464958906),\n",
|
|
||||||
" ('Ġcru', 0.002263976726680994),\n",
|
|
||||||
" ('Ġas', 0.00221616611815989),\n",
|
|
||||||
" ('Ġof', 0.0022000609897077084),\n",
|
|
||||||
" ('Ġalmost', 0.001968063646927476),\n",
|
|
||||||
" ('Ġat', 0.0018015997484326363),\n",
|
|
||||||
" ('Ġhighly', 0.0017461496172472835),\n",
|
|
||||||
" ('Ġcompletely', 0.001692073536105454)]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 5,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"import torch\n",
|
|
||||||
"from transformers import GPT2Tokenizer, GPT2LMHeadModel\n",
|
|
||||||
"tokenizer = GPT2Tokenizer.from_pretrained('gpt2-large')\n",
|
|
||||||
"model = GPT2LMHeadModel.from_pretrained('gpt2-large')\n",
|
|
||||||
"text = \"This issue depends\"\n",
|
|
||||||
"encoded_input = tokenizer(text, return_tensors='pt')\n",
|
|
||||||
"output = model(**encoded_input)\n",
|
|
||||||
"next_token_probs = torch.softmax(output[0][:, -1, :][0], dim=0)\n",
|
|
||||||
"\n",
|
|
||||||
"next_token_probs\n",
|
|
||||||
"nb_of_tokens = next_token_probs.size()[0]\n",
|
|
||||||
"print(nb_of_tokens)\n",
|
|
||||||
"\n",
|
|
||||||
"_, top_k_indices = torch.topk(next_token_probs, 30, sorted=True)\n",
|
|
||||||
"\n",
|
|
||||||
"words = tokenizer.convert_ids_to_tokens(top_k_indices)\n",
|
|
||||||
"\n",
|
|
||||||
"top_probs = []\n",
|
|
||||||
"\n",
|
|
||||||
"for ix in range(len(top_k_indices)):\n",
|
|
||||||
" top_probs.append((words[ix], next_token_probs[top_k_indices[ix]].item()))\n",
|
|
||||||
"\n",
|
|
||||||
"top_probs"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Zalety tego podejścia:\n",
|
|
||||||
"\n",
|
|
||||||
"- prostota,\n",
|
|
||||||
"- dobra podstawa do strojenia systemów generowania tekstu zwłaszcza\n",
|
|
||||||
" „otwartego” (systemy dialogowe, generowanie (fake) newsów, streszczanie tekstu),\n",
|
|
||||||
" ale niekoniecznie tłumaczenia maszynowego,\n",
|
|
||||||
"- zaskakująca skuteczność przy uczeniu *few-shot* i *zero-shot*.\n",
|
|
||||||
"\n",
|
|
||||||
"Wady:\n",
|
|
||||||
"\n",
|
|
||||||
"- asymetryczność, przetwarzanie tylko z lewej do prawej, preferencja\n",
|
|
||||||
" dla lewego kontekstu,\n",
|
|
||||||
"- mniejsza skuteczność przy dostrajaniu do zadań klasyfikacji i innych zadań\n",
|
|
||||||
" niepolegających na prostym generowaniu.\n",
|
|
||||||
"\n",
|
|
||||||
"Przykłady modeli: GPT, GPT-2, GPT-3, DialoGPT.\n",
|
|
||||||
"\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Maskowanie słów (BERT)\n",
|
|
||||||
"\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Inną metodą jest maskowanie słów (*Masked Language Modeling*, *MLM*).\n",
|
|
||||||
"\n",
|
|
||||||
"W tym podejściu losowe wybrane zastępujemy losowe słowa specjalnym\n",
|
|
||||||
"tokenem (`[MASK]`) i każemy modelowi odgadywać w ten sposób\n",
|
|
||||||
"zamaskowane słowa (z uwzględnieniem również prawego kontekstu!).\n",
|
|
||||||
"\n",
|
|
||||||
"Móciąc ściśle, w jednym z pierwszych modeli tego typu (BERT)\n",
|
|
||||||
"zastosowano schemat, w którym również niezamaskowane słowa są odgadywane (!):\n",
|
|
||||||
"\n",
|
|
||||||
"- wybieramy losowe 15% wyrazów do odgadnięcia\n",
|
|
||||||
"- 80% z nich zastępujemy tokenem `[MASK]`,\n",
|
|
||||||
"- 10% zastępujemy innym losowym wyrazem,\n",
|
|
||||||
"- 10% pozostawiamy bez zmian.\n",
|
|
||||||
"\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 1,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"# Out[3]:"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"from transformers import AutoModelWithLMHead, AutoTokenizer\n",
|
|
||||||
"import torch\n",
|
|
||||||
"\n",
|
|
||||||
"tokenizer = AutoTokenizer.from_pretrained(\"xlm-roberta-large\")\n",
|
|
||||||
"model = AutoModelWithLMHead.from_pretrained(\"xlm-roberta-large\")\n",
|
|
||||||
"\n",
|
|
||||||
"sequence = f'II wojna światowa zakończyła się w {tokenizer.mask_token} roku.'\n",
|
|
||||||
"\n",
|
|
||||||
"input_ids = tokenizer.encode(sequence, return_tensors=\"pt\")\n",
|
|
||||||
"mask_token_index = torch.where(input_ids == tokenizer.mask_token_id)[1]\n",
|
|
||||||
"\n",
|
|
||||||
"token_logits = model(input_ids)[0]\n",
|
|
||||||
"mask_token_logits = token_logits[0, mask_token_index, :]\n",
|
|
||||||
"mask_token_logits = torch.softmax(mask_token_logits, dim=1)\n",
|
|
||||||
"\n",
|
|
||||||
"top_10 = torch.topk(mask_token_logits, 10, dim=1)\n",
|
|
||||||
"top_10_tokens = zip(top_10.indices[0].tolist(), top_10.values[0].tolist())\n",
|
|
||||||
"\n",
|
|
||||||
"for token, score in top_10_tokens:\n",
|
|
||||||
" print(sequence.replace(tokenizer.mask_token, tokenizer.decode([token])), f\"(score: {score})\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"Przykłady: BERT, RoBERTa (również Polish RoBERTa).\n",
|
|
||||||
"\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"### Podejście generatywne (koder-dekoder).\n",
|
|
||||||
"\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"System ma wygenerować odpowiedź na różne pytania (również\n",
|
|
||||||
"odpowiadające zadaniu MLM), np.:\n",
|
|
||||||
"\n",
|
|
||||||
"- \"translate English to German: That is good.\" => \"Das ist gut.\"\n",
|
|
||||||
"- \"cola sentence: The course is jumping well.\" => \"not acceptable\"\n",
|
|
||||||
"- \"summarize: state authorities dispatched emergency crews tuesday to survey the damage after an onslaught of severe weather in mississippi…\"\n",
|
|
||||||
" => \"six people hospitalized after a storm in attala county\"\n",
|
|
||||||
"- \"Thank you for <X> me to your party <Y> week.\" => <X> for inviting <Y> last <Z>\n",
|
|
||||||
"\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 1,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"from transformers import T5Tokenizer, T5Config, T5ForConditionalGeneration\n",
|
|
||||||
"\n",
|
|
||||||
"T5_PATH = 't5-base'\n",
|
|
||||||
"\n",
|
|
||||||
"t5_tokenizer = T5Tokenizer.from_pretrained(T5_PATH)\n",
|
|
||||||
"t5_config = T5Config.from_pretrained(T5_PATH)\n",
|
|
||||||
"t5_mlm = T5ForConditionalGeneration.from_pretrained(T5_PATH, config=t5_config)\n",
|
|
||||||
"\n",
|
|
||||||
"slot = '<extra_id_0>'\n",
|
|
||||||
"\n",
|
|
||||||
"text = f'Warsaw is the {slot} of Poland.'\n",
|
|
||||||
"\n",
|
|
||||||
"encoded = t5_tokenizer.encode_plus(text, add_special_tokens=True, return_tensors='pt')\n",
|
|
||||||
"input_ids = encoded['input_ids']\n",
|
|
||||||
"\n",
|
|
||||||
"outputs = t5_mlm.generate(input_ids=input_ids,\n",
|
|
||||||
" num_beams=200, num_return_sequences=5,\n",
|
|
||||||
" max_length=5)\n",
|
|
||||||
"\n",
|
|
||||||
"_0_index = text.index(slot)\n",
|
|
||||||
"_result_prefix = text[:_0_index]\n",
|
|
||||||
"_result_suffix = text[_0_index+len(slot):]\n",
|
|
||||||
"\n",
|
|
||||||
"def _filter(output, end_token='<extra_id_1>'):\n",
|
|
||||||
" _txt = t5_tokenizer.decode(output[2:], skip_special_tokens=False, clean_up_tokenization_spaces=False)\n",
|
|
||||||
" if end_token in _txt:\n",
|
|
||||||
" _end_token_index = _txt.index(end_token)\n",
|
|
||||||
" return _result_prefix + _txt[:_end_token_index] + _result_suffix\n",
|
|
||||||
" else:\n",
|
|
||||||
" return _result_prefix + _txt + _result_suffix\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"results = [_filter(out) for out in outputs]\n",
|
|
||||||
"results"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "markdown",
|
|
||||||
"metadata": {},
|
|
||||||
"source": [
|
|
||||||
"(Zob. [https://arxiv.org/pdf/1910.10683.pdf](https://arxiv.org/pdf/1910.10683.pdf))\n",
|
|
||||||
"\n",
|
|
||||||
"Przykład: T5, mT5\n",
|
|
||||||
"\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"metadata": {
|
|
||||||
"kernelspec": {
|
|
||||||
"display_name": "Python 3",
|
|
||||||
"language": "python",
|
|
||||||
"name": "python3"
|
|
||||||
},
|
|
||||||
"language_info": {
|
|
||||||
"codemirror_mode": {
|
|
||||||
"name": "ipython",
|
|
||||||
"version": 3
|
|
||||||
},
|
|
||||||
"file_extension": ".py",
|
|
||||||
"mimetype": "text/x-python",
|
|
||||||
"name": "python",
|
|
||||||
"nbconvert_exporter": "python",
|
|
||||||
"pygments_lexer": "ipython3",
|
|
||||||
"version": "3.9.2"
|
|
||||||
},
|
|
||||||
"org": null
|
|
||||||
},
|
|
||||||
"nbformat": 4,
|
|
||||||
"nbformat_minor": 1
|
|
||||||
}
|
|
@ -1,5 +1,19 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"![Logo 1](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech1.jpg)\n",
|
||||||
|
"<div class=\"alert alert-block alert-info\">\n",
|
||||||
|
"<h1> Ekstrakcja informacji </h1>\n",
|
||||||
|
"<h2> 15. <i>Sieci Transformer i ich zastosowanie w ekstrakcji informacji</i> [wykład]</h2> \n",
|
||||||
|
"<h3> Filip Graliński (2021)</h3>\n",
|
||||||
|
"</div>\n",
|
||||||
|
"\n",
|
||||||
|
"![Logo 2](https://git.wmi.amu.edu.pl/AITech/Szablon/raw/branch/master/Logotyp_AITech2.jpg)"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
@ -226,11 +240,14 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
"author": "Filip Graliński",
|
||||||
|
"email": "filipg@amu.edu.pl",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3",
|
"display_name": "Python 3 (ipykernel)",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
|
"lang": "pl",
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
"name": "ipython",
|
"name": "ipython",
|
||||||
@ -241,10 +258,13 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.9.2"
|
"version": "3.9.6"
|
||||||
},
|
},
|
||||||
"org": null
|
"org": null,
|
||||||
|
"subtitle": "15.Sieci Transformer i ich zastosowanie w ekstrakcji informacji[wykład]",
|
||||||
|
"title": "Ekstrakcja informacji",
|
||||||
|
"year": "2021"
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 1
|
"nbformat_minor": 4
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user