{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Jak stworzyć swojego robota?" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Narzędzia uruchamiane z wiersza poleceń\n", "\n", "* wget\n", "* curl\n", "* aria2c" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "--2021-03-17 09:25:32-- https://laboratoria.wmi.amu.edu.pl/\n", "Loaded CA certificate '/etc/ssl/certs/ca-certificates.crt'\n", "Resolving laboratoria.wmi.amu.edu.pl (laboratoria.wmi.amu.edu.pl)... 150.254.78.3\n", "Connecting to laboratoria.wmi.amu.edu.pl (laboratoria.wmi.amu.edu.pl)|150.254.78.3|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 6269 (6.1K) [text/html]\n", "Saving to: 'laboratoria.wmi.amu.edu.pl/index.html'\n", "\n", "laboratoria.wmi.amu 100%[===================>] 6.12K --.-KB/s in 0.001s \n", "\n", "2021-03-17 09:25:32 (4.19 MB/s) - 'laboratoria.wmi.amu.edu.pl/index.html' saved [6269/6269]\n", "\n", "Loading robots.txt; please ignore errors.\n", "--2021-03-17 09:25:32-- https://laboratoria.wmi.amu.edu.pl/robots.txt\n", "Reusing existing connection to laboratoria.wmi.amu.edu.pl:443.\n", "HTTP request sent, awaiting response... 403 Forbidden\n", "2021-03-17 09:25:32 ERROR 403: Forbidden.\n", "\n", "--2021-03-17 09:25:32-- https://laboratoria.wmi.amu.edu.pl/page-resources/wmi.png\n", "Reusing existing connection to laboratoria.wmi.amu.edu.pl:443.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 596 [image/png]\n", "Saving to: 'laboratoria.wmi.amu.edu.pl/page-resources/wmi.png'\n", "\n", "laboratoria.wmi.amu 100%[===================>] 596 --.-KB/s in 0s \n", "\n", "2021-03-17 09:25:32 (53.7 MB/s) - 'laboratoria.wmi.amu.edu.pl/page-resources/wmi.png' saved [596/596]\n", "\n", "--2021-03-17 09:25:32-- https://laboratoria.wmi.amu.edu.pl/css/labs.css\n", "Reusing existing connection to laboratoria.wmi.amu.edu.pl:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 6919 (6.8K) [text/css]\n", "Saving to: 'laboratoria.wmi.amu.edu.pl/css/labs.css'\n", "\n", "laboratoria.wmi.amu 100%[===================>] 6.76K --.-KB/s in 0s \n", "\n", "2021-03-17 09:25:32 (18.5 MB/s) - 'laboratoria.wmi.amu.edu.pl/css/labs.css' saved [6919/6919]\n", "\n", "--2021-03-17 09:25:32-- https://laboratoria.wmi.amu.edu.pl/en/\n", "Reusing existing connection to laboratoria.wmi.amu.edu.pl:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 5946 (5.8K) [text/html]\n", "Saving to: 'laboratoria.wmi.amu.edu.pl/en/index.html'\n", "\n", "laboratoria.wmi.amu 100%[===================>] 5.81K --.-KB/s in 0.002s \n", "\n", "2021-03-17 09:25:32 (3.04 MB/s) - 'laboratoria.wmi.amu.edu.pl/en/index.html' saved [5946/5946]\n", "\n", "--2021-03-17 09:25:32-- https://laboratoria.wmi.amu.edu.pl/page-resources/wmi_transparent.png\n", "Reusing existing connection to laboratoria.wmi.amu.edu.pl:443.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 15034 (15K) [image/png]\n", "Saving to: 'laboratoria.wmi.amu.edu.pl/page-resources/wmi_transparent.png'\n", "\n", "laboratoria.wmi.amu 100%[===================>] 14.68K --.-KB/s in 0.005s \n", "\n", "2021-03-17 09:25:32 (2.62 MB/s) - 'laboratoria.wmi.amu.edu.pl/page-resources/wmi_transparent.png' saved [15034/15034]\n", "\n", "--2021-03-17 09:25:32-- https://laboratoria.wmi.amu.edu.pl/godziny-otwarcia/\n", "Reusing existing connection to laboratoria.wmi.amu.edu.pl:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 5317 (5.2K) [text/html]\n", "Saving to: 'laboratoria.wmi.amu.edu.pl/godziny-otwarcia/index.html'\n", "\n", "laboratoria.wmi.amu 100%[===================>] 5.19K --.-KB/s in 0s \n", "\n", "2021-03-17 09:25:32 (87.9 MB/s) - 'laboratoria.wmi.amu.edu.pl/godziny-otwarcia/index.html' saved [5317/5317]\n", "\n", "--2021-03-17 09:25:32-- https://laboratoria.wmi.amu.edu.pl/kontakt/\n", "Reusing existing connection to laboratoria.wmi.amu.edu.pl:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 4644 (4.5K) [text/html]\n", "Saving to: 'laboratoria.wmi.amu.edu.pl/kontakt/index.html'\n", "\n", "laboratoria.wmi.amu 100%[===================>] 4.54K --.-KB/s in 0s \n", "\n", "2021-03-17 09:25:32 (142 MB/s) - 'laboratoria.wmi.amu.edu.pl/kontakt/index.html' saved [4644/4644]\n", "\n", "--2021-03-17 09:25:32-- https://laboratoria.wmi.amu.edu.pl/pierwsze-kroki/\n", "Reusing existing connection to laboratoria.wmi.amu.edu.pl:443.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 6639 (6.5K) [text/html]\n", "Saving to: 'laboratoria.wmi.amu.edu.pl/pierwsze-kroki/index.html'\n", "\n", "laboratoria.wmi.amu 100%[===================>] 6.48K --.-KB/s in 0.002s \n", "\n", "2021-03-17 09:25:32 (3.61 MB/s) - 'laboratoria.wmi.amu.edu.pl/pierwsze-kroki/index.html' saved [6639/6639]\n", "\n", "--2021-03-17 09:25:32-- https://laboratoria.wmi.amu.edu.pl/przewodnik/\n", "Reusing existing connection to laboratoria.wmi.amu.edu.pl:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 5454 (5.3K) [text/html]\n", "Saving to: 'laboratoria.wmi.amu.edu.pl/przewodnik/index.html'\n", "\n", "laboratoria.wmi.amu 100%[===================>] 5.33K --.-KB/s in 0.002s \n", "\n", "2021-03-17 09:25:32 (2.97 MB/s) - 'laboratoria.wmi.amu.edu.pl/przewodnik/index.html' saved [5454/5454]\n", "\n", "--2021-03-17 09:25:32-- https://laboratoria.wmi.amu.edu.pl/regulamin-laboratoriow-komputerowych/\n", "Reusing existing connection to laboratoria.wmi.amu.edu.pl:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 14393 (14K) [text/html]\n", "Saving to: 'laboratoria.wmi.amu.edu.pl/regulamin-laboratoriow-komputerowych/index.html'\n", "\n", "laboratoria.wmi.amu 100%[===================>] 14.06K --.-KB/s in 0.005s \n", "\n", "2021-03-17 09:25:32 (2.65 MB/s) - 'laboratoria.wmi.amu.edu.pl/regulamin-laboratoriow-komputerowych/index.html' saved [14393/14393]\n", "\n", "--2021-03-17 09:25:32-- https://laboratoria.wmi.amu.edu.pl/nie-odpowiadamy/\n", "Reusing existing connection to laboratoria.wmi.amu.edu.pl:443.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 4481 (4.4K) [text/html]\n", "Saving to: 'laboratoria.wmi.amu.edu.pl/nie-odpowiadamy/index.html'\n", "\n", "laboratoria.wmi.amu 100%[===================>] 4.38K --.-KB/s in 0s \n", "\n", "2021-03-17 09:25:32 (101 MB/s) - 'laboratoria.wmi.amu.edu.pl/nie-odpowiadamy/index.html' saved [4481/4481]\n", "\n", "--2021-03-17 09:25:32-- https://laboratoria.wmi.amu.edu.pl/laboratoria/oprogramowanie/\n", "Reusing existing connection to laboratoria.wmi.amu.edu.pl:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 12821 (13K) [text/html]\n", "Saving to: 'laboratoria.wmi.amu.edu.pl/laboratoria/oprogramowanie/index.html'\n", "\n", "laboratoria.wmi.amu 100%[===================>] 12.52K --.-KB/s in 0.004s \n", "\n", "2021-03-17 09:25:32 (2.93 MB/s) - 'laboratoria.wmi.amu.edu.pl/laboratoria/oprogramowanie/index.html' saved [12821/12821]\n", "\n", "--2021-03-17 09:25:32-- https://laboratoria.wmi.amu.edu.pl/uslugi/\n", "Reusing existing connection to laboratoria.wmi.amu.edu.pl:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 10688 (10K) [text/html]\n", "Saving to: 'laboratoria.wmi.amu.edu.pl/uslugi/index.html'\n", "\n", "laboratoria.wmi.amu 100%[===================>] 10.44K --.-KB/s in 0.004s \n", "\n", "2021-03-17 09:25:32 (2.74 MB/s) - 'laboratoria.wmi.amu.edu.pl/uslugi/index.html' saved [10688/10688]\n", "\n", "--2021-03-17 09:25:32-- https://laboratoria.wmi.amu.edu.pl/uslugi-uniwersyteckie/\n", "Reusing existing connection to laboratoria.wmi.amu.edu.pl:443.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 4240 (4.1K) [text/html]\n", "Saving to: 'laboratoria.wmi.amu.edu.pl/uslugi-uniwersyteckie/index.html'\n", "\n", "laboratoria.wmi.amu 100%[===================>] 4.14K --.-KB/s in 0.001s \n", "\n", "2021-03-17 09:25:32 (3.27 MB/s) - 'laboratoria.wmi.amu.edu.pl/uslugi-uniwersyteckie/index.html' saved [4240/4240]\n", "\n", "--2021-03-17 09:25:32-- https://laboratoria.wmi.amu.edu.pl/problemy/docker/\n", "Reusing existing connection to laboratoria.wmi.amu.edu.pl:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 6326 (6.2K) [text/html]\n", "Saving to: 'laboratoria.wmi.amu.edu.pl/problemy/docker/index.html'\n", "\n", "laboratoria.wmi.amu 100%[===================>] 6.18K --.-KB/s in 0s \n", "\n", "2021-03-17 09:25:32 (182 MB/s) - 'laboratoria.wmi.amu.edu.pl/problemy/docker/index.html' saved [6326/6326]\n", "\n", "--2021-03-17 09:25:32-- https://laboratoria.wmi.amu.edu.pl/serwery-terminalowe/\n", "Reusing existing connection to laboratoria.wmi.amu.edu.pl:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 382 [text/html]\n", "Saving to: 'laboratoria.wmi.amu.edu.pl/serwery-terminalowe/index.html'\n", "\n", "laboratoria.wmi.amu 100%[===================>] 382 --.-KB/s in 0s \n", "\n", "2021-03-17 09:25:32 (15.9 MB/s) - 'laboratoria.wmi.amu.edu.pl/serwery-terminalowe/index.html' saved [382/382]\n", "\n", "--2021-03-17 09:25:32-- https://laboratoria.wmi.amu.edu.pl/vpn/\n", "Reusing existing connection to laboratoria.wmi.amu.edu.pl:443.\n", "HTTP request sent, awaiting response... 
200 OK\n", "Length: 334 [text/html]\n", "Saving to: 'laboratoria.wmi.amu.edu.pl/vpn/index.html'\n", "\n", "laboratoria.wmi.amu 100%[===================>] 334 --.-KB/s in 0s \n", "\n", "2021-03-17 09:25:32 (16.3 MB/s) - 'laboratoria.wmi.amu.edu.pl/vpn/index.html' saved [334/334]\n", "\n", "--2021-03-17 09:25:32-- https://laboratoria.wmi.amu.edu.pl/a126\n", "Reusing existing connection to laboratoria.wmi.amu.edu.pl:443.\n", "HTTP request sent, awaiting response... 301 Moved Permanently\n", "Location: https://laboratoria.wmi.amu.edu.pl/a126/ [following]\n", "--2021-03-17 09:25:32-- https://laboratoria.wmi.amu.edu.pl/a126/\n", "Reusing existing connection to laboratoria.wmi.amu.edu.pl:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 3671 (3.6K) [text/html]\n", "Saving to: 'laboratoria.wmi.amu.edu.pl/a126'\n", "\n", "laboratoria.wmi.amu 100%[===================>] 3.58K --.-KB/s in 0s \n", "\n", "2021-03-17 09:25:32 (194 MB/s) - 'laboratoria.wmi.amu.edu.pl/a126' saved [3671/3671]\n", "\n", "--2021-03-17 09:25:32-- https://laboratoria.wmi.amu.edu.pl/irc\n", "Reusing existing connection to laboratoria.wmi.amu.edu.pl:443.\n", "HTTP request sent, awaiting response... 301 Moved Permanently\n", "Location: https://laboratoria.wmi.amu.edu.pl/irc/ [following]\n", "--2021-03-17 09:25:32-- https://laboratoria.wmi.amu.edu.pl/irc/\n", "Reusing existing connection to laboratoria.wmi.amu.edu.pl:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 3946 (3.9K) [text/html]\n", "Saving to: 'laboratoria.wmi.amu.edu.pl/irc'\n", "\n", "laboratoria.wmi.amu 100%[===================>] 3.85K --.-KB/s in 0s \n", "\n", "2021-03-17 09:25:32 (243 MB/s) - 'laboratoria.wmi.amu.edu.pl/irc' saved [3946/3946]\n", "\n", "--2021-03-17 09:25:32-- https://laboratoria.wmi.amu.edu.pl/godziny-otwarcia\n", "Reusing existing connection to laboratoria.wmi.amu.edu.pl:443.\n", "HTTP request sent, awaiting response... 
301 Moved Permanently\n", "Location: https://laboratoria.wmi.amu.edu.pl/godziny-otwarcia/ [following]\n", "--2021-03-17 09:25:32-- https://laboratoria.wmi.amu.edu.pl/godziny-otwarcia/\n", "Reusing existing connection to laboratoria.wmi.amu.edu.pl:443.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 5317 (5.2K) [text/html]\n", "laboratoria.wmi.amu.edu.pl/godziny-otwarcia: Is a directory\n", "\n", "Cannot write to 'laboratoria.wmi.amu.edu.pl/godziny-otwarcia' (Is a directory).\n", "--2021-03-17 09:25:32-- https://laboratoria.wmi.amu.edu.pl/js/fix.js\n", "Connecting to laboratoria.wmi.amu.edu.pl (laboratoria.wmi.amu.edu.pl)|150.254.78.3|:443... connected.\n", "HTTP request sent, awaiting response... 200 OK\n", "Length: 62 [application/javascript]\n", "Saving to: 'laboratoria.wmi.amu.edu.pl/js/fix.js'\n", "\n", "laboratoria.wmi.amu 100%[===================>] 62 --.-KB/s in 0s \n", "\n", "2021-03-17 09:25:32 (6.51 MB/s) - 'laboratoria.wmi.amu.edu.pl/js/fix.js' saved [62/62]\n", "\n", "FINISHED --2021-03-17 09:25:32--\n", "Total wall clock time: 0.3s\n", "Downloaded: 20 files, 115K in 0.03s (4.14 MB/s)\n" ] } ], "source": [ "# Pobierz rekurencyjnie, z ograniczeniem do jednego poziomu rekurencji \n", "! 
wget -r -l 1 https://laboratoria.wmi.amu.edu.pl/" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "http://www.almanachmuszyny.pl/spisy/1991/AM1991_02_muszynski_zamek_prawda_i_legenda.pdf\n", " out=1991-1.pdf\n", "http://www.almanachmuszyny.pl/spisy/1991/AM1991_03_muszyna_miasteczko_historyczne.pdf\n", " out=1991-2.pdf\n", "\n", "03/17 09:31:54 [\u001b[1;32mNOTICE\u001b[0m] Downloading 2 item(s)\n", "\n", "03/17 09:31:55 [\u001b[1;32mNOTICE\u001b[0m] Download complete: /home/filipg/ext/amu/aitech-eks/wyk/aria2c-example/1991-1.pdf\n", "\n", "03/17 09:31:55 [\u001b[1;32mNOTICE\u001b[0m] Download complete: /home/filipg/ext/amu/aitech-eks/wyk/aria2c-example/1991-2.pdf\n", "\n", "Download Results:\n", "gid |stat|avg speed |path/URI\n", "======+====+===========+=======================================================\n", "3bf8a7|\u001b[1;32mOK\u001b[0m | 458KiB/s|/home/filipg/ext/amu/aitech-eks/wyk/aria2c-example/1991-1.pdf\n", "e0c4c1|\u001b[1;32mOK\u001b[0m | 677KiB/s|/home/filipg/ext/amu/aitech-eks/wyk/aria2c-example/1991-2.pdf\n", "\n", "Status Legend:\n", "(OK):download completed.\n" ] } ], "source": [ "# aria2c pozwala łatwo pobrać listę adresów URL, dla każdego adresu można ustawić specyficzne opcje\n", "! (cd aria2c-example && cat aria.in)\n", "! 
(cd aria2c-example && aria2c -i aria.in)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Biblioteki/frameworki do tworzenia robotów\n", "\n", "### Python \n", "\n", "Użyteczne biblioteki: \n", "\n", "* urllib\n", "* requests\n", "* Beautiful Soup (do parsowania HTML-a)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[('/en/', 'English'), ('/', '\\n\\n Laboratoria Komputerowe\\n '), ('/', 'Strona główna'), ('/godziny-otwarcia/', 'Godziny otwarcia'), ('/kontakt/', 'Kontakt'), ('/pierwsze-kroki/', 'Pierwsze kroki'), ('/przewodnik/', 'Przewodnik po stronie'), ('/regulamin-laboratoriow-komputerowych/', 'Regulamin Wydziałowych Laboratoriów Komputerowych'), ('/nie-odpowiadamy/', 'Za co nie odpowiadamy'), ('/laboratoria/oprogramowanie/', 'Laboratoria'), ('/uslugi/', 'Usługi'), ('/uslugi-uniwersyteckie/', 'Usługi Uniwersyteckie'), ('/problemy/docker/', 'Problemy'), ('/serwery-terminalowe/', 'serwera terminalowego'), ('/vpn/', 'VPN'), ('https://help.wmi.amu.edu.pl/', 'https://help.wmi.amu.edu.pl/'), ('/a126', 'A1-26'), ('https://help.wmi.amu.edu.pl/', 'System helpdeskowy'), ('mailto:helpdesk@wmi.amu.edu.pl', 'helpdesk@wmi.amu.edu.pl'), ('/irc', 'users'), ('https://www.facebook.com/wmilabs/', 'Facebook'), ('/godziny-otwarcia', 'Godziny otwarcia')]\n" ] } ], "source": [ "import urllib\n", "import requests\n", "from bs4 import BeautifulSoup\n", "\n", "url = 'https://laboratoria.wmi.amu.edu.pl/'\n", "# A timeout keeps the request from waiting forever on an unresponsive server.\n", "response = requests.get(url, timeout=10)\n", "# Stop on a 4xx/5xx reply instead of parsing an error page.\n", "response.raise_for_status()\n", "soup = BeautifulSoup(response.content, \"html.parser\")\n", "\n", "# Extract all links (A elements). href=True skips anchors that have no\n", "# href attribute, for which link['href'] would raise KeyError.\n", "links = soup.find_all('a', href=True)\n", "print([(link['href'], link.get_text()) for link in links])" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## XPath\n", "\n", "XPath – język służący do adresowania części dokumentu XML.\n", "\n", "* `/html/body/div/p` – pełna ścieżka do wszystkich akapitów wewnątrz głównych elementów `
<div>`\n", "* `//div/p` – wszystkie akapity w jakichkolwiek elementach `<div>
`\n", "* `//a/@href` - wartości atrybutu `href` dla wszystkich linków\n", "* `//p[@id='foo']/img[5]` - piąty (indeksowanie od 1!) obrazek wewnątrz akapitu o identyfikatorze foo\n", "* `//p[img]/a` - linki w akapitach zawierających obrazek\n", "\n", "Czym się różni:\n", "\n", "* `//img[3]` od `(//img)[3]` ?\n", "* `//p[img]/a` od `//p[//img]/a` ?\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['/', '/godziny-otwarcia/', '/kontakt/', '/pierwsze-kroki/', '/przewodnik/', '/regulamin-laboratoriow-komputerowych/', '/nie-odpowiadamy/', '/laboratoria/oprogramowanie/', '/uslugi/', '/uslugi-uniwersyteckie/', '/problemy/docker/']\n" ] } ], "source": [ "\n", "from urllib.request import urlopen\n", "from lxml import etree\n", "\n", "url = 'https://laboratoria.wmi.amu.edu.pl/'\n", "\n", "htmlparser = etree.HTMLParser()\n", "# The with-block closes the HTTP response once the document has been parsed.\n", "with urlopen(url) as response:\n", "    tree = etree.parse(response, htmlparser)\n", "# Links from the sidebar menu panel\n", "links = tree.xpath(\"//div[@class='sidebar-menu']//a/@href\")\n", "print(links)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Jak poradzić sobie z dynamicznymi stronami?" 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### HtmlUnit\n", "\n", "```java\n", "WebClient webClient = new WebClient();\n", "HtmlPage page = webClient.getPage(\"http://ceti.pl/?ceti=administracja\");\n", "\n", "HtmlForm form = page.getForms().get(2);\n", "\n", "HtmlTextInput loginField = form.getInputByName(\"login\");\n", "loginField.setValueAttribute(\"atrapa\");\n", "HtmlPasswordInput passField = form.getInputByName(\"pass\");\n", "passField.setValueAttribute(\"haslo1\");\n", "\n", "HtmlImageInput button = form.getInputByValue(\"OK\");\n", "HtmlPage page2 = (HtmlPage)button.click();\n", "\n", "HtmlPage page3 = webClient.getPage(\"https://tau4.ceti.pl/cgi-bin/logs-user-show.cgi\");\n", "System.out.println(page3.asXml());\n", "\n", "UnexpectedPage page4 = webClient.getPage(\"https://adm.tau4.ceti.pl/logs.zip\");\n", "InputStream istr = page4.getInputStream();\n", "``` \n", " " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Selenium" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['https://www.python.org/community/sigs/guidelines', 'https://www.python.org/dev/peps/pep-0585/', 'https://www.python.org/community/lists', 'https://www.python.org/doc/essays/list2str', 'https://www.python.org/dev/core-mentorship', 'https://www.python.org/dev/peps/pep-3128/', 'https://www.python.org/dev/peps/pep-0204/', 'https://www.python.org/community/sigs/coordination', 'https://www.python.org/psf/committees', 'https://www.python.org/dev/peps/pep-0225/', 'https://www.python.org/dev/peps/pep-3132/', 'https://www.python.org/community/sigs/current/doc-sig/stext', 'https://www.python.org/dev/peps/pep-0202/', 'https://www.python.org/dev/peps/pep-0274/', 'https://www.python.org/dev/peps/pep-0469/', 'https://www.python.org/dev/peps/pep-0289/', 'https://www.python.org/dev/peps/pep-0270/', 
'https://www.python.org/community/sigs/retired/string-sig', 'https://www.python.org/community/sigs/retired/progenv-sig', 'https://www.python.org/psf/records/board/minutes/2005-02-08']\n" ] } ], "source": [ "# A Selenium server must be started before running this cell:\n", "# wget https://selenium-release.storage.googleapis.com/3.141/selenium-server-standalone-3.141.59.jar\n", "# java -jar selenium-server-standalone-3.141.59.jar\n", "\n", "from selenium import webdriver\n", "from selenium.webdriver.common.desired_capabilities import DesiredCapabilities\n", "from selenium.webdriver.common.keys import Keys\n", "from selenium.webdriver.common.by import By\n", "\n", "driver = webdriver.Remote(\n", "    command_executor='http://127.0.0.1:4444/wd/hub',\n", "    desired_capabilities=DesiredCapabilities.CHROME)\n", "\n", "try:\n", "    driver.get(\"http://www.python.org\")\n", "    assert \"Python\" in driver.title\n", "    # Locate the search box through the generic find_element(By, value) call,\n", "    # matching the find_elements(By.XPATH, ...) call used below.\n", "    elem = driver.find_element(By.NAME, \"q\")\n", "    elem.clear()\n", "    elem.send_keys(\"list\")\n", "    elem.send_keys(Keys.RETURN)\n", "    links = driver.find_elements(By.XPATH, '//h3/a')\n", "    print([link.get_attribute('href') for link in links])\n", "finally:\n", "    # Runs even when a step above raises, so the driver is always shut down.\n", "    driver.quit()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Haskell i strzałki\n", "\n", "W języku Haskell można tworzyć roboty używając biblioteki HXT opartej na formalizmie strzałek (ang. _arrows_).\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.1" } }, "nbformat": 4, "nbformat_minor": 4 }