# Translation memory: (source sentence, target sentence) pairs.
translation_memory = [
    ('Wciśnij przycisk Enter', 'Press the ENTER button'),
    ('Sprawdź ustawienia sieciowe', 'Check the network settings'),
]


def tm_lookup(sentence):
    """Return the translations of every memory entry equal to `sentence`, ignoring case.

    Args:
        sentence: Source-language sentence to look up.

    Returns:
        A list with the target side of each entry in `translation_memory`
        whose source side matches `sentence` caselessly; an empty list when
        nothing matches.
    """
    # casefold() is the caseless-comparison form of lower(): it also maps
    # characters such as 'ß' to 'ss'. Computed once, outside the scan.
    wanted = sentence.casefold()
    return [
        target
        for source, target in translation_memory
        if source.casefold() == wanted
    ]


tm_lookup('Wciśnij przycisk ENTER')  # -> ['Press the ENTER button']
import unicodedata

# Translation memory: (source sentence, target sentence) pairs.
translation_memory = [
    ('Wciśnij przycisk Enter', 'Press the ENTER button'),
    ('Sprawdź ustawienia sieciowe', 'Check the network settings'),
]


def sentence_cleanup(sentence):
    """Normalise a sentence for comparison.

    Drops every whitespace character and every Unicode punctuation character
    (general category 'P*': commas, brackets, quotes, dashes, ...) and
    lower-cases the rest.

    Args:
        sentence: The text to normalise.

    Returns:
        The normalised string; empty when `sentence` contains only
        whitespace and punctuation.
    """
    kept = [
        char
        for char in sentence
        if not char.isspace()
        and not unicodedata.category(char).startswith('P')
    ]
    return ''.join(kept).lower()


def tm_lookup(sentence):
    """Find the translation of `sentence`, ignoring case, whitespace and punctuation.

    Args:
        sentence: Source-language sentence to look up.

    Returns:
        The target side of the first entry in `translation_memory` whose
        normalised source equals the normalised `sentence`, or None when no
        entry matches.
    """
    # Normalise the query once instead of once per memory entry.
    wanted = sentence_cleanup(sentence)
    for source, target in translation_memory:
        if sentence_cleanup(source) == wanted:
            return target
    return None


tm_lookup('Wciśnij przycisk:ENTER!')  # -> 'Press the ENTER button'
import re

# Glossary: (source term, target term) pairs; source terms are lower-case single words.
glossary = [('komputer', 'computer'), ('przycisk', 'button'), ('drukarka', 'printer')]


def glossary_lookup(sentence):
    """Return the glossary entries whose source term occurs as a word in `sentence`.

    Matching ignores case and any punctuation attached to a word, so
    'Drukarka,' still matches the term 'drukarka'.

    Args:
        sentence: Source-language sentence to scan.

    Returns:
        A list of (source, target) tuples from `glossary`, in glossary order;
        empty when no term occurs in the sentence.
    """
    # \w+ keeps only runs of letters/digits/underscore, so punctuation glued
    # to a word is dropped; a set gives constant-time membership tests.
    words = set(re.findall(r'\w+', sentence.lower()))
    return [entry for entry in glossary if entry[0] in words]


glossary_lookup('Każda DRUKARKA posiada PrzycisK wznowienia drukowania')