Compare commits
3 Commits
Author | SHA1 | Date |
---|---|---|
Kamil Kubiak | 959cf021a0 | |
Kamil Kubiak | 9a3d25dac1 | |
Kamil Kubiak | 1f7c4d72ad |
132
lab/lab_01.ipynb
132
lab/lab_01.ipynb
|
@ -251,10 +251,40 @@
|
|||
"tm_lookup('Wciśnij przycisk [ENTER]')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "36510584-d363-444d-bfdb-2f0260d197cd",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['Press the ENTER button']"
|
||||
]
|
||||
},
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"translation_memory = [('Wciśnij przycisk Enter', 'Press the ENTER button'), \n",
|
||||
" ('Sprawdź ustawienia sieciowe', 'Check the network settings')]\n",
|
||||
"\n",
|
||||
"def tm_lookup(sentence):\n",
|
||||
" return [entry[1] for entry in translation_memory if entry[0].lower() == sentence.lower()]\n",
|
||||
"\n",
|
||||
"#tm_lookup('Wciśnij przycisk Enter') # -> ['Press the ENTER button'] OK\n",
|
||||
"tm_lookup('Wciśnij przycisk ENTER') # -> ['Press the ENTER button'] OK"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "choice-committee",
|
||||
"metadata": {},
|
||||
"metadata": {
|
||||
"tags": []
|
||||
},
|
||||
"source": [
|
||||
"### Ćwiczenie 2: zmodyfikuj funkcję tm_lookup w taki sposób, aby nie brała pod uwagę znaków interpunkcyjnych. Rada - zdefiniuj funkcję sentence_similar."
|
||||
]
|
||||
|
@ -299,6 +329,66 @@
|
|||
"tm_lookup('Wymagane ponowne uruchomienie maszyny')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 24,
|
||||
"id": "25af95de-30e0-468c-8c0c-2478ce0d8856",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"wciśnijprzyciskenter\n",
|
||||
"wciśnijprzyciskenter\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'Press the ENTER button'"
|
||||
]
|
||||
},
|
||||
"execution_count": 24,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import re\n",
|
||||
"translation_memory = [('Wciśnij przycisk Enter', 'Press the ENTER button'), \n",
|
||||
" ('Sprawdź ustawienia sieciowe', 'Check the network settings')]\n",
|
||||
"\n",
|
||||
"def sentence_cleanup(sentence):\n",
|
||||
" x = sentence.replace(\" \", \"\")\n",
|
||||
" x = x.replace(\"\\s\", \"\")\n",
|
||||
" x = x.replace(\"\\t\", \"\")\n",
|
||||
" x = x.replace(\"\\v\", \"\")\n",
|
||||
" x = x.replace(\"\\n\", \"\")\n",
|
||||
" x = x.replace(\"\\r\", \"\")\n",
|
||||
" x = x.replace(\"\\f\", \"\")\n",
|
||||
" x = x.replace(\".\", \"\")\n",
|
||||
" x = x.replace(\"!\", \"\")\n",
|
||||
" x = x.replace(\":\", \"\")\n",
|
||||
" x = x.replace('?', \"\")\n",
|
||||
" x = x.replace(\";\", \"\")\n",
|
||||
" x = x.replace('\"', \"\")\n",
|
||||
" x = x.lower()\n",
|
||||
" print(x)\n",
|
||||
" return x\n",
|
||||
"\n",
|
||||
"def tm_lookup(sentence):\n",
|
||||
" for entry in translation_memory:\n",
|
||||
" part1 = entry[0]\n",
|
||||
" part2 = entry[1]\n",
|
||||
" better_sentence = sentence_cleanup(sentence)\n",
|
||||
" better_entry = sentence_cleanup(part1)\n",
|
||||
" if better_entry == better_sentence:\n",
|
||||
" return part2\n",
|
||||
"\n",
|
||||
"tm_lookup('Wciśnij przycisk:ENTER!') # -> ['Press the ENTER button'] OK"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "guided-tutorial",
|
||||
|
@ -409,6 +499,34 @@
|
|||
"Odpowiedź:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 31,
|
||||
"id": "6347bc85-194d-45c4-a30d-5e5e06033821",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[('przycisk', 'button'), ('drukarka', 'printer')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 31,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"glossary = [('komputer', 'computer'), ('przycisk', 'button'), ('drukarka', 'printer')]\n",
|
||||
"\n",
|
||||
"def glossary_lookup(sentence):\n",
|
||||
" better_sentence = sentence.lower()\n",
|
||||
" sentence_words = better_sentence.split()\n",
|
||||
" return [entry for entry in glossary if entry[0] in sentence_words]\n",
|
||||
"\n",
|
||||
"glossary_lookup('Każda DRUKARKA posiada PrzycisK wznowienia drukowania')"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "objective-matthew",
|
||||
|
@ -424,8 +542,14 @@
|
|||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"glossary = [('komputer', 'computer'), ('przycisk', 'button'), ('drukarka', 'printer')]\n",
|
||||
"\n",
|
||||
"def glossary_lookup(sentence):\n",
|
||||
" return ''"
|
||||
" better_sentence = sentence.lower()\n",
|
||||
" sentence_words = better_sentence.split()\n",
|
||||
" return [entry for entry in glossary if entry[0] in sentence_words]\n",
|
||||
"\n",
|
||||
"glossary_lookup('Każda DRUKARKA posiada PrzycisK wznowienia drukowania')"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -452,7 +576,7 @@
|
|||
"author": "Rafał Jaworski",
|
||||
"email": "rjawor@amu.edu.pl",
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
|
@ -467,7 +591,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.10"
|
||||
"version": "3.9.2"
|
||||
},
|
||||
"subtitle": "1. Podstawowe techniki wspomagania tłumaczenia",
|
||||
"title": "Komputerowe wspomaganie tłumaczenia",
|
||||
|
|
|
@ -223,17 +223,17 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 7,
|
||||
"id": "secondary-wrist",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"2"
|
||||
"3"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -344,21 +344,70 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 19,
|
||||
"id": "genetic-cradle",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Wciśnij przycisk Enter -> Wciśnij przycisk Enter [ SCORE = 1.0 ]\n",
|
||||
"Wciśnij przycisk Enter -> Wciśnij przycisk ENTER [ SCORE = 0.8181818181818181 ]\n",
|
||||
"Wciśnij przycisk Enter -> Wciśnij przycisk Enter! [ SCORE = 0.9565217391304348 ]\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"#!pip3 install python-Levenshtein\n",
|
||||
"from Levenshtein import distance as levenshtein_distance\n",
|
||||
"\n",
|
||||
"translation_memory = [\n",
|
||||
" ('Wciśnij przycisk Enter', 'Press the ENTER button'),\n",
|
||||
" ('Wciśnij przycisk ENTER', 'Press the ENTER button'), \n",
|
||||
" ('Wciśnij przycisk Enter!', 'Press the ENTER button!'), \n",
|
||||
" ('Wciśnij przycisk', 'Press the button'), \n",
|
||||
" ('Wciśnij Enter', 'Press the ENTER'), \n",
|
||||
" ('Sprawdź ustawienia sieciowe', 'Check the network settings'),\n",
|
||||
" ('Drukarka jest wyłączona', 'The printer is switched off'),\n",
|
||||
" ('Wymagane ponowne uruchomienie komputera', 'System restart required')\n",
|
||||
" ]\n",
|
||||
"\n",
|
||||
"def levenshtein_similarity(x,y):\n",
|
||||
" return 1 - levenshtein_distance(x,y) / max(len(x), len(y))\n",
|
||||
"\n",
|
||||
"def fuzzy_lookup(sentence, threshold):\n",
|
||||
" return []"
|
||||
" for entry in translation_memory:\n",
|
||||
" part1 = entry[0]\n",
|
||||
" score = levenshtein_similarity(sentence, part1)\n",
|
||||
" if score >= threshold:\n",
|
||||
" print(sentence + ' -> ' + part1 + ' [ SCORE = ' + str(score) + ' ]')\n",
|
||||
"\n",
|
||||
"fuzzy_lookup('Wciśnij przycisk Enter', 0.8)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "8b7bb6aa-5aaf-4f49-84ab-edbe6797d568",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "3b095fff-71a8-44a9-b809-d872ee9a7b62",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"author": "Rafał Jaworski",
|
||||
"email": "rjawor@amu.edu.pl",
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
|
@ -373,7 +422,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.10"
|
||||
"version": "3.9.2"
|
||||
},
|
||||
"subtitle": "2. Zaawansowane użycie pamięci tłumaczeń",
|
||||
"title": "Komputerowe wspomaganie tłumaczenia",
|
||||
|
|
|
@ -63,7 +63,14 @@
|
|||
"id": "diverse-sunglasses",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Odpowiedź:"
|
||||
"Odpowiedź:\n",
|
||||
"\n",
|
||||
"slides of metal cabinet\n",
|
||||
"\n",
|
||||
"Źródła - tłumaczono z PL -> ENG oraz ENG -> PL\n",
|
||||
"https://translate.google.pl/?hl=pl&sl=en&tl=pl&text=metal%20cabinet%20slides&op=translate\n",
|
||||
"\n",
|
||||
"https://www.deepl.com/pl/translator#en/pl/slides%20of%20metal%20cabinet"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -128,13 +135,50 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 97,
|
||||
"id": "cognitive-cedar",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"program(14, 21)\n",
|
||||
"program(291, 298)\n",
|
||||
"program(468, 475)\n",
|
||||
"program(516, 523)\n",
|
||||
"program(533, 540)\n",
|
||||
"application(80, 91)\n",
|
||||
"application(164, 175)\n",
|
||||
"application(322, 333)\n",
|
||||
"applet(302, 308)\n",
|
||||
"compile(56, 63)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def terminology_lookup():\n",
|
||||
" return []"
|
||||
"import re\n",
|
||||
"\n",
|
||||
"text = \" For all Java programmers:\"\n",
|
||||
"text += \" This section explains how to compile and run a Swing application from the command line.\"\n",
|
||||
"text += \" For information on compiling and running a Swing application using NetBeans IDE,\"\n",
|
||||
"text += \" see Running Tutorial Examples in NetBeans IDE. The compilation instructions work for all Swing programs\"\n",
|
||||
"text += \" — applets, as well as applications. Here are the steps you need to follow:\"\n",
|
||||
"text += \" Install the latest release of the Java SE platform, if you haven't already done so.\"\n",
|
||||
"text += \" Create a program that uses Swing components. Compile the program. Run the program.\"\n",
|
||||
"\n",
|
||||
"dictionary = ['program', 'application', 'applet', 'compile']\n",
|
||||
"\n",
|
||||
"def terminology_lookup(): \n",
|
||||
" for entry in dictionary:\n",
|
||||
" p = r\"\\b\" + entry + \"\\w+\" \n",
|
||||
" p1 = r\"\\b\" + entry\n",
|
||||
" re_pattern = re.compile(p1)\n",
|
||||
" match = re_pattern.finditer(text)\n",
|
||||
" for x in match:\n",
|
||||
" print(entry + str(x.span()))\n",
|
||||
" \n",
|
||||
"terminology_lookup()"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -407,7 +451,7 @@
|
|||
"author": "Rafał Jaworski",
|
||||
"email": "rjawor@amu.edu.pl",
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
|
@ -422,7 +466,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.10"
|
||||
"version": "3.9.2"
|
||||
},
|
||||
"subtitle": "3. Terminologia",
|
||||
"title": "Komputerowe wspomaganie tłumaczenia",
|
||||
|
|
Loading…
Reference in New Issue