forked from bfijalkowski/KWT-2024
Compare commits
No commits in common. "main" and "main" have entirely different histories.
132
lab/lab_01.ipynb
132
lab/lab_01.ipynb
@ -251,40 +251,10 @@
|
|||||||
"tm_lookup('Wciśnij przycisk [ENTER]')"
|
"tm_lookup('Wciśnij przycisk [ENTER]')"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 1,
|
|
||||||
"id": "36510584-d363-444d-bfdb-2f0260d197cd",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"['Press the ENTER button']"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 1,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"translation_memory = [('Wciśnij przycisk Enter', 'Press the ENTER button'), \n",
|
|
||||||
" ('Sprawdź ustawienia sieciowe', 'Check the network settings')]\n",
|
|
||||||
"\n",
|
|
||||||
"def tm_lookup(sentence):\n",
|
|
||||||
" return [entry[1] for entry in translation_memory if entry[0].lower() == sentence.lower()]\n",
|
|
||||||
"\n",
|
|
||||||
"#tm_lookup('Wciśnij przycisk Enter') # -> ['Press the ENTER button'] OK\n",
|
|
||||||
"tm_lookup('Wciśnij przycisk ENTER') # -> ['Press the ENTER button'] OK"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "choice-committee",
|
"id": "choice-committee",
|
||||||
"metadata": {
|
"metadata": {},
|
||||||
"tags": []
|
|
||||||
},
|
|
||||||
"source": [
|
"source": [
|
||||||
"### Ćwiczenie 2: zmodyfikuj funkcję tm_lookup w taki sposób, aby nie brała pod uwagę znaków interpunkcyjnych. Rada - zdefiniuj funkcję sentence_similar."
|
"### Ćwiczenie 2: zmodyfikuj funkcję tm_lookup w taki sposób, aby nie brała pod uwagę znaków interpunkcyjnych. Rada - zdefiniuj funkcję sentence_similar."
|
||||||
]
|
]
|
||||||
@ -329,66 +299,6 @@
|
|||||||
"tm_lookup('Wymagane ponowne uruchomienie maszyny')"
|
"tm_lookup('Wymagane ponowne uruchomienie maszyny')"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 24,
|
|
||||||
"id": "25af95de-30e0-468c-8c0c-2478ce0d8856",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"wciśnijprzyciskenter\n",
|
|
||||||
"wciśnijprzyciskenter\n"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"'Press the ENTER button'"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 24,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"import re\n",
|
|
||||||
"translation_memory = [('Wciśnij przycisk Enter', 'Press the ENTER button'), \n",
|
|
||||||
" ('Sprawdź ustawienia sieciowe', 'Check the network settings')]\n",
|
|
||||||
"\n",
|
|
||||||
"def sentence_cleanup(sentence):\n",
|
|
||||||
" x = sentence.replace(\" \", \"\")\n",
|
|
||||||
" x = x.replace(\"\\s\", \"\")\n",
|
|
||||||
" x = x.replace(\"\\t\", \"\")\n",
|
|
||||||
" x = x.replace(\"\\v\", \"\")\n",
|
|
||||||
" x = x.replace(\"\\n\", \"\")\n",
|
|
||||||
" x = x.replace(\"\\r\", \"\")\n",
|
|
||||||
" x = x.replace(\"\\f\", \"\")\n",
|
|
||||||
" x = x.replace(\".\", \"\")\n",
|
|
||||||
" x = x.replace(\"!\", \"\")\n",
|
|
||||||
" x = x.replace(\":\", \"\")\n",
|
|
||||||
" x = x.replace('?', \"\")\n",
|
|
||||||
" x = x.replace(\";\", \"\")\n",
|
|
||||||
" x = x.replace('\"', \"\")\n",
|
|
||||||
" x = x.lower()\n",
|
|
||||||
" print(x)\n",
|
|
||||||
" return x\n",
|
|
||||||
"\n",
|
|
||||||
"def tm_lookup(sentence):\n",
|
|
||||||
" for entry in translation_memory:\n",
|
|
||||||
" part1 = entry[0]\n",
|
|
||||||
" part2 = entry[1]\n",
|
|
||||||
" better_sentence = sentence_cleanup(sentence)\n",
|
|
||||||
" better_entry = sentence_cleanup(part1)\n",
|
|
||||||
" if better_entry == better_sentence:\n",
|
|
||||||
" return part2\n",
|
|
||||||
"\n",
|
|
||||||
"tm_lookup('Wciśnij przycisk:ENTER!') # -> ['Press the ENTER button'] OK"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "guided-tutorial",
|
"id": "guided-tutorial",
|
||||||
@ -499,34 +409,6 @@
|
|||||||
"Odpowiedź:"
|
"Odpowiedź:"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 31,
|
|
||||||
"id": "6347bc85-194d-45c4-a30d-5e5e06033821",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"[('przycisk', 'button'), ('drukarka', 'printer')]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 31,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"glossary = [('komputer', 'computer'), ('przycisk', 'button'), ('drukarka', 'printer')]\n",
|
|
||||||
"\n",
|
|
||||||
"def glossary_lookup(sentence):\n",
|
|
||||||
" better_sentence = sentence.lower()\n",
|
|
||||||
" sentence_words = better_sentence.split()\n",
|
|
||||||
" return [entry for entry in glossary if entry[0] in sentence_words]\n",
|
|
||||||
"\n",
|
|
||||||
"glossary_lookup('Każda DRUKARKA posiada PrzycisK wznowienia drukowania')"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "objective-matthew",
|
"id": "objective-matthew",
|
||||||
@ -542,14 +424,8 @@
|
|||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"glossary = [('komputer', 'computer'), ('przycisk', 'button'), ('drukarka', 'printer')]\n",
|
|
||||||
"\n",
|
|
||||||
"def glossary_lookup(sentence):\n",
|
"def glossary_lookup(sentence):\n",
|
||||||
" better_sentence = sentence.lower()\n",
|
" return ''"
|
||||||
" sentence_words = better_sentence.split()\n",
|
|
||||||
" return [entry for entry in glossary if entry[0] in sentence_words]\n",
|
|
||||||
"\n",
|
|
||||||
"glossary_lookup('Każda DRUKARKA posiada PrzycisK wznowienia drukowania')"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -576,7 +452,7 @@
|
|||||||
"author": "Rafał Jaworski",
|
"author": "Rafał Jaworski",
|
||||||
"email": "rjawor@amu.edu.pl",
|
"email": "rjawor@amu.edu.pl",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3 (ipykernel)",
|
"display_name": "Python 3",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
@ -591,7 +467,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.9.2"
|
"version": "3.8.10"
|
||||||
},
|
},
|
||||||
"subtitle": "1. Podstawowe techniki wspomagania tłumaczenia",
|
"subtitle": "1. Podstawowe techniki wspomagania tłumaczenia",
|
||||||
"title": "Komputerowe wspomaganie tłumaczenia",
|
"title": "Komputerowe wspomaganie tłumaczenia",
|
||||||
|
@ -223,17 +223,17 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 7,
|
"execution_count": 5,
|
||||||
"id": "secondary-wrist",
|
"id": "secondary-wrist",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"3"
|
"2"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 8,
|
"execution_count": 5,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -344,70 +344,21 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 19,
|
"execution_count": 10,
|
||||||
"id": "genetic-cradle",
|
"id": "genetic-cradle",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Wciśnij przycisk Enter -> Wciśnij przycisk Enter [ SCORE = 1.0 ]\n",
|
|
||||||
"Wciśnij przycisk Enter -> Wciśnij przycisk ENTER [ SCORE = 0.8181818181818181 ]\n",
|
|
||||||
"Wciśnij przycisk Enter -> Wciśnij przycisk Enter! [ SCORE = 0.9565217391304348 ]\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
"#!pip3 install python-Levenshtein\n",
|
|
||||||
"from Levenshtein import distance as levenshtein_distance\n",
|
|
||||||
"\n",
|
|
||||||
"translation_memory = [\n",
|
|
||||||
" ('Wciśnij przycisk Enter', 'Press the ENTER button'),\n",
|
|
||||||
" ('Wciśnij przycisk ENTER', 'Press the ENTER button'), \n",
|
|
||||||
" ('Wciśnij przycisk Enter!', 'Press the ENTER button!'), \n",
|
|
||||||
" ('Wciśnij przycisk', 'Press the button'), \n",
|
|
||||||
" ('Wciśnij Enter', 'Press the ENTER'), \n",
|
|
||||||
" ('Sprawdź ustawienia sieciowe', 'Check the network settings'),\n",
|
|
||||||
" ('Drukarka jest wyłączona', 'The printer is switched off'),\n",
|
|
||||||
" ('Wymagane ponowne uruchomienie komputera', 'System restart required')\n",
|
|
||||||
" ]\n",
|
|
||||||
"\n",
|
|
||||||
"def levenshtein_similarity(x,y):\n",
|
|
||||||
" return 1 - levenshtein_distance(x,y) / max(len(x), len(y))\n",
|
|
||||||
"\n",
|
|
||||||
"def fuzzy_lookup(sentence, threshold):\n",
|
"def fuzzy_lookup(sentence, threshold):\n",
|
||||||
" for entry in translation_memory:\n",
|
" return []"
|
||||||
" part1 = entry[0]\n",
|
|
||||||
" score = levenshtein_similarity(sentence, part1)\n",
|
|
||||||
" if score >= threshold:\n",
|
|
||||||
" print(sentence + ' -> ' + part1 + ' [ SCORE = ' + str(score) + ' ]')\n",
|
|
||||||
"\n",
|
|
||||||
"fuzzy_lookup('Wciśnij przycisk Enter', 0.8)"
|
|
||||||
]
|
]
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "8b7bb6aa-5aaf-4f49-84ab-edbe6797d568",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": null,
|
|
||||||
"id": "3b095fff-71a8-44a9-b809-d872ee9a7b62",
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": []
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
"author": "Rafał Jaworski",
|
"author": "Rafał Jaworski",
|
||||||
"email": "rjawor@amu.edu.pl",
|
"email": "rjawor@amu.edu.pl",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3 (ipykernel)",
|
"display_name": "Python 3",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
@ -422,7 +373,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.9.2"
|
"version": "3.8.10"
|
||||||
},
|
},
|
||||||
"subtitle": "2. Zaawansowane użycie pamięci tłumaczeń",
|
"subtitle": "2. Zaawansowane użycie pamięci tłumaczeń",
|
||||||
"title": "Komputerowe wspomaganie tłumaczenia",
|
"title": "Komputerowe wspomaganie tłumaczenia",
|
||||||
|
@ -63,14 +63,7 @@
|
|||||||
"id": "diverse-sunglasses",
|
"id": "diverse-sunglasses",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Odpowiedź:\n",
|
"Odpowiedź:"
|
||||||
"\n",
|
|
||||||
"slides of metal cabinet\n",
|
|
||||||
"\n",
|
|
||||||
"Źródła - tłumaczono z PL -> ENG oraz ENG -> PL\n",
|
|
||||||
"https://translate.google.pl/?hl=pl&sl=en&tl=pl&text=metal%20cabinet%20slides&op=translate\n",
|
|
||||||
"\n",
|
|
||||||
"https://www.deepl.com/pl/translator#en/pl/slides%20of%20metal%20cabinet"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -135,50 +128,13 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 97,
|
"execution_count": 3,
|
||||||
"id": "cognitive-cedar",
|
"id": "cognitive-cedar",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
|
||||||
"name": "stdout",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"program(14, 21)\n",
|
|
||||||
"program(291, 298)\n",
|
|
||||||
"program(468, 475)\n",
|
|
||||||
"program(516, 523)\n",
|
|
||||||
"program(533, 540)\n",
|
|
||||||
"application(80, 91)\n",
|
|
||||||
"application(164, 175)\n",
|
|
||||||
"application(322, 333)\n",
|
|
||||||
"applet(302, 308)\n",
|
|
||||||
"compile(56, 63)\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
"import re\n",
|
|
||||||
"\n",
|
|
||||||
"text = \" For all Java programmers:\"\n",
|
|
||||||
"text += \" This section explains how to compile and run a Swing application from the command line.\"\n",
|
|
||||||
"text += \" For information on compiling and running a Swing application using NetBeans IDE,\"\n",
|
|
||||||
"text += \" see Running Tutorial Examples in NetBeans IDE. The compilation instructions work for all Swing programs\"\n",
|
|
||||||
"text += \" — applets, as well as applications. Here are the steps you need to follow:\"\n",
|
|
||||||
"text += \" Install the latest release of the Java SE platform, if you haven't already done so.\"\n",
|
|
||||||
"text += \" Create a program that uses Swing components. Compile the program. Run the program.\"\n",
|
|
||||||
"\n",
|
|
||||||
"dictionary = ['program', 'application', 'applet', 'compile']\n",
|
|
||||||
"\n",
|
|
||||||
"def terminology_lookup():\n",
|
"def terminology_lookup():\n",
|
||||||
" for entry in dictionary:\n",
|
" return []"
|
||||||
" p = r\"\\b\" + entry + \"\\w+\" \n",
|
|
||||||
" p1 = r\"\\b\" + entry\n",
|
|
||||||
" re_pattern = re.compile(p1)\n",
|
|
||||||
" match = re_pattern.finditer(text)\n",
|
|
||||||
" for x in match:\n",
|
|
||||||
" print(entry + str(x.span()))\n",
|
|
||||||
" \n",
|
|
||||||
"terminology_lookup()"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -451,7 +407,7 @@
|
|||||||
"author": "Rafał Jaworski",
|
"author": "Rafał Jaworski",
|
||||||
"email": "rjawor@amu.edu.pl",
|
"email": "rjawor@amu.edu.pl",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3 (ipykernel)",
|
"display_name": "Python 3",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
@ -466,7 +422,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.9.2"
|
"version": "3.8.10"
|
||||||
},
|
},
|
||||||
"subtitle": "3. Terminologia",
|
"subtitle": "3. Terminologia",
|
||||||
"title": "Komputerowe wspomaganie tłumaczenia",
|
"title": "Komputerowe wspomaganie tłumaczenia",
|
||||||
|
@ -57,62 +57,13 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 6,
|
"execution_count": 1,
|
||||||
"id": "moving-clothing",
|
"id": "moving-clothing",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
|
||||||
"ename": "TypeError",
|
|
||||||
"evalue": "Fraction.__new__() got an unexpected keyword argument '_normalize'",
|
|
||||||
"output_type": "error",
|
|
||||||
"traceback": [
|
|
||||||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
|
||||||
"\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
|
|
||||||
"Cell \u001b[1;32mIn[6], line 35\u001b[0m\n\u001b[0;32m 32\u001b[0m predictions \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mthe\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpicture\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mthe\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpicture\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mby\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mme\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m 34\u001b[0m \u001b[38;5;66;03m# Calculate BLEU score with weights\u001b[39;00m\n\u001b[1;32m---> 35\u001b[0m score \u001b[38;5;241m=\u001b[39m \u001b[43msentence_bleu\u001b[49m\u001b[43m(\u001b[49m\u001b[43mreference\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpredictions\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mweights\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mweights\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 36\u001b[0m \u001b[38;5;28mprint\u001b[39m(score)\n",
|
|
||||||
"File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\nltk\\translate\\bleu_score.py:107\u001b[0m, in \u001b[0;36msentence_bleu\u001b[1;34m(references, hypothesis, weights, smoothing_function, auto_reweigh)\u001b[0m\n\u001b[0;32m 20\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21msentence_bleu\u001b[39m(\n\u001b[0;32m 21\u001b[0m references,\n\u001b[0;32m 22\u001b[0m hypothesis,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 25\u001b[0m auto_reweigh\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m 26\u001b[0m ):\n\u001b[0;32m 27\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 28\u001b[0m \u001b[38;5;124;03m Calculate BLEU score (Bilingual Evaluation Understudy) from\u001b[39;00m\n\u001b[0;32m 29\u001b[0m \u001b[38;5;124;03m Papineni, Kishore, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002.\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 105\u001b[0m \u001b[38;5;124;03m :rtype: float / list(float)\u001b[39;00m\n\u001b[0;32m 106\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 107\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcorpus_bleu\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 108\u001b[0m \u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[43mreferences\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[43mhypothesis\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mweights\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msmoothing_function\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mauto_reweigh\u001b[49m\n\u001b[0;32m 109\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
|
|
||||||
"File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\nltk\\translate\\bleu_score.py:210\u001b[0m, in \u001b[0;36mcorpus_bleu\u001b[1;34m(list_of_references, hypotheses, weights, smoothing_function, auto_reweigh)\u001b[0m\n\u001b[0;32m 206\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m references, hypothesis \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mzip\u001b[39m(list_of_references, hypotheses):\n\u001b[0;32m 207\u001b[0m \u001b[38;5;66;03m# For each order of ngram, calculate the numerator and\u001b[39;00m\n\u001b[0;32m 208\u001b[0m \u001b[38;5;66;03m# denominator for the corpus-level modified precision.\u001b[39;00m\n\u001b[0;32m 209\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;241m1\u001b[39m, max_weight_length \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m):\n\u001b[1;32m--> 210\u001b[0m p_i \u001b[38;5;241m=\u001b[39m \u001b[43mmodified_precision\u001b[49m\u001b[43m(\u001b[49m\u001b[43mreferences\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhypothesis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mi\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 211\u001b[0m p_numerators[i] \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m p_i\u001b[38;5;241m.\u001b[39mnumerator\n\u001b[0;32m 212\u001b[0m p_denominators[i] \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m p_i\u001b[38;5;241m.\u001b[39mdenominator\n",
|
|
||||||
"File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\nltk\\translate\\bleu_score.py:368\u001b[0m, in \u001b[0;36mmodified_precision\u001b[1;34m(references, hypothesis, n)\u001b[0m\n\u001b[0;32m 364\u001b[0m \u001b[38;5;66;03m# Ensures that denominator is minimum 1 to avoid ZeroDivisionError.\u001b[39;00m\n\u001b[0;32m 365\u001b[0m \u001b[38;5;66;03m# Usually this happens when the ngram order is > len(reference).\u001b[39;00m\n\u001b[0;32m 366\u001b[0m denominator \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mmax\u001b[39m(\u001b[38;5;241m1\u001b[39m, \u001b[38;5;28msum\u001b[39m(counts\u001b[38;5;241m.\u001b[39mvalues()))\n\u001b[1;32m--> 368\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mFraction\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnumerator\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdenominator\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_normalize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n",
|
|
||||||
"\u001b[1;31mTypeError\u001b[0m: Fraction.__new__() got an unexpected keyword argument '_normalize'"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
"#!pip install nltk\n",
|
"def calculate_bleu():\n",
|
||||||
"#from nltk.translate.bleu_score import sentence_bleu, corpus_bleu\n",
|
" return 0"
|
||||||
"#import zipfile\n",
|
|
||||||
"#archive = zipfile.ZipFile('./data/corpus_corrected.zip', 'r')\n",
|
|
||||||
"#reference1 = archive.read('corpus_the_human.txt')\n",
|
|
||||||
"#reference2 = archive.read('corpus_the_nmt.txt')\n",
|
|
||||||
"#translation = archive.read('corpus_en.txt')\n",
|
|
||||||
"\n",
|
|
||||||
"# Prepare the reference sentences\n",
|
|
||||||
"#reference1 = ['I', 'love', 'eating', 'ice', 'cream']\n",
|
|
||||||
"#reference2 = ['I', 'enjoy', 'eating', 'ice', 'cream']\n",
|
|
||||||
"#translation = ['I', 'love', 'eating', 'ice', 'cream']\n",
|
|
||||||
"\n",
|
|
||||||
"#bleu_score = sentence_bleu([reference1, reference2], translation)\n",
|
|
||||||
"#print(\"BLEU Score: \", bleu_score)\n",
|
|
||||||
"# Calculate the BLEU score for a single sentence\n",
|
|
||||||
"\n",
|
|
||||||
"#def calculate_bleu(reference1,reference2,translation):\n",
|
|
||||||
"# bleu_score = sentence_bleu([reference1, reference2], translation)\n",
|
|
||||||
"# print(\"BLEU Score: \", bleu_score)\n",
|
|
||||||
"# return bleu_score\n",
|
|
||||||
"#\n",
|
|
||||||
"#calculate_bleu(reference1,reference2,translation)\n",
|
|
||||||
"\n",
|
|
||||||
"# V2\n",
|
|
||||||
"from nltk.translate.bleu_score import sentence_bleu, corpus_bleu \n",
|
|
||||||
"# Define your desired weights (example: higher weight for bi-grams)\n",
|
|
||||||
"weights = (0.25, 0.25, 0, 0) # Weights for uni-gram, bi-gram, tri-gram, and 4-gram \n",
|
|
||||||
"# Reference and predicted texts (same as before)\n",
|
|
||||||
"reference = [[\"the\", \"picture\", \"is\", \"clicked\", \"by\", \"me\"],\n",
|
|
||||||
" [\"this\", \"picture\", \"was\", \"clicked\", \"by\", \"me\"]]\n",
|
|
||||||
"predictions = [\"the\", \"picture\", \"the\", \"picture\", \"by\", \"me\"]\n",
|
|
||||||
" \n",
|
|
||||||
"# Calculate BLEU score with weights\n",
|
|
||||||
"score = sentence_bleu(reference, predictions, weights=weights)\n",
|
|
||||||
"print(score)"
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -239,12 +190,15 @@
|
|||||||
"metadata": {
|
"metadata": {
|
||||||
"author": "Rafał Jaworski",
|
"author": "Rafał Jaworski",
|
||||||
"email": "rjawor@amu.edu.pl",
|
"email": "rjawor@amu.edu.pl",
|
||||||
|
"lang": "pl",
|
||||||
|
"subtitle": "8. Wykorzystanie tłumaczenia automatycznego we wspomaganiu tłumaczenia",
|
||||||
|
"title": "Komputerowe wspomaganie tłumaczenia",
|
||||||
|
"year": "2021",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3 (ipykernel)",
|
"display_name": "Python 3",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
"lang": "pl",
|
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
"name": "ipython",
|
"name": "ipython",
|
||||||
@ -255,11 +209,8 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.12.2"
|
"version": "3.8.10"
|
||||||
},
|
}
|
||||||
"subtitle": "8. Wykorzystanie tłumaczenia automatycznego we wspomaganiu tłumaczenia",
|
|
||||||
"title": "Komputerowe wspomaganie tłumaczenia",
|
|
||||||
"year": "2021"
|
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 5
|
"nbformat_minor": 5
|
||||||
|
2043
lab/lab_09-10.ipynb
2043
lab/lab_09-10.ipynb
File diff suppressed because one or more lines are too long
@ -187,12 +187,15 @@
|
|||||||
"metadata": {
|
"metadata": {
|
||||||
"author": "Rafał Jaworski",
|
"author": "Rafał Jaworski",
|
||||||
"email": "rjawor@amu.edu.pl",
|
"email": "rjawor@amu.edu.pl",
|
||||||
|
"lang": "pl",
|
||||||
|
"subtitle": "11. Urównoleglanie",
|
||||||
|
"title": "Komputerowe wspomaganie tłumaczenia",
|
||||||
|
"year": "2021",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3 (ipykernel)",
|
"display_name": "Python 3",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
"lang": "pl",
|
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
"name": "ipython",
|
"name": "ipython",
|
||||||
@ -203,11 +206,8 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.12.2"
|
"version": "3.8.10"
|
||||||
},
|
}
|
||||||
"subtitle": "11. Urównoleglanie",
|
|
||||||
"title": "Komputerowe wspomaganie tłumaczenia",
|
|
||||||
"year": "2021"
|
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 5
|
"nbformat_minor": 5
|
||||||
|
@ -154,12 +154,15 @@
|
|||||||
"metadata": {
|
"metadata": {
|
||||||
"author": "Rafał Jaworski",
|
"author": "Rafał Jaworski",
|
||||||
"email": "rjawor@amu.edu.pl",
|
"email": "rjawor@amu.edu.pl",
|
||||||
|
"lang": "pl",
|
||||||
|
"subtitle": "12. Key logging",
|
||||||
|
"title": "Komputerowe wspomaganie tłumaczenia",
|
||||||
|
"year": "2021",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3 (ipykernel)",
|
"display_name": "Python 3",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
"lang": "pl",
|
|
||||||
"language_info": {
|
"language_info": {
|
||||||
"codemirror_mode": {
|
"codemirror_mode": {
|
||||||
"name": "ipython",
|
"name": "ipython",
|
||||||
@ -170,11 +173,8 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.12.2"
|
"version": "3.8.10"
|
||||||
},
|
}
|
||||||
"subtitle": "12. Key logging",
|
|
||||||
"title": "Komputerowe wspomaganie tłumaczenia",
|
|
||||||
"year": "2021"
|
|
||||||
},
|
},
|
||||||
"nbformat": 4,
|
"nbformat": 4,
|
||||||
"nbformat_minor": 5
|
"nbformat_minor": 5
|
||||||
|
@ -201,7 +201,7 @@
|
|||||||
"author": "Rafał Jaworski",
|
"author": "Rafał Jaworski",
|
||||||
"email": "rjawor@amu.edu.pl",
|
"email": "rjawor@amu.edu.pl",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3 (ipykernel)",
|
"display_name": "Python 3",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
@ -216,7 +216,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.12.2"
|
"version": "3.8.10"
|
||||||
},
|
},
|
||||||
"subtitle": "13,14. Korekta pisowni",
|
"subtitle": "13,14. Korekta pisowni",
|
||||||
"title": "Komputerowe wspomaganie tłumaczenia",
|
"title": "Komputerowe wspomaganie tłumaczenia",
|
||||||
|
@ -155,7 +155,7 @@
|
|||||||
"author": "Rafał Jaworski",
|
"author": "Rafał Jaworski",
|
||||||
"email": "rjawor@amu.edu.pl",
|
"email": "rjawor@amu.edu.pl",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3 (ipykernel)",
|
"display_name": "Python 3",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
@ -170,7 +170,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.12.2"
|
"version": "3.8.10"
|
||||||
},
|
},
|
||||||
"subtitle": "15. Korekta gramatyczna",
|
"subtitle": "15. Korekta gramatyczna",
|
||||||
"title": "Komputerowe wspomaganie tłumaczenia",
|
"title": "Komputerowe wspomaganie tłumaczenia",
|
||||||
|
Loading…
Reference in New Issue
Block a user