kamil kubiak 2024-06-23 23:57:31 +02:00
parent 959cf021a0
commit bd8ec052b5
6 changed files with 2100 additions and 46 deletions

View File

@@ -57,13 +57,62 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 6,
"id": "moving-clothing",
"metadata": {},
"outputs": [],
"outputs": [
{
"ename": "TypeError",
"evalue": "Fraction.__new__() got an unexpected keyword argument '_normalize'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[6], line 35\u001b[0m\n\u001b[0;32m 32\u001b[0m predictions \u001b[38;5;241m=\u001b[39m [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mthe\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpicture\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mthe\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpicture\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mby\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mme\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m 34\u001b[0m \u001b[38;5;66;03m# Calculate BLEU score with weights\u001b[39;00m\n\u001b[1;32m---> 35\u001b[0m score \u001b[38;5;241m=\u001b[39m \u001b[43msentence_bleu\u001b[49m\u001b[43m(\u001b[49m\u001b[43mreference\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpredictions\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mweights\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mweights\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 36\u001b[0m \u001b[38;5;28mprint\u001b[39m(score)\n",
"File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\nltk\\translate\\bleu_score.py:107\u001b[0m, in \u001b[0;36msentence_bleu\u001b[1;34m(references, hypothesis, weights, smoothing_function, auto_reweigh)\u001b[0m\n\u001b[0;32m 20\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21msentence_bleu\u001b[39m(\n\u001b[0;32m 21\u001b[0m references,\n\u001b[0;32m 22\u001b[0m hypothesis,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 25\u001b[0m auto_reweigh\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[0;32m 26\u001b[0m ):\n\u001b[0;32m 27\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 28\u001b[0m \u001b[38;5;124;03m Calculate BLEU score (Bilingual Evaluation Understudy) from\u001b[39;00m\n\u001b[0;32m 29\u001b[0m \u001b[38;5;124;03m Papineni, Kishore, Salim Roukos, Todd Ward, and Wei-Jing Zhu. 2002.\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 105\u001b[0m \u001b[38;5;124;03m :rtype: float / list(float)\u001b[39;00m\n\u001b[0;32m 106\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m--> 107\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcorpus_bleu\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 108\u001b[0m \u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[43mreferences\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[43mhypothesis\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mweights\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msmoothing_function\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mauto_reweigh\u001b[49m\n\u001b[0;32m 109\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\nltk\\translate\\bleu_score.py:210\u001b[0m, in \u001b[0;36mcorpus_bleu\u001b[1;34m(list_of_references, hypotheses, weights, smoothing_function, auto_reweigh)\u001b[0m\n\u001b[0;32m 206\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m references, hypothesis \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mzip\u001b[39m(list_of_references, hypotheses):\n\u001b[0;32m 207\u001b[0m \u001b[38;5;66;03m# For each order of ngram, calculate the numerator and\u001b[39;00m\n\u001b[0;32m 208\u001b[0m \u001b[38;5;66;03m# denominator for the corpus-level modified precision.\u001b[39;00m\n\u001b[0;32m 209\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m i \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mrange\u001b[39m(\u001b[38;5;241m1\u001b[39m, max_weight_length \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m):\n\u001b[1;32m--> 210\u001b[0m p_i \u001b[38;5;241m=\u001b[39m \u001b[43mmodified_precision\u001b[49m\u001b[43m(\u001b[49m\u001b[43mreferences\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mhypothesis\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mi\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 211\u001b[0m p_numerators[i] \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m p_i\u001b[38;5;241m.\u001b[39mnumerator\n\u001b[0;32m 212\u001b[0m p_denominators[i] \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m p_i\u001b[38;5;241m.\u001b[39mdenominator\n",
"File \u001b[1;32m~\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\nltk\\translate\\bleu_score.py:368\u001b[0m, in \u001b[0;36mmodified_precision\u001b[1;34m(references, hypothesis, n)\u001b[0m\n\u001b[0;32m 364\u001b[0m \u001b[38;5;66;03m# Ensures that denominator is minimum 1 to avoid ZeroDivisionError.\u001b[39;00m\n\u001b[0;32m 365\u001b[0m \u001b[38;5;66;03m# Usually this happens when the ngram order is > len(reference).\u001b[39;00m\n\u001b[0;32m 366\u001b[0m denominator \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mmax\u001b[39m(\u001b[38;5;241m1\u001b[39m, \u001b[38;5;28msum\u001b[39m(counts\u001b[38;5;241m.\u001b[39mvalues()))\n\u001b[1;32m--> 368\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mFraction\u001b[49m\u001b[43m(\u001b[49m\u001b[43mnumerator\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdenominator\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m_normalize\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\n",
"\u001b[1;31mTypeError\u001b[0m: Fraction.__new__() got an unexpected keyword argument '_normalize'"
]
}
],
"source": [
"def calculate_bleu():\n",
" return 0"
"#!pip install nltk\n",
"#from nltk.translate.bleu_score import sentence_bleu, corpus_bleu\n",
"#import zipfile\n",
"#archive = zipfile.ZipFile('./data/corpus_corrected.zip', 'r')\n",
"#reference1 = archive.read('corpus_the_human.txt')\n",
"#reference2 = archive.read('corpus_the_nmt.txt')\n",
"#translation = archive.read('corpus_en.txt')\n",
"\n",
"# Prepare the reference sentences\n",
"#reference1 = ['I', 'love', 'eating', 'ice', 'cream']\n",
"#reference2 = ['I', 'enjoy', 'eating', 'ice', 'cream']\n",
"#translation = ['I', 'love', 'eating', 'ice', 'cream']\n",
"\n",
"#bleu_score = sentence_bleu([reference1, reference2], translation)\n",
"#print(\"BLEU Score: \", bleu_score)\n",
"# Calculate the BLEU score for a single sentence\n",
"\n",
"#def calculate_bleu(reference1,reference2,translation):\n",
"# bleu_score = sentence_bleu([reference1, reference2], translation)\n",
"# print(\"BLEU Score: \", bleu_score)\n",
"# return bleu_score\n",
"#\n",
"#calculate_bleu(reference1,reference2,translation)\n",
"\n",
"# V2\n",
"from nltk.translate.bleu_score import sentence_bleu, corpus_bleu \n",
"# Define your desired weights (example: higher weight for bi-grams)\n",
"weights = (0.25, 0.25, 0, 0) # Weights for uni-gram, bi-gram, tri-gram, and 4-gram \n",
"# Reference and predicted texts (same as before)\n",
"reference = [[\"the\", \"picture\", \"is\", \"clicked\", \"by\", \"me\"],\n",
" [\"this\", \"picture\", \"was\", \"clicked\", \"by\", \"me\"]]\n",
"predictions = [\"the\", \"picture\", \"the\", \"picture\", \"by\", \"me\"]\n",
" \n",
"# Calculate BLEU score with weights\n",
"score = sentence_bleu(reference, predictions, weights=weights)\n",
"print(score)"
]
},
{
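
The TypeError above is not caused by the cell's own code: older nltk releases build the modified n-gram precisions with fractions.Fraction(numerator, denominator, _normalize=False), and Python 3.12 removed that private keyword. Upgrading nltk to a Python 3.12-compatible release (pip install --upgrade nltk), or running the kernel on Python < 3.12, should let the call go through. Below is a minimal sketch of the same computation under that assumption, with the weights adjusted so they sum to 1; the smoothing function is an optional extra, not part of the original cell.

    # Minimal sketch, assuming an nltk release that supports Python 3.12
    # (i.e. one that no longer passes _normalize to fractions.Fraction).
    from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

    # Same references and hypothesis as in the cell above.
    reference = [["the", "picture", "is", "clicked", "by", "me"],
                 ["this", "picture", "was", "clicked", "by", "me"]]
    predictions = ["the", "picture", "the", "picture", "by", "me"]

    # Score only uni-grams and bi-grams; BLEU is the geometric mean of the
    # weighted n-gram precisions, so the weights should sum to 1.
    weights = (0.5, 0.5)

    # method1 smoothing keeps a zero n-gram count from collapsing the score
    # to 0 (not strictly needed for this example, but harmless).
    score = sentence_bleu(reference, predictions, weights=weights,
                          smoothing_function=SmoothingFunction().method1)
    print(score)

With these weights the result is the geometric mean of the clipped uni-gram precision (4/6) and bi-gram precision (2/5), times a brevity penalty of 1, so roughly 0.52.
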
@@ -190,15 +239,12 @@
"metadata": {
"author": "Rafał Jaworski",
"email": "rjawor@amu.edu.pl",
"lang": "pl",
"subtitle": "8. Wykorzystanie tłumaczenia automatycznego we wspomaganiu tłumaczenia",
"title": "Komputerowe wspomaganie tłumaczenia",
"year": "2021",
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"lang": "pl",
"language_info": {
"codemirror_mode": {
"name": "ipython",
@@ -209,8 +255,11 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
"version": "3.12.2"
},
"subtitle": "8. Wykorzystanie tłumaczenia automatycznego we wspomaganiu tłumaczenia",
"title": "Komputerowe wspomaganie tłumaczenia",
"year": "2021"
},
"nbformat": 4,
"nbformat_minor": 5

File diff suppressed because one or more lines are too long

View File

@@ -187,15 +187,12 @@
"metadata": {
"author": "Rafał Jaworski",
"email": "rjawor@amu.edu.pl",
"lang": "pl",
"subtitle": "11. Urównoleglanie",
"title": "Komputerowe wspomaganie tłumaczenia",
"year": "2021",
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"lang": "pl",
"language_info": {
"codemirror_mode": {
"name": "ipython",
@@ -206,8 +203,11 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
"version": "3.12.2"
},
"subtitle": "11. Urównoleglanie",
"title": "Komputerowe wspomaganie tłumaczenia",
"year": "2021"
},
"nbformat": 4,
"nbformat_minor": 5

View File

@@ -154,15 +154,12 @@
"metadata": {
"author": "Rafał Jaworski",
"email": "rjawor@amu.edu.pl",
"lang": "pl",
"subtitle": "12. Key logging",
"title": "Komputerowe wspomaganie tłumaczenia",
"year": "2021",
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"lang": "pl",
"language_info": {
"codemirror_mode": {
"name": "ipython",
@@ -173,8 +170,11 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
}
"version": "3.12.2"
},
"subtitle": "12. Key logging",
"title": "Komputerowe wspomaganie tłumaczenia",
"year": "2021"
},
"nbformat": 4,
"nbformat_minor": 5

View File

@@ -201,7 +201,7 @@
"author": "Rafał Jaworski",
"email": "rjawor@amu.edu.pl",
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -216,7 +216,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
"version": "3.12.2"
},
"subtitle": "13,14. Korekta pisowni",
"title": "Komputerowe wspomaganie tłumaczenia",

View File

@@ -155,7 +155,7 @@
"author": "Rafał Jaworski",
"email": "rjawor@amu.edu.pl",
"kernelspec": {
"display_name": "Python 3",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
@@ -170,7 +170,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
"version": "3.12.2"
},
"subtitle": "15. Korekta gramatyczna",
"title": "Komputerowe wspomaganie tłumaczenia",