forked from bfijalkowski/KWT-2024
lab 1
This commit is contained in:
parent
71ca3b66ed
commit
ddd2833663
242
lab/lab_01.ipynb
242
lab/lab_01.ipynb
@ -52,9 +52,14 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 1,
|
"execution_count": 5,
|
||||||
"id": "narrow-romantic",
|
"id": "narrow-romantic",
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-04-13T11:05:09.046685900Z",
|
||||||
|
"start_time": "2024-04-13T11:05:08.877692800Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"translation_memory = [('Wciśnij przycisk Enter', 'Press the ENTER button'), \n",
|
"translation_memory = [('Wciśnij przycisk Enter', 'Press the ENTER button'), \n",
|
||||||
@ -71,9 +76,14 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": 6,
|
||||||
"id": "indonesian-electron",
|
"id": "indonesian-electron",
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-04-13T11:05:09.131296300Z",
|
||||||
|
"start_time": "2024-04-13T11:05:08.893315Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"def tm_lookup(sentence):\n",
|
"def tm_lookup(sentence):\n",
|
||||||
@ -82,17 +92,20 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 3,
|
"execution_count": 7,
|
||||||
"id": "compact-trinidad",
|
"id": "compact-trinidad",
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-04-13T11:05:09.162547Z",
|
||||||
|
"start_time": "2024-04-13T11:05:08.924558500Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/plain": [
|
"text/plain": "['Press the ENTER button']"
|
||||||
"['Press the ENTER button']"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"execution_count": 3,
|
"execution_count": 7,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -119,9 +132,14 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 4,
|
"execution_count": 8,
|
||||||
"id": "exposed-daniel",
|
"id": "exposed-daniel",
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-04-13T11:05:09.162547Z",
|
||||||
|
"start_time": "2024-04-13T11:05:08.946722400Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"translation_memory.append(('Drukarka jest wyłączona', 'The printer is switched off'))\n",
|
"translation_memory.append(('Drukarka jest wyłączona', 'The printer is switched off'))\n",
|
||||||
@ -139,17 +157,20 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 5,
|
"execution_count": 9,
|
||||||
"id": "serial-velvet",
|
"id": "serial-velvet",
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-04-13T11:05:09.162547Z",
|
||||||
|
"start_time": "2024-04-13T11:05:08.955053700Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/plain": [
|
"text/plain": "['Press the ENTER button', 'Press the ENTER key']"
|
||||||
"['Press the ENTER button', 'Press the ENTER key']"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"execution_count": 5,
|
"execution_count": 9,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -176,17 +197,20 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 6,
|
"execution_count": 10,
|
||||||
"id": "every-gibson",
|
"id": "every-gibson",
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-04-13T11:05:09.178168700Z",
|
||||||
|
"start_time": "2024-04-13T11:05:08.970677700Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/plain": [
|
"text/plain": "[]"
|
||||||
"[]"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"execution_count": 6,
|
"execution_count": 10,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -213,13 +237,19 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 7,
|
"execution_count": 21,
|
||||||
"id": "protected-rings",
|
"id": "protected-rings",
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-04-13T11:05:12.496455200Z",
|
||||||
|
"start_time": "2024-04-13T11:05:12.465209700Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"def tm_lookup(sentence):\n",
|
"def tm_lookup(sentence):\n",
|
||||||
" return ''"
|
" sentence = sentence.lower()\n",
|
||||||
|
" return [entry[1] for entry in translation_memory if entry[0].lower() == sentence]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -232,17 +262,20 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 18,
|
"execution_count": 22,
|
||||||
"id": "severe-alloy",
|
"id": "severe-alloy",
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-04-13T11:05:14.153976900Z",
|
||||||
|
"start_time": "2024-04-13T11:05:14.120474700Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/plain": [
|
"text/plain": "[]"
|
||||||
"''"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"execution_count": 18,
|
"execution_count": 22,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -261,13 +294,24 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 11,
|
"execution_count": 23,
|
||||||
"id": "structural-diesel",
|
"id": "structural-diesel",
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-04-13T11:05:15.199517300Z",
|
||||||
|
"start_time": "2024-04-13T11:05:15.105892400Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
"import string\n",
|
||||||
|
"\n",
|
||||||
|
"def normalize(sentence):\n",
|
||||||
|
" return sentence.translate(str.maketrans('', '', string.punctuation)).lower()\n",
|
||||||
|
"\n",
|
||||||
"def tm_lookup(sentence):\n",
|
"def tm_lookup(sentence):\n",
|
||||||
" return ''"
|
" sentence = normalize(sentence)\n",
|
||||||
|
" return [entry[1] for entry in translation_memory if normalize(entry[0]) == sentence]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -280,17 +324,20 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 12,
|
"execution_count": 24,
|
||||||
"id": "brief-senegal",
|
"id": "brief-senegal",
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-04-13T11:05:17.857048100Z",
|
||||||
|
"start_time": "2024-04-13T11:05:17.825799600Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/plain": [
|
"text/plain": "[]"
|
||||||
"''"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"execution_count": 12,
|
"execution_count": 24,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -317,13 +364,49 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 14,
|
"execution_count": 25,
|
||||||
"id": "mathematical-customs",
|
"id": "mathematical-customs",
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-04-13T12:00:14.223561700Z",
|
||||||
|
"start_time": "2024-04-13T12:00:14.159559100Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
"def find_similar(sentence):\n",
|
||||||
|
" mismatches_threshold = 2\n",
|
||||||
|
" words = sentence.split()\n",
|
||||||
|
" words_count = len(words)\n",
|
||||||
|
" for entry in translation_memory:\n",
|
||||||
|
" entry_words = normalize(entry[0]).split()\n",
|
||||||
|
" if words_count != len(entry_words):\n",
|
||||||
|
" continue\n",
|
||||||
|
" mismatches = 0\n",
|
||||||
|
" i = 0\n",
|
||||||
|
" for word in words:\n",
|
||||||
|
" if word != entry_words[i]:\n",
|
||||||
|
" if mismatches < mismatches_threshold:\n",
|
||||||
|
" mismatches += 1\n",
|
||||||
|
" else:\n",
|
||||||
|
" break\n",
|
||||||
|
" i += 1\n",
|
||||||
|
" if mismatches < mismatches_threshold:\n",
|
||||||
|
" return entry[1]\n",
|
||||||
|
" return []\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"def find_exact(sentence):\n",
|
||||||
|
" return [entry[1] for entry in translation_memory if normalize(entry[0]) == sentence]\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
"def tm_lookup(sentence):\n",
|
"def tm_lookup(sentence):\n",
|
||||||
" return ''"
|
" sentence = normalize(sentence)\n",
|
||||||
|
" exact_match = find_exact(sentence)\n",
|
||||||
|
" if not exact_match:\n",
|
||||||
|
" return find_similar(sentence)\n",
|
||||||
|
" else:\n",
|
||||||
|
" return exact_match"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -344,9 +427,14 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 15,
|
"execution_count": 26,
|
||||||
"id": "humanitarian-wrong",
|
"id": "humanitarian-wrong",
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-04-13T12:00:18.016836500Z",
|
||||||
|
"start_time": "2024-04-13T12:00:17.992836400Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"glossary = [('komputer', 'computer'), ('przycisk', 'button'), ('drukarka', 'printer')]"
|
"glossary = [('komputer', 'computer'), ('przycisk', 'button'), ('drukarka', 'printer')]"
|
||||||
@ -362,9 +450,14 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 16,
|
"execution_count": 27,
|
||||||
"id": "located-perception",
|
"id": "located-perception",
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-04-13T12:02:06.039160400Z",
|
||||||
|
"start_time": "2024-04-13T12:02:06.015160400Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"def glossary_lookup(sentence):\n",
|
"def glossary_lookup(sentence):\n",
|
||||||
@ -374,17 +467,20 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 17,
|
"execution_count": 28,
|
||||||
"id": "advised-casting",
|
"id": "advised-casting",
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-04-13T12:02:06.846998600Z",
|
||||||
|
"start_time": "2024-04-13T12:02:06.823447800Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/plain": [
|
"text/plain": "[('przycisk', 'button'), ('drukarka', 'printer')]"
|
||||||
"[('przycisk', 'button'), ('drukarka', 'printer')]"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
"execution_count": 17,
|
"execution_count": 28,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -406,7 +502,9 @@
|
|||||||
"id": "defensive-fifteen",
|
"id": "defensive-fifteen",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"source": [
|
"source": [
|
||||||
"Odpowiedź:"
|
"Odpowiedź: \n",
|
||||||
|
"złożoność pesymistyczna: m*n\n",
|
||||||
|
"złożoność optymistyczna: m"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -421,11 +519,17 @@
|
|||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 19,
|
"execution_count": 19,
|
||||||
"id": "original-tunisia",
|
"id": "original-tunisia",
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-04-13T11:05:09.247171300Z",
|
||||||
|
"start_time": "2024-04-13T11:05:09.124790700Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"def glossary_lookup(sentence):\n",
|
"def glossary_lookup(sentence):\n",
|
||||||
" return ''"
|
" sentence_words = sentence.lower().split()\n",
|
||||||
|
" return [entry for entry in glossary if entry[0].lower() in sentence_words]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -440,11 +544,25 @@
|
|||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 20,
|
"execution_count": 20,
|
||||||
"id": "adolescent-semiconductor",
|
"id": "adolescent-semiconductor",
|
||||||
"metadata": {},
|
"metadata": {
|
||||||
|
"ExecuteTime": {
|
||||||
|
"end_time": "2024-04-13T11:05:09.247171300Z",
|
||||||
|
"start_time": "2024-04-13T11:05:09.146924500Z"
|
||||||
|
}
|
||||||
|
},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"def glossary_lookup(sentence):\n",
|
"def glossary_lookup(sentence):\n",
|
||||||
" return ''"
|
" sentence_words = sentence.lower().split()\n",
|
||||||
|
" entry_words = []\n",
|
||||||
|
" for entry in glossary:\n",
|
||||||
|
" entry_words.append((entry[0].lower(), entry[1]))\n",
|
||||||
|
" result = []\n",
|
||||||
|
" for word in sentence_words:\n",
|
||||||
|
" for entry_word in entry_words:\n",
|
||||||
|
" if entry_word[0] == word:\n",
|
||||||
|
" result.append(entry_word)\n",
|
||||||
|
" return result"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -452,7 +570,7 @@
|
|||||||
"author": "Rafał Jaworski",
|
"author": "Rafał Jaworski",
|
||||||
"email": "rjawor@amu.edu.pl",
|
"email": "rjawor@amu.edu.pl",
|
||||||
"kernelspec": {
|
"kernelspec": {
|
||||||
"display_name": "Python 3",
|
"display_name": "Python 3 (ipykernel)",
|
||||||
"language": "python",
|
"language": "python",
|
||||||
"name": "python3"
|
"name": "python3"
|
||||||
},
|
},
|
||||||
@ -467,7 +585,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.8.10"
|
"version": "3.10.11"
|
||||||
},
|
},
|
||||||
"subtitle": "1. Podstawowe techniki wspomagania tłumaczenia",
|
"subtitle": "1. Podstawowe techniki wspomagania tłumaczenia",
|
||||||
"title": "Komputerowe wspomaganie tłumaczenia",
|
"title": "Komputerowe wspomaganie tłumaczenia",
|
||||||
|
Loading…
Reference in New Issue
Block a user