forked from bfijalkowski/KWT-2024
Compare commits
No commits in common. "e343070e32a6f25a562eebc4fd75e5824724da00" and "71ca3b66ed4328dcefeefcfd415f5d7f1f41b52a" have entirely different histories.
e343070e32
...
71ca3b66ed
182
lab/lab_01.ipynb
182
lab/lab_01.ipynb
@ -52,7 +52,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 1,
|
||||
"id": "narrow-romantic",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -71,7 +71,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 2,
|
||||
"id": "indonesian-electron",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -82,7 +82,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 3,
|
||||
"id": "compact-trinidad",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -92,7 +92,7 @@
|
||||
"['Press the ENTER button']"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -119,7 +119,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 4,
|
||||
"id": "exposed-daniel",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -139,7 +139,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 5,
|
||||
"id": "serial-velvet",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -149,7 +149,7 @@
|
||||
"['Press the ENTER button', 'Press the ENTER key']"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -176,7 +176,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 6,
|
||||
"id": "every-gibson",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -186,7 +186,7 @@
|
||||
"[]"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -213,26 +213,13 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 7,
|
||||
"id": "protected-rings",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['Press the ENTER button', 'Press the ENTER key']"
|
||||
]
|
||||
},
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def tm_lookup(sentence):\n",
|
||||
" return [entry[1] for entry in translation_memory if entry[0].lower() == sentence.lower()]\n",
|
||||
"\n",
|
||||
"tm_lookup('Wciśnij przycisk ENTER')"
|
||||
" return ''"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -245,17 +232,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 18,
|
||||
"id": "severe-alloy",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[]"
|
||||
"''"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"execution_count": 18,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -274,29 +261,13 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": 11,
|
||||
"id": "structural-diesel",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['Press the ENTER button', 'Press the ENTER key']"
|
||||
]
|
||||
},
|
||||
"execution_count": 16,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import string\n",
|
||||
"\n",
|
||||
"def tm_lookup(sentence):\n",
|
||||
" sentence = sentence.translate(str.maketrans('', '', string.punctuation))\n",
|
||||
" return [entry[1] for entry in translation_memory if entry[0].lower() == sentence.lower()]\n",
|
||||
"\n",
|
||||
"tm_lookup('Wciśnij przycisk [ENTER]')"
|
||||
" return ''"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -309,17 +280,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": 12,
|
||||
"id": "brief-senegal",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[]"
|
||||
"''"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -346,66 +317,13 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 26,
|
||||
"execution_count": 14,
|
||||
"id": "mathematical-customs",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Suggestion:\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['System restart required']"
|
||||
]
|
||||
},
|
||||
"execution_count": 26,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def differenceThanNotBiggerThanOneElement(firstSentence, secondSentence):\n",
|
||||
" firstSentenceList = firstSentence.lower().split()\n",
|
||||
" secondSentenceList = secondSentence.lower().split()\n",
|
||||
"\n",
|
||||
" diffNumber = 0\n",
|
||||
"\n",
|
||||
" for i in range(len(firstSentenceList)):\n",
|
||||
" if(firstSentenceList[i] != secondSentenceList[i]):\n",
|
||||
" diffNumber=diffNumber+1\n",
|
||||
" if(diffNumber > 2):\n",
|
||||
" return False\n",
|
||||
"\n",
|
||||
" return True\n",
|
||||
"\n",
|
||||
"def tm_lookup(sentence):\n",
|
||||
" sentence = sentence.translate(str.maketrans('', '', string.punctuation))\n",
|
||||
"\n",
|
||||
" exactMatchList = [entry[1] for entry in translation_memory if entry[0].lower() == sentence.lower()]\n",
|
||||
"\n",
|
||||
" if(len(exactMatchList) == 0):\n",
|
||||
" diffMatchList = [entry[1] for entry in translation_memory if differenceThanNotBiggerThanOneElement(entry[0], sentence)]\n",
|
||||
"\n",
|
||||
" if(len(diffMatchList) > 0):\n",
|
||||
" print('Suggestion:')\n",
|
||||
" return diffMatchList\n",
|
||||
"\n",
|
||||
" else:\n",
|
||||
" return exactMatchList\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
" \n",
|
||||
"\n",
|
||||
"tm_lookup('Wymagane ponowne uruchomienie maszyny')"
|
||||
" return ''"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -426,7 +344,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 27,
|
||||
"execution_count": 15,
|
||||
"id": "humanitarian-wrong",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -444,7 +362,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 28,
|
||||
"execution_count": 16,
|
||||
"id": "located-perception",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -456,7 +374,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 29,
|
||||
"execution_count": 17,
|
||||
"id": "advised-casting",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -466,7 +384,7 @@
|
||||
"[('przycisk', 'button'), ('drukarka', 'printer')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 29,
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -488,7 +406,7 @@
|
||||
"id": "defensive-fifteen",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Odpowiedź: O(n * m)"
|
||||
"Odpowiedź:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -501,27 +419,13 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 34,
|
||||
"execution_count": 19,
|
||||
"id": "original-tunisia",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[]"
|
||||
]
|
||||
},
|
||||
"execution_count": 34,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def glossary_lookup(sentence):\n",
|
||||
" sentence_words = sentence.lower().split()\n",
|
||||
" return [entry for entry in glossary if entry[0] in sentence_words]\n",
|
||||
"\n",
|
||||
"glossary_lookup('Każda Drukarka posiada Przycisk wznowienia drukowania')"
|
||||
" return ''"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -534,27 +438,13 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 35,
|
||||
"execution_count": 20,
|
||||
"id": "adolescent-semiconductor",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[('przycisk', 'button'), ('drukarka', 'printer')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 35,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def glossary_lookup(sentence):\n",
|
||||
" sentence_words = set(sentence.lower().split())\n",
|
||||
" return [entry for entry in glossary if entry[0] in sentence_words]\n",
|
||||
"\n",
|
||||
"glossary_lookup('Każda Drukarka posiada Przycisk wznowienia drukowania')"
|
||||
" return ''"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -577,7 +467,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
"version": "3.8.10"
|
||||
},
|
||||
"subtitle": "1. Podstawowe techniki wspomagania tłumaczenia",
|
||||
"title": "Komputerowe wspomaganie tłumaczenia",
|
||||
|
@ -57,7 +57,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"execution_count": 1,
|
||||
"id": "confident-prison",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -80,44 +80,13 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 43,
|
||||
"execution_count": 2,
|
||||
"id": "continental-submission",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['The printer is switched off',\n",
|
||||
" 'Check the network settings',\n",
|
||||
" 'System restart required']"
|
||||
]
|
||||
},
|
||||
"execution_count": 43,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"\n",
|
||||
"def ice_lookup(sentence, prev_sentence, next_sentence):\n",
|
||||
" for index in range(len(translation_memory)):\n",
|
||||
" if index == 0:\n",
|
||||
" continue\n",
|
||||
" elif index + 1 >= len(translation_memory):\n",
|
||||
" return []\n",
|
||||
" else:\n",
|
||||
" middleText = translation_memory[index]\n",
|
||||
" prevText = translation_memory[index-1]\n",
|
||||
" nextText = translation_memory[index+1]\n",
|
||||
" if(sentence == middleText[0] and prev_sentence == prevText[0] and next_sentence == nextText[0]):\n",
|
||||
" return [middleText[1], prevText[1], nextText[1]]\n",
|
||||
" \n",
|
||||
" return []\n",
|
||||
" \n",
|
||||
"\n",
|
||||
" \n",
|
||||
" \n",
|
||||
"ice_lookup('Drukarka jest wyłączona','Sprawdź ustawienia sieciowe','Wymagane ponowne uruchomienie komputera') \n"
|
||||
" return []"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -150,7 +119,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 44,
|
||||
"execution_count": 3,
|
||||
"id": "fourth-pillow",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -172,7 +141,7 @@
|
||||
"id": "graduate-theorem",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Odpowiedź: Tak, Spełnia warunki dla 1,2,3,4, "
|
||||
"Odpowiedź:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -210,7 +179,7 @@
|
||||
"id": "metallic-leave",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Odpowiedź: Nie, Spełnia dla warunku 3, poniewaz x = 4, y = 4, to wychodzi d(x,y) = 3, a nie 0"
|
||||
"Odpowiedź:"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -254,7 +223,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 45,
|
||||
"execution_count": 5,
|
||||
"id": "secondary-wrist",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -264,7 +233,7 @@
|
||||
"2"
|
||||
]
|
||||
},
|
||||
"execution_count": 45,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -285,7 +254,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 46,
|
||||
"execution_count": 6,
|
||||
"id": "associate-tuner",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -304,7 +273,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 47,
|
||||
"execution_count": 7,
|
||||
"id": "focal-pathology",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -314,7 +283,7 @@
|
||||
"0.9166666666666666"
|
||||
]
|
||||
},
|
||||
"execution_count": 47,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -325,7 +294,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 48,
|
||||
"execution_count": 8,
|
||||
"id": "roman-ceiling",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -335,7 +304,7 @@
|
||||
"0.9428571428571428"
|
||||
]
|
||||
},
|
||||
"execution_count": 48,
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -346,7 +315,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 49,
|
||||
"execution_count": 9,
|
||||
"id": "invisible-cambodia",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -356,7 +325,7 @@
|
||||
"0.631578947368421"
|
||||
]
|
||||
},
|
||||
"execution_count": 49,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -375,26 +344,13 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 51,
|
||||
"execution_count": 10,
|
||||
"id": "genetic-cradle",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['Press the ENTER button']"
|
||||
]
|
||||
},
|
||||
"execution_count": 51,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def fuzzy_lookup(sentence, threshold):\n",
|
||||
" return [entry[1] for entry in translation_memory if levenshtein_similarity(entry[0],sentence ) > threshold]\n",
|
||||
"\n",
|
||||
"fuzzy_lookup('Wciśnij przycisk Enter', 0.5)"
|
||||
" return []"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -417,7 +373,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.7"
|
||||
"version": "3.8.10"
|
||||
},
|
||||
"subtitle": "2. Zaawansowane użycie pamięci tłumaczeń",
|
||||
"title": "Komputerowe wspomaganie tłumaczenia",
|
||||
|
Loading…
Reference in New Issue
Block a user