forked from bfijalkowski/KWT-2024
lab 2
This commit is contained in:
parent
ddd2833663
commit
2b22583359
@ -85,8 +85,31 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def exact_match(sentence):\n",
|
||||
" for key, entry in enumerate(translation_memory):\n",
|
||||
" if entry[0] == sentence:\n",
|
||||
" return key, entry[1]\n",
|
||||
" return None, None\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def has_exact_match_on_index(index, sentence):\n",
|
||||
" return translation_memory[index][0] == sentence\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def ice_lookup(sentence, prev_sentence, next_sentence):\n",
|
||||
" return []"
|
||||
" index, match = exact_match(sentence)\n",
|
||||
" trans_length = len(translation_memory)\n",
|
||||
" if index is None:\n",
|
||||
" return []\n",
|
||||
" if next_sentence \\\n",
|
||||
"       and index < trans_length - 1 \\\n",
|
||||
" and not has_exact_match_on_index(index + 1, next_sentence):\n",
|
||||
" return []\n",
|
||||
" if prev_sentence \\\n",
|
||||
" and index > 0 \\\n",
|
||||
" and not has_exact_match_on_index(index - 1, prev_sentence):\n",
|
||||
" return []\n",
|
||||
" return [match]"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -141,7 +164,7 @@
|
||||
"id": "graduate-theorem",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Odpowiedź:"
|
||||
"Odpowiedź: Nie. 1, 3, 4."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -179,7 +202,7 @@
|
||||
"id": "metallic-leave",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Odpowiedź:"
|
||||
"Odpowiedź: Tak. 1, 2, 3, 4."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -206,7 +229,17 @@
|
||||
"id": "bibliographic-stopping",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Odpowiedź:"
|
||||
"Odpowiedź: Tak.\n",
|
||||
"1. Liczba operacji wykonanych nie może być ujemna.\n",
|
||||
"2. Gdy x == y, nie są wymagane żadne operacje edycyjne, więc wynik funkcji to 0.\n",
|
||||
"3. Zmiana jednego łańcucha znaków w drugi, wymaga tyle samo operacji edycji, co zmiana drugiego w pierwszy.\n",
|
||||
" Studia -> Studiel = 2; Studiel -> Studia = 2; 2 == 2\n",
|
||||
"4. Istnieją trzy opcje\n",
|
||||
"    - Jeżeli x == y == z, to 0 + 0 == 0\n",
|
||||
"    - Jeżeli x == y, x != z, a x -> z = n, to y -> z = n, więc albo 0 + n == n, albo n + n > 0\n",
|
||||
"    - Jeżeli x != y != z, to im z jest bliżej do x, tym jest dalej od y (jednostką odległości jest liczba przekształceń). Można by to przedstawić graficznie jako trójkąt (x, y, z). z stanowi punkt na pośredniej drodze pomiędzy x i y, która nie może być krótsza niż droga bezpośrednia - wynika to z własności trójkąta.\n",
|
||||
" Studia -> Studiel = 2; Studiel -> udia = 4; udia -> Studia = 2;\n",
|
||||
" 2 + 4 > 2; 2 + 2 == 4"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -214,6 +247,7 @@
|
||||
"id": "attended-channels",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"\n",
|
||||
"W Pythonie dostępna jest biblioteka zawierająca implementację dystansu Levenshteina. Zainstaluj ją w swoim systemie przy użyciu polecenia:\n",
|
||||
"\n",
|
||||
"`pip3 install python-Levenshtein`\n",
|
||||
@ -223,19 +257,20 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "secondary-wrist",
|
||||
"execution_count": null,
|
||||
"id": "4064ce50",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"2"
|
||||
"ename": "ModuleNotFoundError",
|
||||
"evalue": "No module named 'Levenshtein'",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mModuleNotFoundError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[0;32mIn [2], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mLevenshtein\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m distance \u001b[38;5;28;01mas\u001b[39;00m levenshtein_distance\n\u001b[1;32m 3\u001b[0m levenshtein_distance(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mkotek\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mkotki\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
|
||||
"\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'Levenshtein'"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
@ -350,7 +385,11 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def fuzzy_lookup(sentence, threshold):\n",
|
||||
" return []"
|
||||
" results = []\n",
|
||||
" for entry in translation_memory:\n",
|
||||
" if levenshtein_similarity(entry[0], sentence) >= threshold:\n",
|
||||
" results.append(entry[1])\n",
|
||||
" return results"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -358,7 +397,7 @@
|
||||
"author": "Rafał Jaworski",
|
||||
"email": "rjawor@amu.edu.pl",
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@ -373,7 +412,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.10"
|
||||
"version": "3.9.2"
|
||||
},
|
||||
"subtitle": "2. Zaawansowane użycie pamięci tłumaczeń",
|
||||
"title": "Komputerowe wspomaganie tłumaczenia",
|
||||
|
Loading…
Reference in New Issue
Block a user