Compare commits
3 Commits
Author | SHA1 | Date |
---|---|---|
Marek Susniak | e580651f9f | |
Marek Susniak | 7c845bcf8d | |
Marek Susniak | 957bd22d58 |
162
lab/lab_01.ipynb
162
lab/lab_01.ipynb
|
@ -52,7 +52,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 77,
|
||||
"id": "narrow-romantic",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -71,7 +71,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 78,
|
||||
"id": "indonesian-electron",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -82,7 +82,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 79,
|
||||
"id": "compact-trinidad",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -92,7 +92,7 @@
|
|||
"['Press the ENTER button']"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"execution_count": 79,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -119,7 +119,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 80,
|
||||
"id": "exposed-daniel",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -139,7 +139,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 81,
|
||||
"id": "serial-velvet",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -149,7 +149,7 @@
|
|||
"['Press the ENTER button', 'Press the ENTER key']"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 81,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -176,7 +176,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 82,
|
||||
"id": "every-gibson",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -186,7 +186,7 @@
|
|||
"[]"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 82,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -213,13 +213,15 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 83,
|
||||
"id": "protected-rings",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def tm_lookup(sentence):\n",
|
||||
" return ''"
|
||||
" lowerSentence = sentence.lower()\n",
|
||||
"\n",
|
||||
" return [entry[1] for entry in translation_memory if entry[0].lower() == lowerSentence]"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -232,17 +234,17 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"id": "severe-alloy",
|
||||
"execution_count": 84,
|
||||
"id": "60a6c976",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"''"
|
||||
"[]"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"execution_count": 84,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -261,13 +263,21 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 85,
|
||||
"id": "structural-diesel",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import string\n",
|
||||
"\n",
|
||||
"def prepare_sentence(sentence):\n",
|
||||
" translator = str.maketrans('', '', string.punctuation)\n",
|
||||
"\n",
|
||||
" return sentence.lower().translate(translator)\n",
|
||||
"\n",
|
||||
"def tm_lookup(sentence):\n",
|
||||
" return ''"
|
||||
" lowerSentence = prepare_sentence(sentence)\n",
|
||||
" return [entry[1] for entry in translation_memory if prepare_sentence(entry[0]) == lowerSentence]"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -280,17 +290,17 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 86,
|
||||
"id": "brief-senegal",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"''"
|
||||
"[]"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"execution_count": 86,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -317,13 +327,43 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 87,
|
||||
"id": "mathematical-customs",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import string\n",
|
||||
"\n",
|
||||
"def prepare_sentence(sentence):\n",
|
||||
" translator = str.maketrans('', '', string.punctuation)\n",
|
||||
"\n",
|
||||
" return sentence.lower().translate(translator)\n",
|
||||
"\n",
|
||||
"def sentence_similar(sentence1, sentence2):\n",
|
||||
" words1 = sentence1.split()\n",
|
||||
" words2 = sentence2.split()\n",
|
||||
" \n",
|
||||
" min_length = min(len(words1), len(words2))\n",
|
||||
" \n",
|
||||
" matched_count = 0\n",
|
||||
" for i in range(min_length):\n",
|
||||
" if prepare_sentence(words1[i]) == prepare_sentence(words2[i]):\n",
|
||||
" matched_count += 1\n",
|
||||
" \n",
|
||||
" return {\n",
|
||||
" \"count\": matched_count,\n",
|
||||
" \"length\": min_length\n",
|
||||
" }\n",
|
||||
"\n",
|
||||
"def tm_lookup(sentence):\n",
|
||||
" return ''"
|
||||
" collection = []\n",
|
||||
"\n",
|
||||
" for entry in translation_memory:\n",
|
||||
" similarity = sentence_similar(sentence, entry[0])\n",
|
||||
" if similarity[\"length\"] - similarity[\"count\"] <= 1:\n",
|
||||
" collection.append(entry[1])\n",
|
||||
"\n",
|
||||
" return collection\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -344,7 +384,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 88,
|
||||
"id": "humanitarian-wrong",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -362,7 +402,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": 89,
|
||||
"id": "located-perception",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -374,7 +414,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": 90,
|
||||
"id": "advised-casting",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -384,7 +424,7 @@
|
|||
"[('przycisk', 'button'), ('drukarka', 'printer')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"execution_count": 90,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -406,7 +446,7 @@
|
|||
"id": "defensive-fifteen",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Odpowiedź:"
|
||||
"Odpowiedź: Jest to n przeszukiwań po liście m-elementowej co daje złozonosc O(n*m)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -419,13 +459,40 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"execution_count": 91,
|
||||
"id": "original-tunisia",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def prepare_sentence(sentence):\n",
|
||||
" return sentence.lower()\n",
|
||||
"\n",
|
||||
"def glossary_lookup(sentence):\n",
|
||||
" return ''"
|
||||
" sentence_words = sentence.split()\n",
|
||||
" lowered_words = list(map(prepare_sentence, sentence_words))\n",
|
||||
"\n",
|
||||
" return [entry for entry in glossary if prepare_sentence(entry[0]) in lowered_words]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 92,
|
||||
"id": "df948bb3",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[('komputer', 'computer')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 92,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"glossary_lookup(\"Komputer\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -438,13 +505,44 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"execution_count": 93,
|
||||
"id": "adolescent-semiconductor",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def prepare_dictionary(sentences):\n",
|
||||
" dict = {}\n",
|
||||
"\n",
|
||||
" for entry in sentences:\n",
|
||||
" dict[entry[0].lower()] = entry\n",
|
||||
"\n",
|
||||
" return dict\n",
|
||||
"\n",
|
||||
"glossary_dict = prepare_dictionary(glossary)\n",
|
||||
"\n",
|
||||
"def glossary_lookup(sentence):\n",
|
||||
" return ''"
|
||||
" return glossary_dict[sentence.lower()]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 94,
|
||||
"id": "98e9ff56",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"('komputer', 'computer')"
|
||||
]
|
||||
},
|
||||
"execution_count": 94,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"glossary_lookup(\"Komputer\")"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
@ -467,7 +565,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.10"
|
||||
"version": "3.7.9"
|
||||
},
|
||||
"subtitle": "1. Podstawowe techniki wspomagania tłumaczenia",
|
||||
"title": "Komputerowe wspomaganie tłumaczenia",
|
||||
|
|
131
lab/lab_02.ipynb
131
lab/lab_02.ipynb
|
@ -57,7 +57,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 92,
|
||||
"id": "confident-prison",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -80,15 +80,51 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 93,
|
||||
"id": "continental-submission",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def ice_lookup(sentence, prev_sentence, next_sentence):\n",
|
||||
"def prepare_dictionary(sentences):\n",
|
||||
" dict = {}\n",
|
||||
"\n",
|
||||
" for entry in sentences:\n",
|
||||
" dict[entry[0].lower()] = entry\n",
|
||||
"\n",
|
||||
" return dict\n",
|
||||
"\n",
|
||||
"memory_dict = prepare_dictionary(translation_memory)\n",
|
||||
"\n",
|
||||
"def ice_lookup(input, prev_sentence, next_sentence): \n",
|
||||
" sentence = input.lower()\n",
|
||||
"\n",
|
||||
" if prev_sentence.lower() in memory_dict and next_sentence.lower() in memory_dict and sentence in memory_dict:\n",
|
||||
" return memory_dict[sentence]\n",
|
||||
"\n",
|
||||
" return []"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 94,
|
||||
"id": "bdc1df76",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"('Wciśnij przycisk Enter', 'Press the ENTER button')"
|
||||
]
|
||||
},
|
||||
"execution_count": 94,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"ice_lookup(\"Wciśnij przycisk Enter\", \"Sprawdź ustawienia sieciowe\", \"Drukarka jest wyłączona\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "figured-server",
|
||||
|
@ -119,7 +155,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 95,
|
||||
"id": "fourth-pillow",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -141,7 +177,11 @@
|
|||
"id": "graduate-theorem",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Odpowiedź:"
|
||||
"Odpowiedź: Nie, nie jest poprawna. Cechy:\n",
|
||||
"- nieujemna (abs > 0)\n",
|
||||
"- identyfikacja nie jest spelniona -> moga miec taka sama dlugosc, a byc inne\n",
|
||||
"- symetryczna - wynik z wartosci bezwglednej\n",
|
||||
"- nierownosc trojkata nie jest spelniona"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -154,7 +194,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 96,
|
||||
"id": "continued-christopher",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -179,7 +219,7 @@
|
|||
"id": "metallic-leave",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Odpowiedź:"
|
||||
"Odpowiedź: z punktu widzenia cech, wszystkie cechy sa spelnione, jednak funkcja sama w sobie jest bezuyteczna poprzez to, ze wartosci sa stale. "
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -206,7 +246,10 @@
|
|||
"id": "bibliographic-stopping",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Odpowiedź:"
|
||||
"Odpowiedź: Tak, poniewaz spelnia cechy: \n",
|
||||
"- nieujemnosci - zawsze dodatni lub zero gdy a i b jest rowny sobie\n",
|
||||
"- symetria - dystans od a i b jest taki sam jak b i a\n",
|
||||
"- nierownosc trojkata - dystans od ciągu A do C przez B jest zawsze mniejszy lub równy sumie dystansów od A do B i od B do C"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -223,7 +266,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 97,
|
||||
"id": "secondary-wrist",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -233,7 +276,7 @@
|
|||
"2"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 97,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -254,7 +297,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 98,
|
||||
"id": "associate-tuner",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -273,7 +316,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 99,
|
||||
"id": "focal-pathology",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -283,7 +326,7 @@
|
|||
"0.9166666666666666"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"execution_count": 99,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -294,7 +337,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 100,
|
||||
"id": "roman-ceiling",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -304,7 +347,7 @@
|
|||
"0.9428571428571428"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"execution_count": 100,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -315,7 +358,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 101,
|
||||
"id": "invisible-cambodia",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -325,7 +368,7 @@
|
|||
"0.631578947368421"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"execution_count": 101,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
|
@ -344,13 +387,61 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 102,
|
||||
"id": "genetic-cradle",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def fuzzy_lookup(sentence, threshold):\n",
|
||||
" return []"
|
||||
" col = []\n",
|
||||
"\n",
|
||||
" for entry in translation_memory:\n",
|
||||
" if (levenshtein_similarity(entry[0], sentence)) >= threshold:\n",
|
||||
" col.append(entry)\n",
|
||||
" \n",
|
||||
" return col"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 103,
|
||||
"id": "57fb39b9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[]"
|
||||
]
|
||||
},
|
||||
"execution_count": 103,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"fuzzy_lookup('Spróbuj wyłączyć i włączyć komputer', 0.7)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 104,
|
||||
"id": "94e1b3be",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[('Wciśnij przycisk Enter', 'Press the ENTER button')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 104,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"fuzzy_lookup('Wciśnij przycisk escape', 0.7)"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
@ -373,7 +464,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.10"
|
||||
"version": "3.7.9"
|
||||
},
|
||||
"subtitle": "2. Zaawansowane użycie pamięci tłumaczeń",
|
||||
"title": "Komputerowe wspomaganie tłumaczenia",
|
||||
|
|
474
lab/lab_03.ipynb
474
lab/lab_03.ipynb
|
@ -63,7 +63,7 @@
|
|||
"id": "diverse-sunglasses",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Odpowiedź:"
|
||||
"Odpowiedź: metal cabinet guides lub metal cabinet slides. Skorzystalem z dwoch slownikow oraz duzego modelu jezykowego."
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -86,7 +86,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"execution_count": 102,
|
||||
"id": "loving-prince",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -110,12 +110,12 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 103,
|
||||
"id": "bound-auction",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dictionary = ['program', 'application', 'applet' 'compile']"
|
||||
"dictionary = ['program', 'application', 'applet', 'compile']"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -128,13 +128,47 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 104,
|
||||
"id": "cognitive-cedar",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def terminology_lookup():\n",
|
||||
" return []"
|
||||
"import re\n",
|
||||
"\n",
|
||||
"def terminology_lookup(txt, labels):\n",
|
||||
" results = []\n",
|
||||
"\n",
|
||||
" for label in labels:\n",
|
||||
" results.append((\n",
|
||||
" label,\n",
|
||||
" [(m.start(), m.end() - 1) for m in re.finditer(label, txt)]\n",
|
||||
" ))\n",
|
||||
"\n",
|
||||
" return results"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 105,
|
||||
"id": "7cc3ad1f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[('program', [(14, 20), (291, 297), (468, 474), (516, 522), (533, 539)]),\n",
|
||||
" ('application', [(80, 90), (164, 174), (322, 332)]),\n",
|
||||
" ('applet', [(302, 307)]),\n",
|
||||
" ('compile', [(56, 62)])]"
|
||||
]
|
||||
},
|
||||
"execution_count": 105,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"terminology_lookup(text, dictionary)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -161,7 +195,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 106,
|
||||
"id": "tribal-attention",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
|
@ -169,108 +203,108 @@
|
|||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
" \n",
|
||||
"for\n",
|
||||
"all\n",
|
||||
"Java\n",
|
||||
"programmer\n",
|
||||
":\n",
|
||||
"this\n",
|
||||
"section\n",
|
||||
"explain\n",
|
||||
"how\n",
|
||||
"to\n",
|
||||
"compile\n",
|
||||
"and\n",
|
||||
"run\n",
|
||||
"a\n",
|
||||
"swing\n",
|
||||
"application\n",
|
||||
"from\n",
|
||||
"the\n",
|
||||
"command\n",
|
||||
"line\n",
|
||||
".\n",
|
||||
"for\n",
|
||||
"information\n",
|
||||
"on\n",
|
||||
"compile\n",
|
||||
"and\n",
|
||||
"run\n",
|
||||
"a\n",
|
||||
"swing\n",
|
||||
"application\n",
|
||||
"use\n",
|
||||
"NetBeans\n",
|
||||
"IDE\n",
|
||||
",\n",
|
||||
"see\n",
|
||||
"Running\n",
|
||||
"Tutorial\n",
|
||||
"Examples\n",
|
||||
"in\n",
|
||||
"NetBeans\n",
|
||||
"IDE\n",
|
||||
".\n",
|
||||
"the\n",
|
||||
"compilation\n",
|
||||
"instruction\n",
|
||||
"work\n",
|
||||
"for\n",
|
||||
"all\n",
|
||||
"swing\n",
|
||||
"program\n",
|
||||
"—\n",
|
||||
"applet\n",
|
||||
",\n",
|
||||
"as\n",
|
||||
"well\n",
|
||||
"as\n",
|
||||
"application\n",
|
||||
".\n",
|
||||
"here\n",
|
||||
"be\n",
|
||||
"the\n",
|
||||
"step\n",
|
||||
"-PRON-\n",
|
||||
"need\n",
|
||||
"to\n",
|
||||
"follow\n",
|
||||
":\n",
|
||||
"install\n",
|
||||
"the\n",
|
||||
"late\n",
|
||||
"release\n",
|
||||
"of\n",
|
||||
"the\n",
|
||||
"Java\n",
|
||||
"SE\n",
|
||||
"platform\n",
|
||||
",\n",
|
||||
"if\n",
|
||||
"-PRON-\n",
|
||||
"have\n",
|
||||
"not\n",
|
||||
"already\n",
|
||||
"do\n",
|
||||
"so\n",
|
||||
".\n",
|
||||
"create\n",
|
||||
"a\n",
|
||||
"program\n",
|
||||
"that\n",
|
||||
"use\n",
|
||||
"Swing\n",
|
||||
"component\n",
|
||||
".\n",
|
||||
"compile\n",
|
||||
"the\n",
|
||||
"program\n",
|
||||
".\n",
|
||||
"run\n",
|
||||
"the\n",
|
||||
"program\n",
|
||||
".\n"
|
||||
" 0\n",
|
||||
"For for 1\n",
|
||||
"all all 5\n",
|
||||
"Java Java 9\n",
|
||||
"programmers programmer 14\n",
|
||||
": : 25\n",
|
||||
"This this 27\n",
|
||||
"section section 32\n",
|
||||
"explains explain 40\n",
|
||||
"how how 49\n",
|
||||
"to to 53\n",
|
||||
"compile compile 56\n",
|
||||
"and and 64\n",
|
||||
"run run 68\n",
|
||||
"a a 72\n",
|
||||
"Swing swing 74\n",
|
||||
"application application 80\n",
|
||||
"from from 92\n",
|
||||
"the the 97\n",
|
||||
"command command 101\n",
|
||||
"line line 109\n",
|
||||
". . 113\n",
|
||||
"For for 115\n",
|
||||
"information information 119\n",
|
||||
"on on 131\n",
|
||||
"compiling compile 134\n",
|
||||
"and and 144\n",
|
||||
"running run 148\n",
|
||||
"a a 156\n",
|
||||
"Swing swing 158\n",
|
||||
"application application 164\n",
|
||||
"using use 176\n",
|
||||
"NetBeans NetBeans 182\n",
|
||||
"IDE IDE 191\n",
|
||||
", , 194\n",
|
||||
"see see 196\n",
|
||||
"Running run 200\n",
|
||||
"Tutorial Tutorial 208\n",
|
||||
"Examples Examples 217\n",
|
||||
"in in 226\n",
|
||||
"NetBeans NetBeans 229\n",
|
||||
"IDE IDE 238\n",
|
||||
". . 241\n",
|
||||
"The the 243\n",
|
||||
"compilation compilation 247\n",
|
||||
"instructions instruction 259\n",
|
||||
"work work 272\n",
|
||||
"for for 277\n",
|
||||
"all all 281\n",
|
||||
"Swing Swing 285\n",
|
||||
"programs program 291\n",
|
||||
"— — 300\n",
|
||||
"applets applet 302\n",
|
||||
", , 309\n",
|
||||
"as as 311\n",
|
||||
"well well 314\n",
|
||||
"as as 319\n",
|
||||
"applications application 322\n",
|
||||
". . 334\n",
|
||||
"Here here 336\n",
|
||||
"are be 341\n",
|
||||
"the the 345\n",
|
||||
"steps step 349\n",
|
||||
"you you 355\n",
|
||||
"need need 359\n",
|
||||
"to to 364\n",
|
||||
"follow follow 367\n",
|
||||
": : 373\n",
|
||||
"Install install 375\n",
|
||||
"the the 383\n",
|
||||
"latest late 387\n",
|
||||
"release release 394\n",
|
||||
"of of 402\n",
|
||||
"the the 405\n",
|
||||
"Java Java 409\n",
|
||||
"SE SE 414\n",
|
||||
"platform platform 417\n",
|
||||
", , 425\n",
|
||||
"if if 427\n",
|
||||
"you you 430\n",
|
||||
"have have 434\n",
|
||||
"n't not 438\n",
|
||||
"already already 442\n",
|
||||
"done do 450\n",
|
||||
"so so 455\n",
|
||||
". . 457\n",
|
||||
"Create create 459\n",
|
||||
"a a 466\n",
|
||||
"program program 468\n",
|
||||
"that that 476\n",
|
||||
"uses use 481\n",
|
||||
"Swing swing 486\n",
|
||||
"components component 492\n",
|
||||
". . 502\n",
|
||||
"Compile compile 504\n",
|
||||
"the the 512\n",
|
||||
"program program 516\n",
|
||||
". . 523\n",
|
||||
"Run run 525\n",
|
||||
"the the 529\n",
|
||||
"program program 533\n",
|
||||
". . 540\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
@ -281,7 +315,7 @@
|
|||
"doc = nlp(text)\n",
|
||||
"\n",
|
||||
"for token in doc:\n",
|
||||
" print(token.lemma_)"
|
||||
" print(token, token.lemma_, token.idx)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -302,13 +336,50 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 107,
|
||||
"id": "surgical-demonstration",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def terminology_lookup():\n",
|
||||
" return []"
|
||||
"import spacy\n",
|
||||
"nlp = spacy.load(\"en_core_web_sm\")\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def terminology_lookup(txt, labels):\n",
|
||||
" result = {};\n",
|
||||
" doc = nlp(txt)\n",
|
||||
"\n",
|
||||
" for token in doc:\n",
|
||||
" if token.lemma_ in labels: \n",
|
||||
" if token.lemma_ not in result:\n",
|
||||
" result[token.lemma_] = []\n",
|
||||
" result[token.lemma_].append((token.idx, token.idx + len(token)))\n",
|
||||
"\n",
|
||||
" return result"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 108,
|
||||
"id": "4772c1b1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'compile': [(56, 63), (134, 143), (504, 511)],\n",
|
||||
" 'application': [(80, 91), (164, 175), (322, 334)],\n",
|
||||
" 'program': [(291, 299), (468, 475), (516, 523), (533, 540)],\n",
|
||||
" 'applet': [(302, 309)]}"
|
||||
]
|
||||
},
|
||||
"execution_count": 108,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"terminology_lookup(text, dictionary)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -337,13 +408,56 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 109,
|
||||
"id": "superb-butterfly",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_nouns(text):\n",
|
||||
" return []"
|
||||
" doc = nlp(text)\n",
|
||||
" return [token.lemma_ for token in doc if token.pos_ == 'NOUN']"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 110,
|
||||
"id": "3c916a3e",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['programmer',\n",
|
||||
" 'section',\n",
|
||||
" 'swing',\n",
|
||||
" 'application',\n",
|
||||
" 'command',\n",
|
||||
" 'line',\n",
|
||||
" 'information',\n",
|
||||
" 'swing',\n",
|
||||
" 'application',\n",
|
||||
" 'compilation',\n",
|
||||
" 'instruction',\n",
|
||||
" 'program',\n",
|
||||
" 'applet',\n",
|
||||
" 'application',\n",
|
||||
" 'step',\n",
|
||||
" 'release',\n",
|
||||
" 'platform',\n",
|
||||
" 'program',\n",
|
||||
" 'swing',\n",
|
||||
" 'component',\n",
|
||||
" 'program',\n",
|
||||
" 'program']"
|
||||
]
|
||||
},
|
||||
"execution_count": 110,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"get_nouns(text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -356,7 +470,7 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 111,
|
||||
"id": "acting-tolerance",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
|
@ -374,13 +488,57 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 112,
|
||||
"id": "eight-redhead",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def count_words(words):\n",
|
||||
" word_count = {}\n",
|
||||
" for word in words:\n",
|
||||
" if word in word_count:\n",
|
||||
" word_count[word] += 1\n",
|
||||
" else:\n",
|
||||
" word_count[word] = 1\n",
|
||||
" return word_count\n",
|
||||
"\n",
|
||||
"def extract_terms(text):\n",
|
||||
" return []"
|
||||
" return count_words(get_nouns(text))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 113,
|
||||
"id": "374550d8",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'programmer': 1,\n",
|
||||
" 'section': 1,\n",
|
||||
" 'swing': 3,\n",
|
||||
" 'application': 3,\n",
|
||||
" 'command': 1,\n",
|
||||
" 'line': 1,\n",
|
||||
" 'information': 1,\n",
|
||||
" 'compilation': 1,\n",
|
||||
" 'instruction': 1,\n",
|
||||
" 'program': 4,\n",
|
||||
" 'applet': 1,\n",
|
||||
" 'step': 1,\n",
|
||||
" 'release': 1,\n",
|
||||
" 'platform': 1,\n",
|
||||
" 'component': 1}"
|
||||
]
|
||||
},
|
||||
"execution_count": 113,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"extract_terms(text)"
|
||||
]
|
||||
},
|
||||
{
|
||||
|
@ -393,13 +551,85 @@
|
|||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 114,
|
||||
"id": "monetary-mambo",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_verbs(text):\n",
|
||||
" doc = nlp(text)\n",
|
||||
" return [token.lemma_ for token in doc if token.pos_ == 'VERB']\n",
|
||||
"\n",
|
||||
"def get_adjectives(text):\n",
|
||||
" doc = nlp(text)\n",
|
||||
" return [token.lemma_ for token in doc if token.pos_ == 'ADJ']\n",
|
||||
"\n",
|
||||
"def extract_terms(text):\n",
|
||||
" return []"
|
||||
" return {\n",
|
||||
" \"nouns\": get_nouns(text),\n",
|
||||
" \"verbs\": get_verbs(text),\n",
|
||||
" \"adjectives\": get_adjectives(text)\n",
|
||||
" }"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 115,
|
||||
"id": "95494ac9",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"{'nouns': ['programmer',\n",
|
||||
" 'section',\n",
|
||||
" 'swing',\n",
|
||||
" 'application',\n",
|
||||
" 'command',\n",
|
||||
" 'line',\n",
|
||||
" 'information',\n",
|
||||
" 'swing',\n",
|
||||
" 'application',\n",
|
||||
" 'compilation',\n",
|
||||
" 'instruction',\n",
|
||||
" 'program',\n",
|
||||
" 'applet',\n",
|
||||
" 'application',\n",
|
||||
" 'step',\n",
|
||||
" 'release',\n",
|
||||
" 'platform',\n",
|
||||
" 'program',\n",
|
||||
" 'swing',\n",
|
||||
" 'component',\n",
|
||||
" 'program',\n",
|
||||
" 'program'],\n",
|
||||
" 'verbs': ['explain',\n",
|
||||
" 'compile',\n",
|
||||
" 'run',\n",
|
||||
" 'compile',\n",
|
||||
" 'run',\n",
|
||||
" 'use',\n",
|
||||
" 'see',\n",
|
||||
" 'run',\n",
|
||||
" 'work',\n",
|
||||
" 'need',\n",
|
||||
" 'follow',\n",
|
||||
" 'install',\n",
|
||||
" 'do',\n",
|
||||
" 'create',\n",
|
||||
" 'use',\n",
|
||||
" 'compile',\n",
|
||||
" 'run'],\n",
|
||||
" 'adjectives': ['late']}"
|
||||
]
|
||||
},
|
||||
"execution_count": 115,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"extract_terms(text)"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
@ -422,7 +652,7 @@
|
|||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.10"
|
||||
"version": "3.7.9"
|
||||
},
|
||||
"subtitle": "3. Terminologia",
|
||||
"title": "Komputerowe wspomaganie tłumaczenia",
|
||||
|
|
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
218
lab/lab_08.ipynb
218
lab/lab_08.ipynb
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue