Compare commits

...

3 Commits
main ... main

Author SHA1 Message Date
Marek Susniak e580651f9f Lab vol.2 2024-04-23 23:54:26 +02:00
Marek Susniak 7c845bcf8d Laboratoria 13.04.2024 2024-04-15 21:15:24 +02:00
Marek Susniak 957bd22d58 Laboratoria 13.04.2024 2024-04-15 19:34:20 +02:00
7 changed files with 596 additions and 896 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

View File

@ -52,7 +52,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 77,
"id": "narrow-romantic",
"metadata": {},
"outputs": [],
@ -71,7 +71,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 78,
"id": "indonesian-electron",
"metadata": {},
"outputs": [],
@ -82,7 +82,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 79,
"id": "compact-trinidad",
"metadata": {},
"outputs": [
@ -92,7 +92,7 @@
"['Press the ENTER button']"
]
},
"execution_count": 3,
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
@ -119,7 +119,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 80,
"id": "exposed-daniel",
"metadata": {},
"outputs": [],
@ -139,7 +139,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 81,
"id": "serial-velvet",
"metadata": {},
"outputs": [
@ -149,7 +149,7 @@
"['Press the ENTER button', 'Press the ENTER key']"
]
},
"execution_count": 5,
"execution_count": 81,
"metadata": {},
"output_type": "execute_result"
}
@ -176,7 +176,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 82,
"id": "every-gibson",
"metadata": {},
"outputs": [
@ -186,7 +186,7 @@
"[]"
]
},
"execution_count": 6,
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
@ -213,13 +213,15 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 83,
"id": "protected-rings",
"metadata": {},
"outputs": [],
"source": [
"def tm_lookup(sentence):\n",
" return ''"
" lowerSentence = sentence.lower()\n",
"\n",
" return [entry[1] for entry in translation_memory if entry[0].lower() == lowerSentence]"
]
},
{
@ -232,17 +234,17 @@
},
{
"cell_type": "code",
"execution_count": 18,
"id": "severe-alloy",
"execution_count": 84,
"id": "60a6c976",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"''"
"[]"
]
},
"execution_count": 18,
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
}
@ -261,13 +263,21 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 85,
"id": "structural-diesel",
"metadata": {},
"outputs": [],
"source": [
"import string\n",
"\n",
"def prepare_sentence(sentence):\n",
" translator = str.maketrans('', '', string.punctuation)\n",
"\n",
" return sentence.lower().translate(translator)\n",
"\n",
"def tm_lookup(sentence):\n",
" return ''"
" lowerSentence = prepare_sentence(sentence)\n",
" return [entry[1] for entry in translation_memory if prepare_sentence(entry[0]) == lowerSentence]"
]
},
{
@ -280,17 +290,17 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 86,
"id": "brief-senegal",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"''"
"[]"
]
},
"execution_count": 12,
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
@ -317,13 +327,43 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 87,
"id": "mathematical-customs",
"metadata": {},
"outputs": [],
"source": [
"import string\n",
"\n",
"def prepare_sentence(sentence):\n",
" translator = str.maketrans('', '', string.punctuation)\n",
"\n",
" return sentence.lower().translate(translator)\n",
"\n",
"def sentence_similar(sentence1, sentence2):\n",
" words1 = sentence1.split()\n",
" words2 = sentence2.split()\n",
" \n",
" min_length = min(len(words1), len(words2))\n",
" \n",
" matched_count = 0\n",
" for i in range(min_length):\n",
" if prepare_sentence(words1[i]) == prepare_sentence(words2[i]):\n",
" matched_count += 1\n",
" \n",
" return {\n",
" \"count\": matched_count,\n",
" \"length\": min_length\n",
" }\n",
"\n",
"def tm_lookup(sentence):\n",
" return ''"
" collection = []\n",
"\n",
" for entry in translation_memory:\n",
" similarity = sentence_similar(sentence, entry[0])\n",
" if similarity[\"length\"] - similarity[\"count\"] <= 1:\n",
" collection.append(entry[1])\n",
"\n",
" return collection\n"
]
},
{
@ -344,7 +384,7 @@
},
{
"cell_type": "code",
"execution_count": 15,
"execution_count": 88,
"id": "humanitarian-wrong",
"metadata": {},
"outputs": [],
@ -362,7 +402,7 @@
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 89,
"id": "located-perception",
"metadata": {},
"outputs": [],
@ -374,7 +414,7 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 90,
"id": "advised-casting",
"metadata": {},
"outputs": [
@ -384,7 +424,7 @@
"[('przycisk', 'button'), ('drukarka', 'printer')]"
]
},
"execution_count": 17,
"execution_count": 90,
"metadata": {},
"output_type": "execute_result"
}
@ -406,7 +446,7 @@
"id": "defensive-fifteen",
"metadata": {},
"source": [
"Odpowiedź:"
"Odpowiedź: Jest to n przeszukiwań po liście m-elementowej co daje złozonosc O(n*m)"
]
},
{
@ -419,13 +459,40 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 91,
"id": "original-tunisia",
"metadata": {},
"outputs": [],
"source": [
"def prepare_sentence(sentence):\n",
" return sentence.lower()\n",
"\n",
"def glossary_lookup(sentence):\n",
" return ''"
" sentence_words = sentence.split()\n",
" lowered_words = list(map(prepare_sentence, sentence_words))\n",
"\n",
" return [entry for entry in glossary if prepare_sentence(entry[0]) in lowered_words]"
]
},
{
"cell_type": "code",
"execution_count": 92,
"id": "df948bb3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('komputer', 'computer')]"
]
},
"execution_count": 92,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"glossary_lookup(\"Komputer\")"
]
},
{
@ -438,13 +505,44 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 93,
"id": "adolescent-semiconductor",
"metadata": {},
"outputs": [],
"source": [
"def prepare_dictionary(sentences):\n",
" dict = {}\n",
"\n",
" for entry in sentences:\n",
" dict[entry[0].lower()] = entry\n",
"\n",
" return dict\n",
"\n",
"glossary_dict = prepare_dictionary(glossary)\n",
"\n",
"def glossary_lookup(sentence):\n",
" return ''"
" return glossary_dict[sentence.lower()]"
]
},
{
"cell_type": "code",
"execution_count": 94,
"id": "98e9ff56",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"('komputer', 'computer')"
]
},
"execution_count": 94,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"glossary_lookup(\"Komputer\")"
]
}
],
@ -467,7 +565,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
"version": "3.7.9"
},
"subtitle": "1. Podstawowe techniki wspomagania tłumaczenia",
"title": "Komputerowe wspomaganie tłumaczenia",

View File

@ -57,7 +57,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 92,
"id": "confident-prison",
"metadata": {},
"outputs": [],
@ -80,15 +80,51 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 93,
"id": "continental-submission",
"metadata": {},
"outputs": [],
"source": [
"def ice_lookup(sentence, prev_sentence, next_sentence):\n",
"def prepare_dictionary(sentences):\n",
" dict = {}\n",
"\n",
" for entry in sentences:\n",
" dict[entry[0].lower()] = entry\n",
"\n",
" return dict\n",
"\n",
"memory_dict = prepare_dictionary(translation_memory)\n",
"\n",
"def ice_lookup(input, prev_sentence, next_sentence): \n",
" sentence = input.lower()\n",
"\n",
" if prev_sentence.lower() in memory_dict and next_sentence.lower() in memory_dict and sentence in memory_dict:\n",
" return memory_dict[sentence]\n",
"\n",
" return []"
]
},
{
"cell_type": "code",
"execution_count": 94,
"id": "bdc1df76",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"('Wciśnij przycisk Enter', 'Press the ENTER button')"
]
},
"execution_count": 94,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ice_lookup(\"Wciśnij przycisk Enter\", \"Sprawdź ustawienia sieciowe\", \"Drukarka jest wyłączona\")"
]
},
{
"cell_type": "markdown",
"id": "figured-server",
@ -119,7 +155,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 95,
"id": "fourth-pillow",
"metadata": {},
"outputs": [],
@ -141,7 +177,11 @@
"id": "graduate-theorem",
"metadata": {},
"source": [
"Odpowiedź:"
"Odpowiedź: Nie, nie jest poprawna. Cechy:\n",
"- nieujemna (abs > 0)\n",
"- identyfikacja nie jest spelniona -> moga miec taka sama dlugosc, a byc inne\n",
"- symetryczna - wynik z wartosci bezwglednej\n",
"- nierownosc trojkata nie jest spelniona"
]
},
{
@ -154,7 +194,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 96,
"id": "continued-christopher",
"metadata": {},
"outputs": [],
@ -179,7 +219,7 @@
"id": "metallic-leave",
"metadata": {},
"source": [
"Odpowiedź:"
"Odpowiedź: z punktu widzenia cech, wszystkie cechy sa spelnione, jednak funkcja sama w sobie jest bezuyteczna poprzez to, ze wartosci sa stale. "
]
},
{
@ -206,7 +246,10 @@
"id": "bibliographic-stopping",
"metadata": {},
"source": [
"Odpowiedź:"
"Odpowiedź: Tak, poniewaz spelnia cechy: \n",
"- nieujemnosci - zawsze dodatni lub zero gdy a i b jest rowny sobie\n",
"- symetria - dystans od a i b jest taki sam jak b i a\n",
"- nierownosc trojkata - dystans od ciągu A do C przez B jest zawsze mniejszy lub równy sumie dystansów od A do B i od B do C"
]
},
{
@ -223,7 +266,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 97,
"id": "secondary-wrist",
"metadata": {},
"outputs": [
@ -233,7 +276,7 @@
"2"
]
},
"execution_count": 5,
"execution_count": 97,
"metadata": {},
"output_type": "execute_result"
}
@ -254,7 +297,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 98,
"id": "associate-tuner",
"metadata": {},
"outputs": [],
@ -273,7 +316,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 99,
"id": "focal-pathology",
"metadata": {},
"outputs": [
@ -283,7 +326,7 @@
"0.9166666666666666"
]
},
"execution_count": 7,
"execution_count": 99,
"metadata": {},
"output_type": "execute_result"
}
@ -294,7 +337,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 100,
"id": "roman-ceiling",
"metadata": {},
"outputs": [
@ -304,7 +347,7 @@
"0.9428571428571428"
]
},
"execution_count": 8,
"execution_count": 100,
"metadata": {},
"output_type": "execute_result"
}
@ -315,7 +358,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 101,
"id": "invisible-cambodia",
"metadata": {},
"outputs": [
@ -325,7 +368,7 @@
"0.631578947368421"
]
},
"execution_count": 9,
"execution_count": 101,
"metadata": {},
"output_type": "execute_result"
}
@ -344,13 +387,61 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 102,
"id": "genetic-cradle",
"metadata": {},
"outputs": [],
"source": [
"def fuzzy_lookup(sentence, threshold):\n",
" return []"
" col = []\n",
"\n",
" for entry in translation_memory:\n",
" if (levenshtein_similarity(entry[0], sentence)) >= threshold:\n",
" col.append(entry)\n",
" \n",
" return col"
]
},
{
"cell_type": "code",
"execution_count": 103,
"id": "57fb39b9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 103,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fuzzy_lookup('Spróbuj wyłączyć i włączyć komputer', 0.7)"
]
},
{
"cell_type": "code",
"execution_count": 104,
"id": "94e1b3be",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('Wciśnij przycisk Enter', 'Press the ENTER button')]"
]
},
"execution_count": 104,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fuzzy_lookup('Wciśnij przycisk escape', 0.7)"
]
}
],
@ -373,7 +464,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
"version": "3.7.9"
},
"subtitle": "2. Zaawansowane użycie pamięci tłumaczeń",
"title": "Komputerowe wspomaganie tłumaczenia",

View File

@ -63,7 +63,7 @@
"id": "diverse-sunglasses",
"metadata": {},
"source": [
"Odpowiedź:"
"Odpowiedź: metal cabinet guides lub metal cabinet slides. Skorzystalem z dwoch slownikow oraz duzego modelu jezykowego."
]
},
{
@ -86,7 +86,7 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 102,
"id": "loving-prince",
"metadata": {},
"outputs": [],
@ -110,12 +110,12 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 103,
"id": "bound-auction",
"metadata": {},
"outputs": [],
"source": [
"dictionary = ['program', 'application', 'applet' 'compile']"
"dictionary = ['program', 'application', 'applet', 'compile']"
]
},
{
@ -128,13 +128,47 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 104,
"id": "cognitive-cedar",
"metadata": {},
"outputs": [],
"source": [
"def terminology_lookup():\n",
" return []"
"import re\n",
"\n",
"def terminology_lookup(txt, labels):\n",
" results = []\n",
"\n",
" for label in labels:\n",
" results.append((\n",
" label,\n",
" [(m.start(), m.end() - 1) for m in re.finditer(label, txt)]\n",
" ))\n",
"\n",
" return results"
]
},
{
"cell_type": "code",
"execution_count": 105,
"id": "7cc3ad1f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('program', [(14, 20), (291, 297), (468, 474), (516, 522), (533, 539)]),\n",
" ('application', [(80, 90), (164, 174), (322, 332)]),\n",
" ('applet', [(302, 307)]),\n",
" ('compile', [(56, 62)])]"
]
},
"execution_count": 105,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"terminology_lookup(text, dictionary)"
]
},
{
@ -161,7 +195,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 106,
"id": "tribal-attention",
"metadata": {},
"outputs": [
@ -169,108 +203,108 @@
"name": "stdout",
"output_type": "stream",
"text": [
" \n",
"for\n",
"all\n",
"Java\n",
"programmer\n",
":\n",
"this\n",
"section\n",
"explain\n",
"how\n",
"to\n",
"compile\n",
"and\n",
"run\n",
"a\n",
"swing\n",
"application\n",
"from\n",
"the\n",
"command\n",
"line\n",
".\n",
"for\n",
"information\n",
"on\n",
"compile\n",
"and\n",
"run\n",
"a\n",
"swing\n",
"application\n",
"use\n",
"NetBeans\n",
"IDE\n",
",\n",
"see\n",
"Running\n",
"Tutorial\n",
"Examples\n",
"in\n",
"NetBeans\n",
"IDE\n",
".\n",
"the\n",
"compilation\n",
"instruction\n",
"work\n",
"for\n",
"all\n",
"swing\n",
"program\n",
"—\n",
"applet\n",
",\n",
"as\n",
"well\n",
"as\n",
"application\n",
".\n",
"here\n",
"be\n",
"the\n",
"step\n",
"-PRON-\n",
"need\n",
"to\n",
"follow\n",
":\n",
"install\n",
"the\n",
"late\n",
"release\n",
"of\n",
"the\n",
"Java\n",
"SE\n",
"platform\n",
",\n",
"if\n",
"-PRON-\n",
"have\n",
"not\n",
"already\n",
"do\n",
"so\n",
".\n",
"create\n",
"a\n",
"program\n",
"that\n",
"use\n",
"Swing\n",
"component\n",
".\n",
"compile\n",
"the\n",
"program\n",
".\n",
"run\n",
"the\n",
"program\n",
".\n"
" 0\n",
"For for 1\n",
"all all 5\n",
"Java Java 9\n",
"programmers programmer 14\n",
": : 25\n",
"This this 27\n",
"section section 32\n",
"explains explain 40\n",
"how how 49\n",
"to to 53\n",
"compile compile 56\n",
"and and 64\n",
"run run 68\n",
"a a 72\n",
"Swing swing 74\n",
"application application 80\n",
"from from 92\n",
"the the 97\n",
"command command 101\n",
"line line 109\n",
". . 113\n",
"For for 115\n",
"information information 119\n",
"on on 131\n",
"compiling compile 134\n",
"and and 144\n",
"running run 148\n",
"a a 156\n",
"Swing swing 158\n",
"application application 164\n",
"using use 176\n",
"NetBeans NetBeans 182\n",
"IDE IDE 191\n",
", , 194\n",
"see see 196\n",
"Running run 200\n",
"Tutorial Tutorial 208\n",
"Examples Examples 217\n",
"in in 226\n",
"NetBeans NetBeans 229\n",
"IDE IDE 238\n",
". . 241\n",
"The the 243\n",
"compilation compilation 247\n",
"instructions instruction 259\n",
"work work 272\n",
"for for 277\n",
"all all 281\n",
"Swing Swing 285\n",
"programs program 291\n",
"— — 300\n",
"applets applet 302\n",
", , 309\n",
"as as 311\n",
"well well 314\n",
"as as 319\n",
"applications application 322\n",
". . 334\n",
"Here here 336\n",
"are be 341\n",
"the the 345\n",
"steps step 349\n",
"you you 355\n",
"need need 359\n",
"to to 364\n",
"follow follow 367\n",
": : 373\n",
"Install install 375\n",
"the the 383\n",
"latest late 387\n",
"release release 394\n",
"of of 402\n",
"the the 405\n",
"Java Java 409\n",
"SE SE 414\n",
"platform platform 417\n",
", , 425\n",
"if if 427\n",
"you you 430\n",
"have have 434\n",
"n't not 438\n",
"already already 442\n",
"done do 450\n",
"so so 455\n",
". . 457\n",
"Create create 459\n",
"a a 466\n",
"program program 468\n",
"that that 476\n",
"uses use 481\n",
"Swing swing 486\n",
"components component 492\n",
". . 502\n",
"Compile compile 504\n",
"the the 512\n",
"program program 516\n",
". . 523\n",
"Run run 525\n",
"the the 529\n",
"program program 533\n",
". . 540\n"
]
}
],
@ -281,7 +315,7 @@
"doc = nlp(text)\n",
"\n",
"for token in doc:\n",
" print(token.lemma_)"
" print(token, token.lemma_, token.idx)"
]
},
{
@ -302,13 +336,50 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 107,
"id": "surgical-demonstration",
"metadata": {},
"outputs": [],
"source": [
"def terminology_lookup():\n",
" return []"
"import spacy\n",
"nlp = spacy.load(\"en_core_web_sm\")\n",
"\n",
"\n",
"def terminology_lookup(txt, labels):\n",
" result = {};\n",
" doc = nlp(txt)\n",
"\n",
" for token in doc:\n",
" if token.lemma_ in labels: \n",
" if token.lemma_ not in result:\n",
" result[token.lemma_] = []\n",
" result[token.lemma_].append((token.idx, token.idx + len(token)))\n",
"\n",
" return result"
]
},
{
"cell_type": "code",
"execution_count": 108,
"id": "4772c1b1",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'compile': [(56, 63), (134, 143), (504, 511)],\n",
" 'application': [(80, 91), (164, 175), (322, 334)],\n",
" 'program': [(291, 299), (468, 475), (516, 523), (533, 540)],\n",
" 'applet': [(302, 309)]}"
]
},
"execution_count": 108,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"terminology_lookup(text, dictionary)"
]
},
{
@ -337,13 +408,56 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 109,
"id": "superb-butterfly",
"metadata": {},
"outputs": [],
"source": [
"def get_nouns(text):\n",
" return []"
" doc = nlp(text)\n",
" return [token.lemma_ for token in doc if token.pos_ == 'NOUN']"
]
},
{
"cell_type": "code",
"execution_count": 110,
"id": "3c916a3e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['programmer',\n",
" 'section',\n",
" 'swing',\n",
" 'application',\n",
" 'command',\n",
" 'line',\n",
" 'information',\n",
" 'swing',\n",
" 'application',\n",
" 'compilation',\n",
" 'instruction',\n",
" 'program',\n",
" 'applet',\n",
" 'application',\n",
" 'step',\n",
" 'release',\n",
" 'platform',\n",
" 'program',\n",
" 'swing',\n",
" 'component',\n",
" 'program',\n",
" 'program']"
]
},
"execution_count": 110,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"get_nouns(text)"
]
},
{
@ -356,7 +470,7 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 111,
"id": "acting-tolerance",
"metadata": {},
"outputs": [],
@ -374,13 +488,57 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 112,
"id": "eight-redhead",
"metadata": {},
"outputs": [],
"source": [
"def count_words(words):\n",
" word_count = {}\n",
" for word in words:\n",
" if word in word_count:\n",
" word_count[word] += 1\n",
" else:\n",
" word_count[word] = 1\n",
" return word_count\n",
"\n",
"def extract_terms(text):\n",
" return []"
" return count_words(get_nouns(text))"
]
},
{
"cell_type": "code",
"execution_count": 113,
"id": "374550d8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'programmer': 1,\n",
" 'section': 1,\n",
" 'swing': 3,\n",
" 'application': 3,\n",
" 'command': 1,\n",
" 'line': 1,\n",
" 'information': 1,\n",
" 'compilation': 1,\n",
" 'instruction': 1,\n",
" 'program': 4,\n",
" 'applet': 1,\n",
" 'step': 1,\n",
" 'release': 1,\n",
" 'platform': 1,\n",
" 'component': 1}"
]
},
"execution_count": 113,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"extract_terms(text)"
]
},
{
@ -393,13 +551,85 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 114,
"id": "monetary-mambo",
"metadata": {},
"outputs": [],
"source": [
"def get_verbs(text):\n",
" doc = nlp(text)\n",
" return [token.lemma_ for token in doc if token.pos_ == 'VERB']\n",
"\n",
"def get_adjectives(text):\n",
" doc = nlp(text)\n",
" return [token.lemma_ for token in doc if token.pos_ == 'ADJ']\n",
"\n",
"def extract_terms(text):\n",
" return []"
" return {\n",
" \"nouns\": get_nouns(text),\n",
" \"verbs\": get_verbs(text),\n",
" \"adjectives\": get_adjectives(text)\n",
" }"
]
},
{
"cell_type": "code",
"execution_count": 115,
"id": "95494ac9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'nouns': ['programmer',\n",
" 'section',\n",
" 'swing',\n",
" 'application',\n",
" 'command',\n",
" 'line',\n",
" 'information',\n",
" 'swing',\n",
" 'application',\n",
" 'compilation',\n",
" 'instruction',\n",
" 'program',\n",
" 'applet',\n",
" 'application',\n",
" 'step',\n",
" 'release',\n",
" 'platform',\n",
" 'program',\n",
" 'swing',\n",
" 'component',\n",
" 'program',\n",
" 'program'],\n",
" 'verbs': ['explain',\n",
" 'compile',\n",
" 'run',\n",
" 'compile',\n",
" 'run',\n",
" 'use',\n",
" 'see',\n",
" 'run',\n",
" 'work',\n",
" 'need',\n",
" 'follow',\n",
" 'install',\n",
" 'do',\n",
" 'create',\n",
" 'use',\n",
" 'compile',\n",
" 'run'],\n",
" 'adjectives': ['late']}"
]
},
"execution_count": 115,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"extract_terms(text)"
]
}
],
@ -422,7 +652,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.10"
"version": "3.7.9"
},
"subtitle": "3. Terminologia",
"title": "Komputerowe wspomaganie tłumaczenia",

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long