Laboratoria 13.04.2024

This commit is contained in:
Marek Susniak 2024-04-15 21:15:24 +02:00
parent 957bd22d58
commit 7c845bcf8d
2 changed files with 316 additions and 120 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

View File

@ -63,7 +63,7 @@
"id": "diverse-sunglasses", "id": "diverse-sunglasses",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Odpowiedź:" "Odpowiedź: metal cabinet guides lub metal cabinet slides. Skorzystałem z dwóch słowników oraz dużego modelu językowego."
] ]
}, },
{ {
@ -86,7 +86,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 31, "execution_count": 102,
"id": "loving-prince", "id": "loving-prince",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -110,7 +110,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 32, "execution_count": 103,
"id": "bound-auction", "id": "bound-auction",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -128,7 +128,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 33, "execution_count": 104,
"id": "cognitive-cedar", "id": "cognitive-cedar",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -149,7 +149,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 34, "execution_count": 105,
"id": "7cc3ad1f", "id": "7cc3ad1f",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -162,7 +162,7 @@
" ('compile', [(56, 62)])]" " ('compile', [(56, 62)])]"
] ]
}, },
"execution_count": 34, "execution_count": 105,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -195,7 +195,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 35, "execution_count": 106,
"id": "tribal-attention", "id": "tribal-attention",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -203,108 +203,108 @@
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
" \n", " 0\n",
"for\n", "For for 1\n",
"all\n", "all all 5\n",
"Java\n", "Java Java 9\n",
"programmer\n", "programmers programmer 14\n",
":\n", ": : 25\n",
"this\n", "This this 27\n",
"section\n", "section section 32\n",
"explain\n", "explains explain 40\n",
"how\n", "how how 49\n",
"to\n", "to to 53\n",
"compile\n", "compile compile 56\n",
"and\n", "and and 64\n",
"run\n", "run run 68\n",
"a\n", "a a 72\n",
"swing\n", "Swing swing 74\n",
"application\n", "application application 80\n",
"from\n", "from from 92\n",
"the\n", "the the 97\n",
"command\n", "command command 101\n",
"line\n", "line line 109\n",
".\n", ". . 113\n",
"for\n", "For for 115\n",
"information\n", "information information 119\n",
"on\n", "on on 131\n",
"compile\n", "compiling compile 134\n",
"and\n", "and and 144\n",
"run\n", "running run 148\n",
"a\n", "a a 156\n",
"swing\n", "Swing swing 158\n",
"application\n", "application application 164\n",
"use\n", "using use 176\n",
"NetBeans\n", "NetBeans NetBeans 182\n",
"IDE\n", "IDE IDE 191\n",
",\n", ", , 194\n",
"see\n", "see see 196\n",
"run\n", "Running run 200\n",
"Tutorial\n", "Tutorial Tutorial 208\n",
"Examples\n", "Examples Examples 217\n",
"in\n", "in in 226\n",
"NetBeans\n", "NetBeans NetBeans 229\n",
"IDE\n", "IDE IDE 238\n",
".\n", ". . 241\n",
"the\n", "The the 243\n",
"compilation\n", "compilation compilation 247\n",
"instruction\n", "instructions instruction 259\n",
"work\n", "work work 272\n",
"for\n", "for for 277\n",
"all\n", "all all 281\n",
"Swing\n", "Swing Swing 285\n",
"program\n", "programs program 291\n",
"—\n", "— — 300\n",
"applet\n", "applets applet 302\n",
",\n", ", , 309\n",
"as\n", "as as 311\n",
"well\n", "well well 314\n",
"as\n", "as as 319\n",
"application\n", "applications application 322\n",
".\n", ". . 334\n",
"here\n", "Here here 336\n",
"be\n", "are be 341\n",
"the\n", "the the 345\n",
"step\n", "steps step 349\n",
"you\n", "you you 355\n",
"need\n", "need need 359\n",
"to\n", "to to 364\n",
"follow\n", "follow follow 367\n",
":\n", ": : 373\n",
"install\n", "Install install 375\n",
"the\n", "the the 383\n",
"late\n", "latest late 387\n",
"release\n", "release release 394\n",
"of\n", "of of 402\n",
"the\n", "the the 405\n",
"Java\n", "Java Java 409\n",
"SE\n", "SE SE 414\n",
"platform\n", "platform platform 417\n",
",\n", ", , 425\n",
"if\n", "if if 427\n",
"you\n", "you you 430\n",
"have\n", "have have 434\n",
"not\n", "n't not 438\n",
"already\n", "already already 442\n",
"do\n", "done do 450\n",
"so\n", "so so 455\n",
".\n", ". . 457\n",
"create\n", "Create create 459\n",
"a\n", "a a 466\n",
"program\n", "program program 468\n",
"that\n", "that that 476\n",
"use\n", "uses use 481\n",
"swing\n", "Swing swing 486\n",
"component\n", "components component 492\n",
".\n", ". . 502\n",
"compile\n", "Compile compile 504\n",
"the\n", "the the 512\n",
"program\n", "program program 516\n",
".\n", ". . 523\n",
"run\n", "Run run 525\n",
"the\n", "the the 529\n",
"program\n", "program program 533\n",
".\n" ". . 540\n"
] ]
} }
], ],
@ -315,7 +315,7 @@
"doc = nlp(text)\n", "doc = nlp(text)\n",
"\n", "\n",
"for token in doc:\n", "for token in doc:\n",
" print(token.lemma_)" " print(token, token.lemma_, token.idx)"
] ]
}, },
{ {
@ -336,13 +336,50 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 36, "execution_count": 107,
"id": "surgical-demonstration", "id": "surgical-demonstration",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"def terminology_lookup():\n", "import spacy\n",
" return []" "nlp = spacy.load(\"en_core_web_sm\")\n",
"\n",
"\n",
"def terminology_lookup(txt, labels):\n",
" result = {};\n",
" doc = nlp(txt)\n",
"\n",
" for token in doc:\n",
" if token.lemma_ in labels: \n",
" if token.lemma_ not in result:\n",
" result[token.lemma_] = []\n",
" result[token.lemma_].append((token.idx, token.idx + len(token)))\n",
"\n",
" return result"
]
},
{
"cell_type": "code",
"execution_count": 108,
"id": "4772c1b1",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'compile': [(56, 63), (134, 143), (504, 511)],\n",
" 'application': [(80, 91), (164, 175), (322, 334)],\n",
" 'program': [(291, 299), (468, 475), (516, 523), (533, 540)],\n",
" 'applet': [(302, 309)]}"
]
},
"execution_count": 108,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"terminology_lookup(text, dictionary)"
] ]
}, },
{ {
@ -371,13 +408,56 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 37, "execution_count": 109,
"id": "superb-butterfly", "id": "superb-butterfly",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"def get_nouns(text):\n", "def get_nouns(text):\n",
" return []" " doc = nlp(text)\n",
" return [token.lemma_ for token in doc if token.pos_ == 'NOUN']"
]
},
{
"cell_type": "code",
"execution_count": 110,
"id": "3c916a3e",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['programmer',\n",
" 'section',\n",
" 'swing',\n",
" 'application',\n",
" 'command',\n",
" 'line',\n",
" 'information',\n",
" 'swing',\n",
" 'application',\n",
" 'compilation',\n",
" 'instruction',\n",
" 'program',\n",
" 'applet',\n",
" 'application',\n",
" 'step',\n",
" 'release',\n",
" 'platform',\n",
" 'program',\n",
" 'swing',\n",
" 'component',\n",
" 'program',\n",
" 'program']"
]
},
"execution_count": 110,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"get_nouns(text)"
] ]
}, },
{ {
@ -390,7 +470,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 38, "execution_count": 111,
"id": "acting-tolerance", "id": "acting-tolerance",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -408,13 +488,57 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 39, "execution_count": 112,
"id": "eight-redhead", "id": "eight-redhead",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"def count_words(words):\n",
" word_count = {}\n",
" for word in words:\n",
" if word in word_count:\n",
" word_count[word] += 1\n",
" else:\n",
" word_count[word] = 1\n",
" return word_count\n",
"\n",
"def extract_terms(text):\n", "def extract_terms(text):\n",
" return []" " return count_words(get_nouns(text))"
]
},
{
"cell_type": "code",
"execution_count": 113,
"id": "374550d8",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'programmer': 1,\n",
" 'section': 1,\n",
" 'swing': 3,\n",
" 'application': 3,\n",
" 'command': 1,\n",
" 'line': 1,\n",
" 'information': 1,\n",
" 'compilation': 1,\n",
" 'instruction': 1,\n",
" 'program': 4,\n",
" 'applet': 1,\n",
" 'step': 1,\n",
" 'release': 1,\n",
" 'platform': 1,\n",
" 'component': 1}"
]
},
"execution_count": 113,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"extract_terms(text)"
] ]
}, },
{ {
@ -427,13 +551,85 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 40, "execution_count": 114,
"id": "monetary-mambo", "id": "monetary-mambo",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"def get_verbs(text):\n",
" doc = nlp(text)\n",
" return [token.lemma_ for token in doc if token.pos_ == 'VERB']\n",
"\n",
"def get_adjectives(text):\n",
" doc = nlp(text)\n",
" return [token.lemma_ for token in doc if token.pos_ == 'ADJ']\n",
"\n",
"def extract_terms(text):\n", "def extract_terms(text):\n",
" return []" " return {\n",
" \"nouns\": get_nouns(text),\n",
" \"verbs\": get_verbs(text),\n",
" \"adjectives\": get_adjectives(text)\n",
" }"
]
},
{
"cell_type": "code",
"execution_count": 115,
"id": "95494ac9",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'nouns': ['programmer',\n",
" 'section',\n",
" 'swing',\n",
" 'application',\n",
" 'command',\n",
" 'line',\n",
" 'information',\n",
" 'swing',\n",
" 'application',\n",
" 'compilation',\n",
" 'instruction',\n",
" 'program',\n",
" 'applet',\n",
" 'application',\n",
" 'step',\n",
" 'release',\n",
" 'platform',\n",
" 'program',\n",
" 'swing',\n",
" 'component',\n",
" 'program',\n",
" 'program'],\n",
" 'verbs': ['explain',\n",
" 'compile',\n",
" 'run',\n",
" 'compile',\n",
" 'run',\n",
" 'use',\n",
" 'see',\n",
" 'run',\n",
" 'work',\n",
" 'need',\n",
" 'follow',\n",
" 'install',\n",
" 'do',\n",
" 'create',\n",
" 'use',\n",
" 'compile',\n",
" 'run'],\n",
" 'adjectives': ['late']}"
]
},
"execution_count": 115,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"extract_terms(text)"
] ]
} }
], ],