forked from bfijalkowski/KWT-2024
lab 3
This commit is contained in:
parent
2b22583359
commit
9b9e46df22
@ -63,7 +63,7 @@
|
||||
"id": "diverse-sunglasses",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Odpowiedź:"
|
||||
"Odpowiedź: \"metal cabinet guides\". https://translate.google.pl/"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -115,7 +115,7 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"dictionary = ['program', 'application', 'applet' 'compile']"
|
||||
"dictionary = ['program', 'application', 'applet', 'compile']"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -133,8 +133,18 @@
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import re\n",
|
||||
"\n",
|
||||
"def terminology_lookup():\n",
|
||||
" return []"
|
||||
" result = []\n",
|
||||
" regex = ''\n",
|
||||
" for word in dictionary:\n",
|
||||
" if regex != '':\n",
|
||||
" regex += '|'\n",
|
||||
" regex += '(' + word + ')'\n",
|
||||
" for occurrence in re.finditer(regex, text, re.I):\n",
|
||||
" result.append((occurrence.group(), occurrence.start(), occurrence.end()))\n",
|
||||
" return result"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -308,7 +318,12 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def terminology_lookup():\n",
|
||||
" return []"
|
||||
" result = []\n",
|
||||
" for token in doc:\n",
|
||||
" if token.lemma_ in dictionary:\n",
|
||||
" result.append((token, token.idx, token.idx + len(token)))\n",
|
||||
"\n",
|
||||
" return result"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -343,7 +358,13 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def get_nouns(text):\n",
|
||||
" return []"
|
||||
" result = []\n",
|
||||
" doc = nlp(text)\n",
|
||||
" for token in doc:\n",
|
||||
" if token.pos_ == 'NOUN':\n",
|
||||
" result.append(token)\n",
|
||||
"\n",
|
||||
" return result"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -380,7 +401,16 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def extract_terms(text):\n",
|
||||
" return []"
|
||||
" result = {}\n",
|
||||
" doc = nlp(text)\n",
|
||||
" for token in doc:\n",
|
||||
" if token.pos_ == 'NOUN':\n",
|
||||
" if result.get(token.lemma_) is None:\n",
|
||||
" result[token.lemma_] = 1\n",
|
||||
" else:\n",
|
||||
" result[token.lemma_] += 1\n",
|
||||
"\n",
|
||||
" return result"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -399,7 +429,16 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def extract_terms(text):\n",
|
||||
" return []"
|
||||
" result = {}\n",
|
||||
" doc = nlp(text)\n",
|
||||
" for token in doc:\n",
|
||||
" if token.pos_ in ['NOUN', 'VERB', 'ADJ']:\n",
|
||||
" if result.get(token.lemma_) is None:\n",
|
||||
" result[token.lemma_] = 1\n",
|
||||
" else:\n",
|
||||
" result[token.lemma_] += 1\n",
|
||||
"\n",
|
||||
" return result"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -407,7 +446,7 @@
|
||||
"author": "Rafał Jaworski",
|
||||
"email": "rjawor@amu.edu.pl",
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
@ -422,7 +461,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.10"
|
||||
"version": "3.9.2"
|
||||
},
|
||||
"subtitle": "3. Terminologia",
|
||||
"title": "Komputerowe wspomaganie tłumaczenia",
|
||||
|
Loading…
Reference in New Issue
Block a user