This commit is contained in:
Patryk 2024-04-16 21:12:25 +02:00
parent 2b22583359
commit 9b9e46df22

View File

@ -63,7 +63,7 @@
"id": "diverse-sunglasses", "id": "diverse-sunglasses",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Odpowiedź:" "Odpowiedź: \"metal cabinet guides\". https://translate.google.pl/"
] ]
}, },
{ {
@ -115,7 +115,7 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"dictionary = ['program', 'application', 'applet' 'compile']" "dictionary = ['program', 'application', 'applet', 'compile']"
] ]
}, },
{ {
@ -133,8 +133,18 @@
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"import re\n",
"\n",
"def terminology_lookup():\n", "def terminology_lookup():\n",
" return []" " result = []\n",
" regex = ''\n",
" for word in dictionary:\n",
" if regex != '':\n",
" regex += '|'\n",
" regex += '(' + word + ')'\n",
" for occurrence in re.finditer(regex, text, re.I):\n",
" result.append((occurrence.group(), occurrence.start(), occurrence.end()))\n",
" return result"
] ]
}, },
{ {
@ -308,7 +318,12 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"def terminology_lookup():\n", "def terminology_lookup():\n",
" return []" " result = []\n",
" for token in doc:\n",
" if token.lemma_ in dictionary:\n",
" result.append((token, token.idx, token.idx + len(token)))\n",
"\n",
" return result"
] ]
}, },
{ {
@ -343,7 +358,13 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"def get_nouns(text):\n", "def get_nouns(text):\n",
" return []" " result = []\n",
" doc = nlp(text)\n",
" for token in doc:\n",
" if token.pos_ == 'NOUN':\n",
" result.append(token)\n",
"\n",
" return result"
] ]
}, },
{ {
@ -380,7 +401,16 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"def extract_terms(text):\n", "def extract_terms(text):\n",
" return []" " result = {}\n",
" doc = nlp(text)\n",
" for token in doc:\n",
" if token.pos_ == 'NOUN':\n",
" if result.get(token.lemma_) is None:\n",
" result[token.lemma_] = 1\n",
" else:\n",
" result[token.lemma_] += 1\n",
"\n",
" return result"
] ]
}, },
{ {
@ -399,7 +429,16 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"def extract_terms(text):\n", "def extract_terms(text):\n",
" return []" " result = {}\n",
" doc = nlp(text)\n",
" for token in doc:\n",
" if token.pos_ in ['NOUN', 'VERB', 'ADJ']:\n",
" if result.get(token.lemma_) is None:\n",
" result[token.lemma_] = 1\n",
" else:\n",
" result[token.lemma_] += 1\n",
"\n",
" return result"
] ]
} }
], ],
@ -407,7 +446,7 @@
"author": "Rafał Jaworski", "author": "Rafał Jaworski",
"email": "rjawor@amu.edu.pl", "email": "rjawor@amu.edu.pl",
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "Python 3 (ipykernel)",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },
@ -422,7 +461,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.10" "version": "3.9.2"
}, },
"subtitle": "3. Terminologia", "subtitle": "3. Terminologia",
"title": "Komputerowe wspomaganie tłumaczenia", "title": "Komputerowe wspomaganie tłumaczenia",