From 9b9e46df221063a31583ed657d2779717125f2a1 Mon Sep 17 00:00:00 2001 From: Patryk Date: Tue, 16 Apr 2024 21:12:25 +0200 Subject: [PATCH] lab 3 --- lab/lab_03.ipynb | 57 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 48 insertions(+), 9 deletions(-) diff --git a/lab/lab_03.ipynb b/lab/lab_03.ipynb index 5707f0d..d379ce0 100644 --- a/lab/lab_03.ipynb +++ b/lab/lab_03.ipynb @@ -63,7 +63,7 @@ "id": "diverse-sunglasses", "metadata": {}, "source": [ - "Odpowiedź:" + "Odpowiedź: \"metal cabinet guides\". https://translate.google.pl/" ] }, { @@ -115,7 +115,7 @@ "metadata": {}, "outputs": [], "source": [ - "dictionary = ['program', 'application', 'applet' 'compile']" + "dictionary = ['program', 'application', 'applet', 'compile']" ] }, { @@ -133,8 +133,18 @@ "metadata": {}, "outputs": [], "source": [ + "import re\n", + "\n", "def terminology_lookup():\n", - " return []" + " result = []\n", + " regex = ''\n", + " for word in dictionary:\n", + " if regex != '':\n", + " regex += '|'\n", + " regex += '(' + word + ')'\n", + " for occurrence in re.finditer(regex, text, re.I):\n", + " result.append((occurrence.group(), occurrence.start(), occurrence.end()))\n", + " return result" ] }, { @@ -308,7 +318,12 @@ "outputs": [], "source": [ "def terminology_lookup():\n", - " return []" + " result = []\n", + " for token in doc:\n", + " if token.lemma_ in dictionary:\n", + " result.append((token, token.idx, token.idx + len(token)))\n", + "\n", + " return result" ] }, { @@ -343,7 +358,13 @@ "outputs": [], "source": [ "def get_nouns(text):\n", - " return []" + " result = []\n", + " doc = nlp(text)\n", + " for token in doc:\n", + " if token.pos_ == 'NOUN':\n", + " result.append(token)\n", + "\n", + " return result" ] }, { @@ -380,7 +401,16 @@ "outputs": [], "source": [ "def extract_terms(text):\n", - " return []" + " result = {}\n", + " doc = nlp(text)\n", + " for token in doc:\n", + " if token.pos_ == 'NOUN':\n", + " if result.get(token.lemma_) is None:\n", + " result[token.lemma_] = 1\n", + " else:\n", + " result[token.lemma_] += 1\n", + "\n", + " return result" ] }, { @@ -399,7 +429,16 @@ "outputs": [], "source": [ "def extract_terms(text):\n", - " return []" + " result = {}\n", + " doc = nlp(text)\n", + " for token in doc:\n", + " if token.pos_ in ['NOUN', 'VERB', 'ADJ']:\n", + " if result.get(token.lemma_) is None:\n", + " result[token.lemma_] = 1\n", + " else:\n", + " result[token.lemma_] += 1\n", + "\n", + " return result" ] } ], @@ -407,7 +446,7 @@ "author": "Rafał Jaworski", "email": "rjawor@amu.edu.pl", "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -422,7 +461,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.9.2" }, "subtitle": "3. Terminologia", "title": "Komputerowe wspomaganie tłumaczenia",