added notes

2024-05-05 08:21:38 +02:00 · 2024-05-05 08:21:38 +02:00 · 58e6e90d2a
commit 58e6e90d2a
parent d824ffc9d9
1 changed files with 7 additions and 24 deletions
--- a/lab/lab_08.ipynb
+++ b/lab/lab_08.ipynb
@ -317,24 +317,10 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 4,
   "id": "descending-easter",
   "metadata": {},
-   "outputs": [
-    {
-     "ename": "NameError",
-     "evalue": "name 'remove_punctuation' is not defined",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[1], line 45\u001b[0m\n\u001b[1;32m     38\u001b[0m     \u001b[38;5;28mprint\u001b[39m(human_sum)\n\u001b[1;32m     41\u001b[0m         \u001b[38;5;66;03m# tranlsations = [PyDictionary().translate(word, 'de') for word in element]\u001b[39;00m\n\u001b[0;32m---> 45\u001b[0m analyze_translations()\n",
-      "Cell \u001b[0;32mIn[1], line 19\u001b[0m, in \u001b[0;36manalyze_translations\u001b[0;34m()\u001b[0m\n\u001b[1;32m     17\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21manalyze_translations\u001b[39m():\n\u001b[1;32m     18\u001b[0m     ourZip \u001b[38;5;241m=\u001b[39m zipfile\u001b[38;5;241m.\u001b[39mZipFile(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata/corpus_corrected.zip\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m---> 19\u001b[0m     files \u001b[38;5;241m=\u001b[39m {name: remove_punctuation(ourZip\u001b[38;5;241m.\u001b[39mread(name)\u001b[38;5;241m.\u001b[39mdecode(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mutf-8\u001b[39m\u001b[38;5;124m'\u001b[39m))\n\u001b[1;32m     20\u001b[0m             \u001b[38;5;28;01mfor\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m ourZip\u001b[38;5;241m.\u001b[39mnamelist()}\n\u001b[1;32m     22\u001b[0m     corpus_de_human, corpus_de_nmt, corpus_en \u001b[38;5;241m=\u001b[39m files[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcorpus_de_human.txt\u001b[39m\u001b[38;5;124m'\u001b[39m], files[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcorpus_de_nmt.txt\u001b[39m\u001b[38;5;124m'\u001b[39m], files[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcorpus_en.txt\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m     24\u001b[0m     nmt_sum \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n",
-      "Cell \u001b[0;32mIn[1], line 19\u001b[0m, in \u001b[0;36m<dictcomp>\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m     17\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21manalyze_translations\u001b[39m():\n\u001b[1;32m     18\u001b[0m     ourZip \u001b[38;5;241m=\u001b[39m zipfile\u001b[38;5;241m.\u001b[39mZipFile(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata/corpus_corrected.zip\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m---> 19\u001b[0m     files \u001b[38;5;241m=\u001b[39m {name: remove_punctuation(ourZip\u001b[38;5;241m.\u001b[39mread(name)\u001b[38;5;241m.\u001b[39mdecode(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mutf-8\u001b[39m\u001b[38;5;124m'\u001b[39m))\n\u001b[1;32m     20\u001b[0m             \u001b[38;5;28;01mfor\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m ourZip\u001b[38;5;241m.\u001b[39mnamelist()}\n\u001b[1;32m     22\u001b[0m     corpus_de_human, corpus_de_nmt, corpus_en \u001b[38;5;241m=\u001b[39m files[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcorpus_de_human.txt\u001b[39m\u001b[38;5;124m'\u001b[39m], files[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcorpus_de_nmt.txt\u001b[39m\u001b[38;5;124m'\u001b[39m], files[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcorpus_en.txt\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m     24\u001b[0m     nmt_sum \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n",
-      "\u001b[0;31mNameError\u001b[0m: name 'remove_punctuation' is not defined"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "from PyDictionary import PyDictionary\n",
    "import zipfile\n",
@ -365,22 +351,19 @@
    "    for human_element, nmt_element, element  in zip(corpus_de_human, corpus_de_nmt, corpus_en):\n",
    "        transalted_words = transalate(element)\n",
    "\n",
-    "        # words = set(re.findall(r'\\w+', nmt_element.lower()))\n",
    "        nmt_sum += sum(1 for word in nmt_element if transalted_words.get(word.lower()))\n",
    "\n",
-    "        # words = set(re.findall(r'\\w+', human_element.lower()))\n",
    "        human_sum += sum(1 for word in human_element if transalted_words.get(word.lower()))\n",
    "\n",
    "\n",
    "    print(nmt_sum)\n",
    "    print(human_sum)\n",
    "\n",
-    "\n",
-    "        # tranlsations = [PyDictionary().translate(word, 'de') for word in element]\n",
-    "        \n",
-    "\n",
-    "\n",
-    "analyze_translations()"
+    "# I think the PyDictionary module doesn't work anymore; the following is quoted from https://github.com/geekpradd/PyDictionary:\n",
+    "#NOTE: Maintaining this module requires constantly changing the scraping endpoints which unfortunately I no longer have the bandwidth to do so, so this module is DEPRECATED. Kindly use other substitutes available on PyPI. Thanks!\n",
+    "#PyDictionary is a Dictionary Module for Python 2/3 to get meanings, translations, synonyms and Antonyms of words. It uses WordNet for getting meanings, Google for translations, and synonym.com for getting synonyms and antonyms.\n",
+    "#This module uses Python Requests, BeautifulSoup4 and goslate as dependencies\n",
+    "        \n"
   ]
  }
 ],