From 58e6e90d2a0e233f223128743f07d7a861310f37 Mon Sep 17 00:00:00 2001 From: Adam Stelmaszyk Date: Sun, 5 May 2024 08:21:38 +0200 Subject: [PATCH] added notes --- lab/lab_08.ipynb | 31 +++++++------------------------ 1 file changed, 7 insertions(+), 24 deletions(-) diff --git a/lab/lab_08.ipynb b/lab/lab_08.ipynb index 18d2e4d..c281113 100644 --- a/lab/lab_08.ipynb +++ b/lab/lab_08.ipynb @@ -317,24 +317,10 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 4, "id": "descending-easter", "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'remove_punctuation' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[1], line 45\u001b[0m\n\u001b[1;32m 38\u001b[0m \u001b[38;5;28mprint\u001b[39m(human_sum)\n\u001b[1;32m 41\u001b[0m \u001b[38;5;66;03m# tranlsations = [PyDictionary().translate(word, 'de') for word in element]\u001b[39;00m\n\u001b[0;32m---> 45\u001b[0m analyze_translations()\n", - "Cell \u001b[0;32mIn[1], line 19\u001b[0m, in \u001b[0;36manalyze_translations\u001b[0;34m()\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21manalyze_translations\u001b[39m():\n\u001b[1;32m 18\u001b[0m ourZip \u001b[38;5;241m=\u001b[39m zipfile\u001b[38;5;241m.\u001b[39mZipFile(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata/corpus_corrected.zip\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m---> 19\u001b[0m files \u001b[38;5;241m=\u001b[39m {name: remove_punctuation(ourZip\u001b[38;5;241m.\u001b[39mread(name)\u001b[38;5;241m.\u001b[39mdecode(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mutf-8\u001b[39m\u001b[38;5;124m'\u001b[39m))\n\u001b[1;32m 20\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m ourZip\u001b[38;5;241m.\u001b[39mnamelist()}\n\u001b[1;32m 22\u001b[0m corpus_de_human, corpus_de_nmt, corpus_en \u001b[38;5;241m=\u001b[39m files[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcorpus_de_human.txt\u001b[39m\u001b[38;5;124m'\u001b[39m], files[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcorpus_de_nmt.txt\u001b[39m\u001b[38;5;124m'\u001b[39m], files[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcorpus_en.txt\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m 24\u001b[0m nmt_sum \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n", - "Cell \u001b[0;32mIn[1], line 19\u001b[0m, in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21manalyze_translations\u001b[39m():\n\u001b[1;32m 18\u001b[0m ourZip \u001b[38;5;241m=\u001b[39m zipfile\u001b[38;5;241m.\u001b[39mZipFile(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdata/corpus_corrected.zip\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m---> 19\u001b[0m files \u001b[38;5;241m=\u001b[39m {name: remove_punctuation(ourZip\u001b[38;5;241m.\u001b[39mread(name)\u001b[38;5;241m.\u001b[39mdecode(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mutf-8\u001b[39m\u001b[38;5;124m'\u001b[39m))\n\u001b[1;32m 20\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m ourZip\u001b[38;5;241m.\u001b[39mnamelist()}\n\u001b[1;32m 22\u001b[0m corpus_de_human, corpus_de_nmt, corpus_en \u001b[38;5;241m=\u001b[39m files[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcorpus_de_human.txt\u001b[39m\u001b[38;5;124m'\u001b[39m], files[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcorpus_de_nmt.txt\u001b[39m\u001b[38;5;124m'\u001b[39m], files[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mcorpus_en.txt\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m 24\u001b[0m nmt_sum \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n", - "\u001b[0;31mNameError\u001b[0m: name 'remove_punctuation' is not defined" - ] - } - ], + "outputs": [], "source": [ "from PyDictionary import PyDictionary\n", "import zipfile\n", @@ -365,22 +351,19 @@ " for human_element, nmt_element, element in zip(corpus_de_human, corpus_de_nmt, corpus_en):\n", " transalted_words = transalate(element)\n", "\n", - " # words = set(re.findall(r'\\w+', nmt_element.lower()))\n", " nmt_sum += sum(1 for word in nmt_element if transalted_words.get(word.lower()))\n", "\n", - " # words = set(re.findall(r'\\w+', human_element.lower()))\n", " human_sum += sum(1 for word in human_element if transalted_words.get(word.lower()))\n", "\n", "\n", " print(nmt_sum)\n", " print(human_sum)\n", "\n", - "\n", - " # tranlsations = [PyDictionary().translate(word, 'de') for word in element]\n", - " \n", - "\n", - "\n", - "analyze_translations()" + "#I think the PyDictionary mode doesn't work, the info from https://github.com/geekpradd/PyDictionary\n", + "#NOTE: Mainintaing this module requires constantly changing the scrapping endpoints which unfortunately I no longer have the bandwidth to do so, so this module is DEPRECATED. Kindly use other substitutes available on PyPI. Thanks!\n", + "#PyDictionary is a Dictionary Module for Python 2/3 to get meanings, translations, synonyms and Antonyms of words. It uses WordNet for getting meanings, Google for translations, and synonym.com for getting synonyms and antonyms.\n", + "#This module uses Python Requests, BeautifulSoup4 and goslate as dependencies\n", + " \n" ] } ],