From 80333aca0ac58598c8c9741d8ad86d71c287ff66 Mon Sep 17 00:00:00 2001 From: Jakub Pokrywka Date: Wed, 16 Dec 2020 01:01:20 +0100 Subject: [PATCH] pretty view --- sentiment_analysis_embed_ff.ipynb | 250 ++++++++++++++++-------------- word2vec.ipynb | 56 +++++-- 2 files changed, 181 insertions(+), 125 deletions(-) diff --git a/sentiment_analysis_embed_ff.ipynb b/sentiment_analysis_embed_ff.ipynb index b9a65ce..30fcbfa 100644 --- a/sentiment_analysis_embed_ff.ipynb +++ b/sentiment_analysis_embed_ff.ipynb @@ -14,11 +14,23 @@ "cell_type": "code", "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ - "#conda install torchtext -c pytorch\n", - "#conda install spacy\n", - "#python -m spacy download en" + "from IPython.core.display import display, HTML\n", + "display(HTML(\"\"))" ] }, { @@ -27,14 +39,91 @@ "metadata": { "scrolled": true }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting package metadata (current_repodata.json): done\n", + "Solving environment: done\n", + "\n", + "\n", + "==> WARNING: A newer version of conda exists. <==\n", + " current version: 4.8.3\n", + " latest version: 4.9.2\n", + "\n", + "Please update conda by running\n", + "\n", + " $ conda update -n base -c defaults conda\n", + "\n", + "\n", + "\n", + "# All requested packages already installed.\n", + "\n", + "Collecting package metadata (current_repodata.json): done\n", + "Solving environment: done\n", + "\n", + "\n", + "==> WARNING: A newer version of conda exists. <==\n", + " current version: 4.8.3\n", + " latest version: 4.9.2\n", + "\n", + "Please update conda by running\n", + "\n", + " $ conda update -n base -c defaults conda\n", + "\n", + "\n", + "\n", + "# All requested packages already installed.\n", + "\n", + "Requirement already satisfied: en_core_web_sm==2.3.1 from https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-2.3.1/en_core_web_sm-2.3.1.tar.gz#egg=en_core_web_sm==2.3.1 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (2.3.1)\n", + "Requirement already satisfied: spacy<2.4.0,>=2.3.0 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from en_core_web_sm==2.3.1) (2.3.2)\n", + "Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2.0.4)\n", + "Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (3.0.2)\n", + "Requirement already satisfied: requests<3.0.0,>=2.13.0 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2.25.0)\n", + "Requirement already satisfied: wasabi<1.1.0,>=0.4.0 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (0.8.0)\n", + "Requirement already satisfied: numpy>=1.15.0 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.19.2)\n", + "Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.5)\n", + "Requirement already satisfied: srsly<1.1.0,>=1.0.2 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.5)\n", + "Requirement already satisfied: catalogue<1.1.0,>=0.0.7 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.0.0)\n", + "Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (4.54.1)\n", + "Requirement already satisfied: plac<1.2.0,>=0.9.6 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (0.9.6)\n", + "Requirement already satisfied: blis<0.5.0,>=0.4.0 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (0.4.1)\n", + "Requirement already satisfied: setuptools in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (50.3.1.post20201107)\n", + "Requirement already satisfied: thinc==7.4.1 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (7.4.1)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from requests<3.0.0,>=2.13.0->spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2020.12.5)\n", + "Requirement already satisfied: chardet<4,>=3.0.2 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from requests<3.0.0,>=2.13.0->spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (3.0.4)\n", + "Requirement already satisfied: idna<3,>=2.5 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from requests<3.0.0,>=2.13.0->spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (2.10)\n", + "Requirement already satisfied: urllib3<1.27,>=1.21.1 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from requests<3.0.0,>=2.13.0->spacy<2.4.0,>=2.3.0->en_core_web_sm==2.3.1) (1.25.11)\n", + "\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n", + "You can now load the model via spacy.load('en_core_web_sm')\n", + "\u001b[38;5;2m✔ Linking successful\u001b[0m\n", + "/home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages/en_core_web_sm -->\n", + "/home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages/spacy/data/en\n", + "You can now load the model via spacy.load('en')\n" + ] + } + ], + "source": [ + "!conda install torchtext -c pytorch -y\n", + "!conda install spacy -y\n", + "!python -m spacy download en" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "scrolled": true + }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/media/kuba/ssd/anaconda3/envs/tau/lib/python3.8/site-packages/torchtext/data/field.py:150: UserWarning: Field class will be retired soon and moved to torchtext.legacy. Please see the most recent release notes for further information.\n", + "/home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages/torchtext/data/field.py:150: UserWarning: Field class will be retired soon and moved to torchtext.legacy. Please see the most recent release notes for further information.\n", " warnings.warn('{} class will be retired soon and moved to torchtext.legacy. Please see the most recent release notes for further information.'.format(self.__class__.__name__), UserWarning)\n", - "/media/kuba/ssd/anaconda3/envs/tau/lib/python3.8/site-packages/torchtext/data/field.py:150: UserWarning: LabelField class will be retired soon and moved to torchtext.legacy. Please see the most recent release notes for further information.\n", + "/home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages/torchtext/data/field.py:150: UserWarning: LabelField class will be retired soon and moved to torchtext.legacy. Please see the most recent release notes for further information.\n", " warnings.warn('{} class will be retired soon and moved to torchtext.legacy. Please see the most recent release notes for further information.'.format(self.__class__.__name__), UserWarning)\n" ] } @@ -55,14 +144,14 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/media/kuba/ssd/anaconda3/envs/tau/lib/python3.8/site-packages/torchtext/data/example.py:78: UserWarning: Example class will be retired soon and moved to torchtext.legacy. Please see the most recent release notes for further information.\n", + "/home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages/torchtext/data/example.py:78: UserWarning: Example class will be retired soon and moved to torchtext.legacy. Please see the most recent release notes for further information.\n", " warnings.warn('Example class will be retired soon and moved to torchtext.legacy. Please see the most recent release notes for further information.', UserWarning)\n" ] } @@ -77,7 +166,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "metadata": { "scrolled": true }, @@ -100,7 +189,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 7, "metadata": { "scrolled": true }, @@ -109,7 +198,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "{'text': ['...', 'through', 'the', 'similarly', 'minded', 'antics', 'of', 'Eric', 'Stanze', '.', 'A', 'not', '-', 'particularly', 'talented', 'director', 'has', 'helmed', 'a', 'not', '-', 'particularly', 'good', 'movie', ',', 'yet', 'I', 'still', 'found', 'myself', 'sitting', 'through', 'it', 'to', 'the', 'closing', 'credits', ',', 'if', 'for', 'nothing', 'more', 'than', 'to', 'see', 'what', 'happens', 'next.A', 'rapist', 'escapes', 'from', 'prison', 'and', 'calls', 'up', 'his', 'old', 'flame', '.', 'After', 'capturing', 'her', '(', 'even', 'though', 'she', 'came', 'willingly', ')', 'and', 'threatening', 'her', 'into', 'having', 'sex', '(', 'another', 'event', 'she', 'was', 'also', 'willing', 'to', 'do', ')', 'he', 'reveals', 'that', 'he', 'has', 'kidnapped', 'three', 'guys', 'who', 'wronged', 'her', 'in', 'the', 'past', '.', 'He', 'then', 'decides', 'to', 'kill', 'her', '(', 'huh', '?', ')', 'but', 'is', 'foiled', 'and', 'dies', 'instead', '.', 'The', 'girl', \"'s\", 'mind', 'snaps', '(', 'or', 'something', 'like', 'that', ')', 'and', 'she', 'takes', 'out', 'her', 'rage', 'on', 'the', 'unlucky', 'chaps', 'in', 'the', 'basement.Alright', ',', 'the', 'writing', 'sucks', ':', 'it', \"'s\", 'long', 'winded', ',', 'loaded', 'with', 'ten', '-', 'cent', 'words', 'and', 'there', 'is', 'WAY', 'too', 'much', 'of', 'it.The', 'acting', 'sucks', ':', 'what', 'a', 'minute', ',', 'what', 'acting', '?', '<', 'br', '/>The', 'filming', 'sucks', ':', 'home', 'video', 'is', 'bad', 'enough', ',', 'but', '20', 'minutes', 'of', 'graveyard', 'footage', 'is', 'just', 'a', 'damn', 'insult.And', 'the', 'budget', 'is', 'a', 'joke', ':', 'get', 'it', '...', \"'budget\", \"'\", ',', 'that', 'was', 'the', 'punchline.And', 'yet', 'there', 'was', 'a', 'charm', 'to', 'the', 'thing', '.', 'Back', 'in', 'the', '70', \"'s\", 'these', 'kind', 'of', 'movies', 'came', 'out', 'in', 'theatres', 'with', 'actual', 'budgets', 'and', 'talent', 'attached', 'to', 'them', ',', 'not', 'in', 'this', 'day', 'and', 'age', 'though', '.', 'If', 'you', 'want', 'to', 'watch', 'this', 'kind', 'of', 'violent', ',', 'sexually', 'exploitive', 'trash', '(', 'do', \"n't\", 'lie', ',', 'some', 'of', 'us', 'do', ')', 'then', 'this', 'is', 'all', 'your', 'gon', 'na', 'get', 'nowadays.Some', 'brief', 'hardcore', 'shots', 'in', 'a', 'sex', 'scene', ',', 'torture', 'with', 'fecal', 'material', ',', 'fun', 'with', 'axes', ',', 'anal', 'rape', 'by', 'broom', 'stick', 'and', 'a', 'lengthy', 'shot', 'of', 'the', 'crazy', 'chick', 'masturbating', 'with', 'the', 'same', 'broom', 'stick', 'are', 'some', 'of', 'the', 'better', 'items', 'on', 'the', 'menu.It', \"'s\", 'not', 'good', 'and', 'it', 'wo', \"n't\", 'be', 'remembered', ',', 'but', 'not', 'since', 'the', 'heyday', 'of', 'Joe', \"D'amato\", 'have', 'people', 'made', 'movies', 'like', 'this.4/10'], 'label': 'neg'}\n" + "{'text': ['Why', 'do', 'people', 'who', 'do', 'not', 'know', 'what', 'a', 'particular', 'time', 'in', 'the', 'past', 'was', 'like', 'feel', 'the', 'need', 'to', 'try', 'to', 'define', 'that', 'time', 'for', 'others', '?', 'Replace', 'Woodstock', 'with', 'the', 'Civil', 'War', 'and', 'the', 'Apollo', 'moon', '-', 'landing', 'with', 'the', 'Titanic', 'sinking', 'and', 'you', \"'ve\", 'got', 'as', 'realistic', 'a', 'flick', 'as', 'this', 'formulaic', 'soap', 'opera', 'populated', 'entirely', 'by', 'low', '-', 'life', 'trash', '.', 'Is', 'this', 'what', 'kids', 'who', 'were', 'too', 'young', 'to', 'be', 'allowed', 'to', 'go', 'to', 'Woodstock', 'and', 'who', 'failed', 'grade', 'school', 'composition', 'do', '?', '\"', 'I', \"'ll\", 'show', 'those', 'old', 'meanies', ',', 'I', \"'ll\", 'put', 'out', 'my', 'own', 'movie', 'and', 'prove', 'that', 'you', 'do', \"n't\", 'have', 'to', 'know', 'nuttin', 'about', 'your', 'topic', 'to', 'still', 'make', 'money', '!', '\"', 'Yeah', ',', 'we', 'already', 'know', 'that', '.', 'The', 'one', 'thing', 'watching', 'this', 'film', 'did', 'for', 'me', 'was', 'to', 'give', 'me', 'a', 'little', 'insight', 'into', 'underclass', 'thinking', '.', 'The', 'next', 'time', 'I', 'see', 'a', 'slut', 'in', 'a', 'bar', 'who', 'looks', 'like', 'Diane', 'Lane', ',', 'I', \"'m\", 'running', 'the', 'other', 'way', '.', 'It', \"'s\", 'child', 'abuse', 'to', 'let', 'parents', 'that', 'worthless', 'raise', 'kids', '.', 'It', \"'s\", 'audience', 'abuse', 'to', 'simply', 'stick', 'Woodstock', 'and', 'the', 'moonlanding', 'into', 'a', 'flick', 'as', 'if', 'that', 'ipso', 'facto', 'means', 'the', 'film', 'portrays', '1969', '.'], 'label': 'neg'}\n" ] } ], @@ -119,7 +208,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": { "scrolled": true }, @@ -134,7 +223,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -153,14 +242,14 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "[('the', 202389), (',', 192527), ('.', 165463), ('a', 109375), ('and', 109303), ('of', 100836), ('to', 93959), ('is', 76223), ('in', 61140), ('I', 54434), ('it', 53612), ('that', 49147), ('\"', 44429), (\"'s\", 43357), ('this', 42421), ('-', 37080), ('/>=1.8.1 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from gensim) (4.0.1)\n", + "Requirement already satisfied: scipy>=0.18.1 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from gensim) (1.5.2)\n", + "Requirement already satisfied: six>=1.5.0 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from gensim) (1.15.0)\n", + "Requirement already satisfied: numpy>=1.11.3 in /home/kuba/anaconda3/envs/tau/lib/python3.8/site-packages (from gensim) (1.19.2)\n" + ] + } + ], "source": [ - "# conda install gensim" + "!pip install gensim " ] }, { @@ -74,7 +86,38 @@ "cell_type": "code", "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[===============-----------------------------------] 30.2% 38.7/128.1MB downloaded" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "IOPub message rate exceeded.\n", + "The notebook server will temporarily stop sending output\n", + "to the client in order to avoid crashing it.\n", + "To change this limit, set the config variable\n", + "`--NotebookApp.iopub_msg_rate_limit`.\n", + "\n", + "Current values:\n", + "NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)\n", + "NotebookApp.rate_limit_window=3.0 (secs)\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[==================================================] 100.0% 128.1/128.1MB downloaded\n" + ] + } + ], "source": [ "word_vectors = gensim.downloader.load(\"glove-wiki-gigaword-100\")" ] @@ -400,13 +443,6 @@ "source": [ "![image.png](linear-relationships.png)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": {