forked from bfijalkowski/KWT-2024
Laboratorium 1
This commit is contained in:
parent
e5ed49b0b0
commit
d32188878d
382
lab/lab_03.ipynb
382
lab/lab_03.ipynb
@ -88,7 +88,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 1,
|
"execution_count": 8,
|
||||||
"id": "8f6b6fa9",
|
"id": "8f6b6fa9",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@ -98,7 +98,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": 9,
|
||||||
"id": "loving-prince",
|
"id": "loving-prince",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@ -122,7 +122,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 3,
|
"execution_count": 10,
|
||||||
"id": "bound-auction",
|
"id": "bound-auction",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@ -130,6 +130,27 @@
|
|||||||
"dictionary = ['program', 'application', 'applet' 'compile']"
|
"dictionary = ['program', 'application', 'applet' 'compile']"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 14,
|
||||||
|
"id": "821ee3ee",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"\" For all Java programmers: This section explains how to compile and run a Swing application from the command line. For information on compiling and running a Swing application using NetBeans IDE, see Running Tutorial Examples in NetBeans IDE. The compilation instructions work for all Swing programs — applets, as well as applications. Here are the steps you need to follow: Install the latest release of the Java SE platform, if you haven't already done so. Create a program that uses Swing components. Compile the program. Run the program.\""
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 14,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"text"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
"id": "other-trinidad",
|
"id": "other-trinidad",
|
||||||
@ -140,7 +161,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 4,
|
"execution_count": 11,
|
||||||
"id": "cognitive-cedar",
|
"id": "cognitive-cedar",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
@ -148,6 +169,7 @@
|
|||||||
"count_dictionary = {}\n",
|
"count_dictionary = {}\n",
|
||||||
"\n",
|
"\n",
|
||||||
"def terminology_lookup(text, tags):\n",
|
"def terminology_lookup(text, tags):\n",
|
||||||
|
" text = text.lower()\n",
|
||||||
" return [(tag, [[m.start(), m.end()] \n",
|
" return [(tag, [[m.start(), m.end()] \n",
|
||||||
" for m in re.finditer(tag, text)])\n",
|
" for m in re.finditer(tag, text)])\n",
|
||||||
" for tag in tags if tag in text]\n",
|
" for tag in tags if tag in text]\n",
|
||||||
@ -156,7 +178,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 5,
|
"execution_count": 12,
|
||||||
"id": "9fe3b66f",
|
"id": "9fe3b66f",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@ -167,7 +189,7 @@
|
|||||||
" ('application', [[80, 91], [164, 175], [322, 333]])]"
|
" ('application', [[80, 91], [164, 175], [322, 333]])]"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 5,
|
"execution_count": 12,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -200,7 +222,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 6,
|
"execution_count": 3,
|
||||||
"id": "7b7b7569",
|
"id": "7b7b7569",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
@ -248,26 +270,181 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"!pip install spacy"
|
"!pip3 install spacy"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 8,
|
"execution_count": 4,
|
||||||
|
"id": "f4d06ed3",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Collecting en-core-web-sm==3.7.1\n",
|
||||||
|
" Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.7.1/en_core_web_sm-3.7.1-py3-none-any.whl (12.8 MB)\n",
|
||||||
|
"\u001b[2K \u001b[38;2;114;156;31m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m12.8/12.8 MB\u001b[0m \u001b[31m27.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0mm eta \u001b[36m0:00:01\u001b[0m0:01\u001b[0m:01\u001b[0m\n",
|
||||||
|
"\u001b[?25hRequirement already satisfied: spacy<3.8.0,>=3.7.2 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from en-core-web-sm==3.7.1) (3.7.4)\n",
|
||||||
|
"Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.0.12)\n",
|
||||||
|
"Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.0.5)\n",
|
||||||
|
"Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.0.10)\n",
|
||||||
|
"Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.0.8)\n",
|
||||||
|
"Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.0.9)\n",
|
||||||
|
"Requirement already satisfied: thinc<8.3.0,>=8.2.2 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (8.2.3)\n",
|
||||||
|
"Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.1.2)\n",
|
||||||
|
"Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.4.8)\n",
|
||||||
|
"Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.0.10)\n",
|
||||||
|
"Requirement already satisfied: weasel<0.4.0,>=0.1.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.3.4)\n",
|
||||||
|
"Requirement already satisfied: typer<0.10.0,>=0.3.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.9.4)\n",
|
||||||
|
"Requirement already satisfied: smart-open<7.0.0,>=5.2.1 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (6.4.0)\n",
|
||||||
|
"Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (4.66.2)\n",
|
||||||
|
"Requirement already satisfied: requests<3.0.0,>=2.13.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.31.0)\n",
|
||||||
|
"Requirement already satisfied: pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.7.0)\n",
|
||||||
|
"Requirement already satisfied: jinja2 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.1.3)\n",
|
||||||
|
"Requirement already satisfied: setuptools in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (65.5.0)\n",
|
||||||
|
"Requirement already satisfied: packaging>=20.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (24.0)\n",
|
||||||
|
"Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.3.0)\n",
|
||||||
|
"Requirement already satisfied: numpy>=1.19.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (1.26.4)\n",
|
||||||
|
"Requirement already satisfied: annotated-types>=0.4.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.6.0)\n",
|
||||||
|
"Requirement already satisfied: pydantic-core==2.18.1 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.18.1)\n",
|
||||||
|
"Requirement already satisfied: typing-extensions>=4.6.1 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (4.11.0)\n",
|
||||||
|
"Requirement already satisfied: charset-normalizer<4,>=2 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.3.2)\n",
|
||||||
|
"Requirement already satisfied: idna<4,>=2.5 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (3.7)\n",
|
||||||
|
"Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.2.1)\n",
|
||||||
|
"Requirement already satisfied: certifi>=2017.4.17 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from requests<3.0.0,>=2.13.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2024.2.2)\n",
|
||||||
|
"Requirement already satisfied: blis<0.8.0,>=0.7.8 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from thinc<8.3.0,>=8.2.2->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.7.11)\n",
|
||||||
|
"Requirement already satisfied: confection<1.0.0,>=0.0.1 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from thinc<8.3.0,>=8.2.2->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.1.4)\n",
|
||||||
|
"Requirement already satisfied: click<9.0.0,>=7.1.1 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from typer<0.10.0,>=0.3.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (8.1.7)\n",
|
||||||
|
"Requirement already satisfied: cloudpathlib<0.17.0,>=0.7.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from weasel<0.4.0,>=0.1.0->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (0.16.0)\n",
|
||||||
|
"Requirement already satisfied: MarkupSafe>=2.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from jinja2->spacy<3.8.0,>=3.7.2->en-core-web-sm==3.7.1) (2.1.5)\n",
|
||||||
|
"Installing collected packages: en-core-web-sm\n",
|
||||||
|
"Successfully installed en-core-web-sm-3.7.1\n",
|
||||||
|
"\n",
|
||||||
|
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n",
|
||||||
|
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
|
||||||
|
"\u001b[38;5;2m✔ Download and installation successful\u001b[0m\n",
|
||||||
|
"You can now load the package via spacy.load('en_core_web_sm')\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"!python3 -m spacy download en_core_web_sm"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 13,
|
||||||
"id": "tribal-attention",
|
"id": "tribal-attention",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"ename": "OSError",
|
"name": "stdout",
|
||||||
"evalue": "[E050] Can't find model 'en_core_web_sm'. It doesn't seem to be a Python package or a valid path to a data directory.",
|
"output_type": "stream",
|
||||||
"output_type": "error",
|
"text": [
|
||||||
"traceback": [
|
" \n",
|
||||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
"for\n",
|
||||||
"\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)",
|
"all\n",
|
||||||
"Cell \u001b[0;32mIn[8], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mspacy\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m nlp \u001b[38;5;241m=\u001b[39m \u001b[43mspacy\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43men_core_web_sm\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m doc \u001b[38;5;241m=\u001b[39m nlp(text)\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m token \u001b[38;5;129;01min\u001b[39;00m doc:\n",
|
"Java\n",
|
||||||
"File \u001b[0;32m~/.pyenv/versions/3.11.0/lib/python3.11/site-packages/spacy/__init__.py:51\u001b[0m, in \u001b[0;36mload\u001b[0;34m(name, vocab, disable, enable, exclude, config)\u001b[0m\n\u001b[1;32m 27\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mload\u001b[39m(\n\u001b[1;32m 28\u001b[0m name: Union[\u001b[38;5;28mstr\u001b[39m, Path],\n\u001b[1;32m 29\u001b[0m \u001b[38;5;241m*\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 34\u001b[0m config: Union[Dict[\u001b[38;5;28mstr\u001b[39m, Any], Config] \u001b[38;5;241m=\u001b[39m util\u001b[38;5;241m.\u001b[39mSimpleFrozenDict(),\n\u001b[1;32m 35\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Language:\n\u001b[1;32m 36\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Load a spaCy model from an installed package or a local path.\u001b[39;00m\n\u001b[1;32m 37\u001b[0m \n\u001b[1;32m 38\u001b[0m \u001b[38;5;124;03m name (str): Package name or model path.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 49\u001b[0m \u001b[38;5;124;03m RETURNS (Language): The loaded nlp object.\u001b[39;00m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 51\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mutil\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 52\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 53\u001b[0m \u001b[43m \u001b[49m\u001b[43mvocab\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mvocab\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 54\u001b[0m \u001b[43m \u001b[49m\u001b[43mdisable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdisable\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 55\u001b[0m \u001b[43m \u001b[49m\u001b[43menable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43menable\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 56\u001b[0m \u001b[43m \u001b[49m\u001b[43mexclude\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexclude\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 57\u001b[0m \u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 58\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
|
"programmer\n",
|
||||||
"File \u001b[0;32m~/.pyenv/versions/3.11.0/lib/python3.11/site-packages/spacy/util.py:472\u001b[0m, in \u001b[0;36mload_model\u001b[0;34m(name, vocab, disable, enable, exclude, config)\u001b[0m\n\u001b[1;32m 470\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m OLD_MODEL_SHORTCUTS:\n\u001b[1;32m 471\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mIOError\u001b[39;00m(Errors\u001b[38;5;241m.\u001b[39mE941\u001b[38;5;241m.\u001b[39mformat(name\u001b[38;5;241m=\u001b[39mname, full\u001b[38;5;241m=\u001b[39mOLD_MODEL_SHORTCUTS[name])) \u001b[38;5;66;03m# type: ignore[index]\u001b[39;00m\n\u001b[0;32m--> 472\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mIOError\u001b[39;00m(Errors\u001b[38;5;241m.\u001b[39mE050\u001b[38;5;241m.\u001b[39mformat(name\u001b[38;5;241m=\u001b[39mname))\n",
|
":\n",
|
||||||
"\u001b[0;31mOSError\u001b[0m: [E050] Can't find model 'en_core_web_sm'. It doesn't seem to be a Python package or a valid path to a data directory."
|
"this\n",
|
||||||
|
"section\n",
|
||||||
|
"explain\n",
|
||||||
|
"how\n",
|
||||||
|
"to\n",
|
||||||
|
"compile\n",
|
||||||
|
"and\n",
|
||||||
|
"run\n",
|
||||||
|
"a\n",
|
||||||
|
"swing\n",
|
||||||
|
"application\n",
|
||||||
|
"from\n",
|
||||||
|
"the\n",
|
||||||
|
"command\n",
|
||||||
|
"line\n",
|
||||||
|
".\n",
|
||||||
|
"for\n",
|
||||||
|
"information\n",
|
||||||
|
"on\n",
|
||||||
|
"compile\n",
|
||||||
|
"and\n",
|
||||||
|
"run\n",
|
||||||
|
"a\n",
|
||||||
|
"swing\n",
|
||||||
|
"application\n",
|
||||||
|
"use\n",
|
||||||
|
"NetBeans\n",
|
||||||
|
"IDE\n",
|
||||||
|
",\n",
|
||||||
|
"see\n",
|
||||||
|
"run\n",
|
||||||
|
"Tutorial\n",
|
||||||
|
"Examples\n",
|
||||||
|
"in\n",
|
||||||
|
"NetBeans\n",
|
||||||
|
"IDE\n",
|
||||||
|
".\n",
|
||||||
|
"the\n",
|
||||||
|
"compilation\n",
|
||||||
|
"instruction\n",
|
||||||
|
"work\n",
|
||||||
|
"for\n",
|
||||||
|
"all\n",
|
||||||
|
"Swing\n",
|
||||||
|
"program\n",
|
||||||
|
"—\n",
|
||||||
|
"applet\n",
|
||||||
|
",\n",
|
||||||
|
"as\n",
|
||||||
|
"well\n",
|
||||||
|
"as\n",
|
||||||
|
"application\n",
|
||||||
|
".\n",
|
||||||
|
"here\n",
|
||||||
|
"be\n",
|
||||||
|
"the\n",
|
||||||
|
"step\n",
|
||||||
|
"you\n",
|
||||||
|
"need\n",
|
||||||
|
"to\n",
|
||||||
|
"follow\n",
|
||||||
|
":\n",
|
||||||
|
"install\n",
|
||||||
|
"the\n",
|
||||||
|
"late\n",
|
||||||
|
"release\n",
|
||||||
|
"of\n",
|
||||||
|
"the\n",
|
||||||
|
"Java\n",
|
||||||
|
"SE\n",
|
||||||
|
"platform\n",
|
||||||
|
",\n",
|
||||||
|
"if\n",
|
||||||
|
"you\n",
|
||||||
|
"have\n",
|
||||||
|
"not\n",
|
||||||
|
"already\n",
|
||||||
|
"do\n",
|
||||||
|
"so\n",
|
||||||
|
".\n",
|
||||||
|
"create\n",
|
||||||
|
"a\n",
|
||||||
|
"program\n",
|
||||||
|
"that\n",
|
||||||
|
"use\n",
|
||||||
|
"swing\n",
|
||||||
|
"component\n",
|
||||||
|
".\n",
|
||||||
|
"compile\n",
|
||||||
|
"the\n",
|
||||||
|
"program\n",
|
||||||
|
".\n",
|
||||||
|
"run\n",
|
||||||
|
"the\n",
|
||||||
|
"program\n",
|
||||||
|
".\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -305,7 +482,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"def terminology_lookup():\n",
|
"def terminology_lookup():\n",
|
||||||
" return []"
|
" return None"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -334,13 +511,77 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 17,
|
||||||
"id": "superb-butterfly",
|
"id": "superb-butterfly",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"def get_nouns(text):\n",
|
"def get_nouns(text):\n",
|
||||||
" return []"
|
" doc = nlp(text)\n",
|
||||||
|
" return [token.text for token in doc if token.pos_ == \"NOUN\"]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 23,
|
||||||
|
"id": "5e2be152",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"'ADP'"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 23,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"doc[1].pos_"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 18,
|
||||||
|
"id": "11430dc5",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"['programmers',\n",
|
||||||
|
" 'section',\n",
|
||||||
|
" 'Swing',\n",
|
||||||
|
" 'application',\n",
|
||||||
|
" 'command',\n",
|
||||||
|
" 'line',\n",
|
||||||
|
" 'information',\n",
|
||||||
|
" 'Swing',\n",
|
||||||
|
" 'application',\n",
|
||||||
|
" 'compilation',\n",
|
||||||
|
" 'instructions',\n",
|
||||||
|
" 'programs',\n",
|
||||||
|
" 'applets',\n",
|
||||||
|
" 'applications',\n",
|
||||||
|
" 'steps',\n",
|
||||||
|
" 'release',\n",
|
||||||
|
" 'platform',\n",
|
||||||
|
" 'program',\n",
|
||||||
|
" 'Swing',\n",
|
||||||
|
" 'components',\n",
|
||||||
|
" 'program',\n",
|
||||||
|
" 'program']"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 18,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"get_nouns(text)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -371,13 +612,55 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 24,
|
||||||
"id": "eight-redhead",
|
"id": "eight-redhead",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"def extract_terms(text):\n",
|
"def extract_terms(text):\n",
|
||||||
" return []"
|
" doc = nlp(text)\n",
|
||||||
|
" noun_counts = {}\n",
|
||||||
|
" for token in doc:\n",
|
||||||
|
" if token.pos_ == \"NOUN\":\n",
|
||||||
|
" noun_counts[token.text] = noun_counts.get(token.text, 0) + 1\n",
|
||||||
|
" return noun_counts"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 25,
|
||||||
|
"id": "c7d46f26",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"{'programmers': 1,\n",
|
||||||
|
" 'section': 1,\n",
|
||||||
|
" 'Swing': 3,\n",
|
||||||
|
" 'application': 2,\n",
|
||||||
|
" 'command': 1,\n",
|
||||||
|
" 'line': 1,\n",
|
||||||
|
" 'information': 1,\n",
|
||||||
|
" 'compilation': 1,\n",
|
||||||
|
" 'instructions': 1,\n",
|
||||||
|
" 'programs': 1,\n",
|
||||||
|
" 'applets': 1,\n",
|
||||||
|
" 'applications': 1,\n",
|
||||||
|
" 'steps': 1,\n",
|
||||||
|
" 'release': 1,\n",
|
||||||
|
" 'platform': 1,\n",
|
||||||
|
" 'program': 3,\n",
|
||||||
|
" 'components': 1}"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 25,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"extract_terms(text)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -390,14 +673,61 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 35,
|
||||||
"id": "monetary-mambo",
|
"id": "monetary-mambo",
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"def extract_terms(text):\n",
|
"def extract_terms(text):\n",
|
||||||
" return []"
|
" doc = nlp(text.lower())\n",
|
||||||
|
" noun_counts = {}\n",
|
||||||
|
" for token in doc:\n",
|
||||||
|
" if token.pos_ in [\"NOUN\", \"VERB\", \"ADJ\"]:\n",
|
||||||
|
" noun_counts[token.text] = noun_counts.get(token.text, 0) + 1\n",
|
||||||
|
" return noun_counts"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 34,
|
||||||
|
"id": "4259ee3f",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"{'explains': 1,\n",
|
||||||
|
" 'compile': 2,\n",
|
||||||
|
" 'run': 2,\n",
|
||||||
|
" 'compiling': 1,\n",
|
||||||
|
" 'running': 2,\n",
|
||||||
|
" 'using': 1,\n",
|
||||||
|
" 'see': 1,\n",
|
||||||
|
" 'work': 1,\n",
|
||||||
|
" 'need': 1,\n",
|
||||||
|
" 'follow': 1,\n",
|
||||||
|
" 'install': 1,\n",
|
||||||
|
" 'done': 1,\n",
|
||||||
|
" 'create': 1,\n",
|
||||||
|
" 'uses': 1}"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 34,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"extract_terms(text)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"id": "8d239c20",
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
Loading…
Reference in New Issue
Block a user