This commit is contained in:
potato 2024-04-15 21:46:15 +02:00
parent 71ca3b66ed
commit e5ed49b0b0
3 changed files with 286 additions and 163 deletions

View File

@ -82,7 +82,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 5,
"id": "compact-trinidad", "id": "compact-trinidad",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -92,7 +92,7 @@
"['Press the ENTER button']" "['Press the ENTER button']"
] ]
}, },
"execution_count": 3, "execution_count": 5,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -119,7 +119,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 6,
"id": "exposed-daniel", "id": "exposed-daniel",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -139,7 +139,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 7,
"id": "serial-velvet", "id": "serial-velvet",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -149,7 +149,7 @@
"['Press the ENTER button', 'Press the ENTER key']" "['Press the ENTER button', 'Press the ENTER key']"
] ]
}, },
"execution_count": 5, "execution_count": 7,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -176,17 +176,17 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 11,
"id": "every-gibson", "id": "every-gibson",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"[]" "['Press the ENTER button', 'Press the ENTER key']"
] ]
}, },
"execution_count": 6, "execution_count": 11,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -213,13 +213,13 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 9,
"id": "protected-rings", "id": "protected-rings",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"def tm_lookup(sentence):\n", "def tm_lookup(sentence):\n",
" return ''" " return [entry[1] for entry in translation_memory if entry[0].lower() == sentence.lower()]"
] ]
}, },
{ {
@ -232,17 +232,17 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 18, "execution_count": 13,
"id": "severe-alloy", "id": "severe-alloy",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"''" "['Press the ENTER button', 'Press the ENTER key']"
] ]
}, },
"execution_count": 18, "execution_count": 13,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -261,13 +261,20 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 11, "execution_count": 12,
"id": "structural-diesel", "id": "structural-diesel",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"import re\n",
"\n",
"def remove_punctuation(sentence):\n",
" return re.sub(r'[^\\w\\s]', '', sentence)\n",
"\n",
"\n",
"def tm_lookup(sentence):\n", "def tm_lookup(sentence):\n",
" return ''" " return [entry[1] for entry in translation_memory\n",
" if entry[0].lower() == remove_punctuation(sentence.lower())]"
] ]
}, },
{ {
@ -280,17 +287,17 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": 27,
"id": "brief-senegal", "id": "brief-senegal",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"''" "['System restart required']"
] ]
}, },
"execution_count": 12, "execution_count": 27,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -317,13 +324,26 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 14, "execution_count": 26,
"id": "mathematical-customs", "id": "mathematical-customs",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"import re\n",
"\n",
"def remove_punctuation(sentence):\n",
" return re.sub(r'[^\\w\\s]', '', sentence)\n",
"\n",
"\n",
"def tm_lookup(sentence):\n", "def tm_lookup(sentence):\n",
" return ''" " values = []\n",
" for entry in translation_memory:\n",
" key = set(entry[0].lower().split())\n",
" mod_sentence = set(remove_punctuation(sentence.lower()).split())\n",
" remainder = list(key - mod_sentence)\n",
" if len(remainder) <= 1:\n",
" values.append(entry[1])\n",
" return values"
] ]
}, },
{ {
@ -344,7 +364,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 15, "execution_count": 42,
"id": "humanitarian-wrong", "id": "humanitarian-wrong",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -362,7 +382,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 16, "execution_count": 43,
"id": "located-perception", "id": "located-perception",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -374,17 +394,31 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 17, "execution_count": 44,
"id": "3437b88b",
"metadata": {},
"outputs": [],
"source": [
"glossary = [('komputer', 'computer'), ('przycisk', 'button'), ('drukarka', 'printer')]\n",
"\n",
"def glossary_lookup(sentence):\n",
" sentence_words = sentence.split()\n",
" return [entry for entry in glossary if entry[0] in sentence_words]"
]
},
{
"cell_type": "code",
"execution_count": 51,
"id": "advised-casting", "id": "advised-casting",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"[('przycisk', 'button'), ('drukarka', 'printer')]" "[('drukarka', 'printer'), ('przycisk', 'button')]"
] ]
}, },
"execution_count": 17, "execution_count": 51,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -406,7 +440,7 @@
"id": "defensive-fifteen", "id": "defensive-fifteen",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Odpowiedź:" "Odpowiedź: Operacja split w pierwszej linijce funkcji może zostać uznana za stałą. Biorąc pod uwagę, że lista krotek zawierająca glosariusz musi być za każdym razem przejrzana cała, jak i cały string, złożoność obliczeniowa będzie wynosić O(n*m), gdzie n to liczba haseł w glosariuszu, a m to liczba słów w zdaniu."
] ]
}, },
{ {
@ -419,13 +453,14 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 19, "execution_count": 48,
"id": "original-tunisia", "id": "original-tunisia",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"def glossary_lookup(sentence):\n", "def glossary_lookup(sentence):\n",
" return ''" " sentence_words = [ element.lower() for element in sentence.split()]\n",
" return [entry for entry in glossary if entry[0].lower() in sentence_words]"
] ]
}, },
{ {
@ -438,13 +473,27 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 20, "execution_count": 49,
"id": "f69a873a",
"metadata": {},
"outputs": [],
"source": [
"glossary = [('komputer', 'computer'), ('przycisk', 'button'), ('drukarka', 'printer')]\n",
"glossary = { k:v for k,v in glossary}\n",
"translated_words = list(glossary.keys())"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "adolescent-semiconductor", "id": "adolescent-semiconductor",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"def glossary_lookup(sentence):\n", "def glossary_lookup(sentence):\n",
" return ''" " words = sentence.split()\n",
" \n",
" return [(word, glossary[word]) for word in words if word in translated_words]"
] ]
} }
], ],
@ -467,7 +516,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.10" "version": "3.11.0"
}, },
"subtitle": "1. Podstawowe techniki wspomagania tłumaczenia", "subtitle": "1. Podstawowe techniki wspomagania tłumaczenia",
"title": "Komputerowe wspomaganie tłumaczenia", "title": "Komputerowe wspomaganie tłumaczenia",

View File

@ -67,7 +67,9 @@
" ('Sprawdź ustawienia sieciowe', 'Check the network settings'),\n", " ('Sprawdź ustawienia sieciowe', 'Check the network settings'),\n",
" ('Drukarka jest wyłączona', 'The printer is switched off'),\n", " ('Drukarka jest wyłączona', 'The printer is switched off'),\n",
" ('Wymagane ponowne uruchomienie komputera', 'System restart required')\n", " ('Wymagane ponowne uruchomienie komputera', 'System restart required')\n",
" ]" " ]\n",
"\n",
"translation_memory = { k:v for k,v in translation_memory}"
] ]
}, },
{ {
@ -86,7 +88,11 @@
"outputs": [], "outputs": [],
"source": [ "source": [
"def ice_lookup(sentence, prev_sentence, next_sentence):\n", "def ice_lookup(sentence, prev_sentence, next_sentence):\n",
" return []" " s_t = translation_memory.get(sentence, False)\n",
" p_s = translation_memory.get(prev_sentence, False)\n",
" n_s = translation_memory.get(next_sentence, False)\n",
" if s_t and p_s and n_s:\n",
" return s_t "
] ]
}, },
{ {
@ -141,7 +147,7 @@
"id": "graduate-theorem", "id": "graduate-theorem",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Odpowiedź:" "Odpowiedź: Nie jest to poprawna funkcja dystansu fuzzy match. Warunki 1, 3 i 4 są spełnione. Warunek 2 nie jest spełniony, ponieważ odległość pomiędzy dwoma zdaniami/słowami o tej samej długości, ale złożonymi z innych znaków, będzie równa zero."
] ]
}, },
{ {
@ -179,7 +185,7 @@
"id": "metallic-leave", "id": "metallic-leave",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Odpowiedź:" "Odpowiedź: Tak, jest to poprawna funkcja dystansu. Wszystkie warunki są spełnione."
] ]
}, },
{ {
@ -201,12 +207,54 @@
"### Ćwiczenie 5: Czy dystans Levenshteina jest poprawną funkcją dystansu? Uzasadnij krótko swoją odpowiedź sprawdzając każdy z warunków." "### Ćwiczenie 5: Czy dystans Levenshteina jest poprawną funkcją dystansu? Uzasadnij krótko swoją odpowiedź sprawdzając każdy z warunków."
] ]
}, },
{
"cell_type": "code",
"execution_count": 22,
"id": "79e4deef",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from Levenshtein import distance as levenshtein_distance\n",
"\n",
"# warunek 1\n",
"levenshtein_distance(\"smthn\", \"nothin\")\n",
"# Output: 3\n",
"# zawsze nieujemne\n",
"\n",
"# warunek 2\n",
"levenshtein_distance(\"and\", \"and\")\n",
"# Output: 0\n",
"# dwa takie same zdania ktore sa w odleglosci 0 od siebie\n",
"\n",
"# warunek 3\n",
"levenshtein_distance(\"zombie\", \"mombie\") == levenshtein_distance(\"mombie\", \"zombie\")\n",
"# Output: True\n",
"# zamiennosc zdan\n",
"\n",
"# warunek 4\n",
"x,y,z = 'zombie', 'glombie', 'mombie'\n",
"levenshtein_distance(x,y) + levenshtein_distance(y,z) >= levenshtein_distance(x,z)\n",
"#Output: True\n",
"# miara każdej odleglosci musi być mniejsza lub równa sumie miar dwóch pozostałych"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "bibliographic-stopping", "id": "bibliographic-stopping",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Odpowiedź:" "Odpowiedź: Tak, dystans Levenshteina jest poprawną funkcją dystansu – spełnia wszystkie cztery warunki."
] ]
}, },
{ {
@ -223,7 +271,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 7,
"id": "secondary-wrist", "id": "secondary-wrist",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -233,7 +281,7 @@
"2" "2"
] ]
}, },
"execution_count": 5, "execution_count": 7,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -254,7 +302,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 8,
"id": "associate-tuner", "id": "associate-tuner",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -273,7 +321,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 9,
"id": "focal-pathology", "id": "focal-pathology",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -283,7 +331,7 @@
"0.9166666666666666" "0.9166666666666666"
] ]
}, },
"execution_count": 7, "execution_count": 9,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -294,7 +342,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 10,
"id": "roman-ceiling", "id": "roman-ceiling",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -304,7 +352,7 @@
"0.9428571428571428" "0.9428571428571428"
] ]
}, },
"execution_count": 8, "execution_count": 10,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -315,7 +363,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 9, "execution_count": 11,
"id": "invisible-cambodia", "id": "invisible-cambodia",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -325,7 +373,7 @@
"0.631578947368421" "0.631578947368421"
] ]
}, },
"execution_count": 9, "execution_count": 11,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -344,14 +392,43 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": 12,
"id": "genetic-cradle", "id": "genetic-cradle",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"def fuzzy_lookup(sentence, threshold):\n", "def fuzzy_lookup(sentence, threshold):\n",
" return []" " return [ v for k,v in translation_memory.items() if levenshtein_similarity(sentence, k) > threshold ]"
] ]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "2e72b54a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['Press the ENTER button', 'System restart required']"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fuzzy_lookup('Spróbuj wyłączyć i włączyć komputer', 0.25)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "e1f15316",
"metadata": {},
"outputs": [],
"source": []
} }
], ],
"metadata": { "metadata": {
@ -373,7 +450,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.10" "version": "3.11.0"
}, },
"subtitle": "2. Zaawansowane użycie pamięci tłumaczeń", "subtitle": "2. Zaawansowane użycie pamięci tłumaczeń",
"title": "Komputerowe wspomaganie tłumaczenia", "title": "Komputerowe wspomaganie tłumaczenia",

View File

@ -63,7 +63,9 @@
"id": "diverse-sunglasses", "id": "diverse-sunglasses",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Odpowiedź:" "Odpowiedź: Narzędzie DeepL: https://www.deepl.com/translator\n",
"\n",
"przetłumaczyło tekst \"prowadnice szaf metalowych\" na \"metal cabinet slides\""
] ]
}, },
{ {
@ -87,6 +89,16 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 1,
"id": "8f6b6fa9",
"metadata": {},
"outputs": [],
"source": [
"import re"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "loving-prince", "id": "loving-prince",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -110,7 +122,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 3,
"id": "bound-auction", "id": "bound-auction",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -128,13 +140,40 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 4,
"id": "cognitive-cedar", "id": "cognitive-cedar",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"def terminology_lookup():\n", "count_dictionary = {}\n",
" return []" "\n",
"def terminology_lookup(text, tags):\n",
" return [(tag, [[m.start(), m.end()] \n",
" for m in re.finditer(tag, text)])\n",
" for tag in tags if tag in text]\n",
" "
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "9fe3b66f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('program', [[14, 21], [291, 298], [468, 475], [516, 523], [533, 540]]),\n",
" ('application', [[80, 91], [164, 175], [322, 333]])]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"terminology_lookup(text, dictionary)"
] ]
}, },
{ {
@ -161,116 +200,74 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 6,
"id": "tribal-attention", "id": "7b7b7569",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"Requirement already satisfied: spacy in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (3.7.4)\n",
"Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (3.0.12)\n",
"Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (1.0.5)\n",
"Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (1.0.10)\n",
"Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (2.0.8)\n",
"Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (3.0.9)\n",
"Requirement already satisfied: thinc<8.3.0,>=8.2.2 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (8.2.3)\n",
"Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (1.1.2)\n",
"Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (2.4.8)\n",
"Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (2.0.10)\n",
"Requirement already satisfied: weasel<0.4.0,>=0.1.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (0.3.4)\n",
"Requirement already satisfied: typer<0.10.0,>=0.3.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (0.9.4)\n",
"Requirement already satisfied: smart-open<7.0.0,>=5.2.1 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (6.4.0)\n",
"Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (4.66.2)\n",
"Requirement already satisfied: requests<3.0.0,>=2.13.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (2.31.0)\n",
"Requirement already satisfied: pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (2.7.0)\n",
"Requirement already satisfied: jinja2 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (3.1.3)\n",
"Requirement already satisfied: setuptools in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (65.5.0)\n",
"Requirement already satisfied: packaging>=20.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (24.0)\n",
"Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (3.3.0)\n",
"Requirement already satisfied: numpy>=1.19.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (1.26.4)\n",
"Requirement already satisfied: annotated-types>=0.4.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy) (0.6.0)\n",
"Requirement already satisfied: pydantic-core==2.18.1 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy) (2.18.1)\n",
"Requirement already satisfied: typing-extensions>=4.6.1 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy) (4.11.0)\n",
"Requirement already satisfied: charset-normalizer<4,>=2 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from requests<3.0.0,>=2.13.0->spacy) (3.3.2)\n",
"Requirement already satisfied: idna<4,>=2.5 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from requests<3.0.0,>=2.13.0->spacy) (3.7)\n",
"Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from requests<3.0.0,>=2.13.0->spacy) (2.2.1)\n",
"Requirement already satisfied: certifi>=2017.4.17 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from requests<3.0.0,>=2.13.0->spacy) (2024.2.2)\n",
"Requirement already satisfied: blis<0.8.0,>=0.7.8 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from thinc<8.3.0,>=8.2.2->spacy) (0.7.11)\n",
"Requirement already satisfied: confection<1.0.0,>=0.0.1 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from thinc<8.3.0,>=8.2.2->spacy) (0.1.4)\n",
"Requirement already satisfied: click<9.0.0,>=7.1.1 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from typer<0.10.0,>=0.3.0->spacy) (8.1.7)\n",
"Requirement already satisfied: cloudpathlib<0.17.0,>=0.7.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from weasel<0.4.0,>=0.1.0->spacy) (0.16.0)\n",
"Requirement already satisfied: MarkupSafe>=2.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from jinja2->spacy) (2.1.5)\n",
"\n", "\n",
"for\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n",
"all\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
"Java\n", ]
"programmer\n", }
":\n", ],
"this\n", "source": [
"section\n", "!pip install spacy"
"explain\n", ]
"how\n", },
"to\n", {
"compile\n", "cell_type": "code",
"and\n", "execution_count": 8,
"run\n", "id": "tribal-attention",
"a\n", "metadata": {},
"swing\n", "outputs": [
"application\n", {
"from\n", "ename": "OSError",
"the\n", "evalue": "[E050] Can't find model 'en_core_web_sm'. It doesn't seem to be a Python package or a valid path to a data directory.",
"command\n", "output_type": "error",
"line\n", "traceback": [
".\n", "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"for\n", "\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)",
"information\n", "Cell \u001b[0;32mIn[8], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mspacy\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m nlp \u001b[38;5;241m=\u001b[39m \u001b[43mspacy\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43men_core_web_sm\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m doc \u001b[38;5;241m=\u001b[39m nlp(text)\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m token \u001b[38;5;129;01min\u001b[39;00m doc:\n",
"on\n", "File \u001b[0;32m~/.pyenv/versions/3.11.0/lib/python3.11/site-packages/spacy/__init__.py:51\u001b[0m, in \u001b[0;36mload\u001b[0;34m(name, vocab, disable, enable, exclude, config)\u001b[0m\n\u001b[1;32m 27\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mload\u001b[39m(\n\u001b[1;32m 28\u001b[0m name: Union[\u001b[38;5;28mstr\u001b[39m, Path],\n\u001b[1;32m 29\u001b[0m \u001b[38;5;241m*\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 34\u001b[0m config: Union[Dict[\u001b[38;5;28mstr\u001b[39m, Any], Config] \u001b[38;5;241m=\u001b[39m util\u001b[38;5;241m.\u001b[39mSimpleFrozenDict(),\n\u001b[1;32m 35\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Language:\n\u001b[1;32m 36\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Load a spaCy model from an installed package or a local path.\u001b[39;00m\n\u001b[1;32m 37\u001b[0m \n\u001b[1;32m 38\u001b[0m \u001b[38;5;124;03m name (str): Package name or model path.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 49\u001b[0m \u001b[38;5;124;03m RETURNS (Language): The loaded nlp object.\u001b[39;00m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 51\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mutil\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 52\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 53\u001b[0m \u001b[43m \u001b[49m\u001b[43mvocab\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mvocab\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 54\u001b[0m \u001b[43m \u001b[49m\u001b[43mdisable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdisable\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 55\u001b[0m \u001b[43m \u001b[49m\u001b[43menable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43menable\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 56\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mexclude\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexclude\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 57\u001b[0m \u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 58\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
"compile\n", "File \u001b[0;32m~/.pyenv/versions/3.11.0/lib/python3.11/site-packages/spacy/util.py:472\u001b[0m, in \u001b[0;36mload_model\u001b[0;34m(name, vocab, disable, enable, exclude, config)\u001b[0m\n\u001b[1;32m 470\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m OLD_MODEL_SHORTCUTS:\n\u001b[1;32m 471\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mIOError\u001b[39;00m(Errors\u001b[38;5;241m.\u001b[39mE941\u001b[38;5;241m.\u001b[39mformat(name\u001b[38;5;241m=\u001b[39mname, full\u001b[38;5;241m=\u001b[39mOLD_MODEL_SHORTCUTS[name])) \u001b[38;5;66;03m# type: ignore[index]\u001b[39;00m\n\u001b[0;32m--> 472\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mIOError\u001b[39;00m(Errors\u001b[38;5;241m.\u001b[39mE050\u001b[38;5;241m.\u001b[39mformat(name\u001b[38;5;241m=\u001b[39mname))\n",
"and\n", "\u001b[0;31mOSError\u001b[0m: [E050] Can't find model 'en_core_web_sm'. It doesn't seem to be a Python package or a valid path to a data directory."
"run\n",
"a\n",
"swing\n",
"application\n",
"use\n",
"NetBeans\n",
"IDE\n",
",\n",
"see\n",
"Running\n",
"Tutorial\n",
"Examples\n",
"in\n",
"NetBeans\n",
"IDE\n",
".\n",
"the\n",
"compilation\n",
"instruction\n",
"work\n",
"for\n",
"all\n",
"swing\n",
"program\n",
"—\n",
"applet\n",
",\n",
"as\n",
"well\n",
"as\n",
"application\n",
".\n",
"here\n",
"be\n",
"the\n",
"step\n",
"-PRON-\n",
"need\n",
"to\n",
"follow\n",
":\n",
"install\n",
"the\n",
"late\n",
"release\n",
"of\n",
"the\n",
"Java\n",
"SE\n",
"platform\n",
",\n",
"if\n",
"-PRON-\n",
"have\n",
"not\n",
"already\n",
"do\n",
"so\n",
".\n",
"create\n",
"a\n",
"program\n",
"that\n",
"use\n",
"Swing\n",
"component\n",
".\n",
"compile\n",
"the\n",
"program\n",
".\n",
"run\n",
"the\n",
"program\n",
".\n"
] ]
} }
], ],
@ -302,7 +299,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": null,
"id": "surgical-demonstration", "id": "surgical-demonstration",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -337,7 +334,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": null,
"id": "superb-butterfly", "id": "superb-butterfly",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -356,7 +353,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": null,
"id": "acting-tolerance", "id": "acting-tolerance",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -374,7 +371,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": null,
"id": "eight-redhead", "id": "eight-redhead",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -393,7 +390,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 9, "execution_count": null,
"id": "monetary-mambo", "id": "monetary-mambo",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -422,7 +419,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.10" "version": "3.11.0"
}, },
"subtitle": "3. Terminologia", "subtitle": "3. Terminologia",
"title": "Komputerowe wspomaganie tłumaczenia", "title": "Komputerowe wspomaganie tłumaczenia",