forked from bfijalkowski/KWT-2024
Labs
This commit is contained in:
parent
71ca3b66ed
commit
e5ed49b0b0
111
lab/lab_01.ipynb
111
lab/lab_01.ipynb
@ -82,7 +82,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 5,
|
||||
"id": "compact-trinidad",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -92,7 +92,7 @@
|
||||
"['Press the ENTER button']"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -119,7 +119,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"execution_count": 6,
|
||||
"id": "exposed-daniel",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -139,7 +139,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 7,
|
||||
"id": "serial-velvet",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -149,7 +149,7 @@
|
||||
"['Press the ENTER button', 'Press the ENTER key']"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -176,17 +176,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 11,
|
||||
"id": "every-gibson",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[]"
|
||||
"['Press the ENTER button', 'Press the ENTER key']"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -213,13 +213,13 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 9,
|
||||
"id": "protected-rings",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def tm_lookup(sentence):\n",
|
||||
" return ''"
|
||||
" return [entry[1] for entry in translation_memory if entry[0].lower() == sentence.lower()]"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -232,17 +232,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 18,
|
||||
"execution_count": 13,
|
||||
"id": "severe-alloy",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"''"
|
||||
"['Press the ENTER button', 'Press the ENTER key']"
|
||||
]
|
||||
},
|
||||
"execution_count": 18,
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -261,13 +261,20 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"execution_count": 12,
|
||||
"id": "structural-diesel",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import re\n",
|
||||
"\n",
|
||||
"def remove_punctuation(sentence):\n",
|
||||
" return re.sub(r'[^\\w\\s]', '', sentence)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def tm_lookup(sentence):\n",
|
||||
" return ''"
|
||||
" return [entry[1] for entry in translation_memory\n",
|
||||
" if entry[0].lower() == remove_punctuation(sentence.lower())]"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -280,17 +287,17 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"execution_count": 27,
|
||||
"id": "brief-senegal",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"''"
|
||||
"['System restart required']"
|
||||
]
|
||||
},
|
||||
"execution_count": 12,
|
||||
"execution_count": 27,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -317,13 +324,26 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 14,
|
||||
"execution_count": 26,
|
||||
"id": "mathematical-customs",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import re\n",
|
||||
"\n",
|
||||
"def remove_punctuation(sentence):\n",
|
||||
" return re.sub(r'[^\\w\\s]', '', sentence)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"def tm_lookup(sentence):\n",
|
||||
" return ''"
|
||||
" values = []\n",
|
||||
" for entry in translation_memory:\n",
|
||||
" key = set(entry[0].lower().split())\n",
|
||||
" mod_sentence = set(remove_punctuation(sentence.lower()).split())\n",
|
||||
" remainder = list(key - mod_sentence)\n",
|
||||
" if len(remainder) <= 1:\n",
|
||||
" values.append(entry[1])\n",
|
||||
" return values"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -344,7 +364,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 15,
|
||||
"execution_count": 42,
|
||||
"id": "humanitarian-wrong",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -362,7 +382,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 16,
|
||||
"execution_count": 43,
|
||||
"id": "located-perception",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -374,17 +394,31 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"execution_count": 44,
|
||||
"id": "3437b88b",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"glossary = [('komputer', 'computer'), ('przycisk', 'button'), ('drukarka', 'printer')]\n",
|
||||
"\n",
|
||||
"def glossary_lookup(sentence):\n",
|
||||
" sentence_words = sentence.split()\n",
|
||||
" return [entry for entry in glossary if entry[0] in sentence_words]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 51,
|
||||
"id": "advised-casting",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[('przycisk', 'button'), ('drukarka', 'printer')]"
|
||||
"[('drukarka', 'printer'), ('przycisk', 'button')]"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"execution_count": 51,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -406,7 +440,7 @@
|
||||
"id": "defensive-fifteen",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Odpowiedź:"
|
||||
"Odpowiedź: Operacja split w pierwszej linijce funkcji moze zostac uznana za stala. Biorac pod uwage ze lista krotek zawierajaca glosariusz musi byc przejrzana za kazdym razem cala, jak i caly string – skomplikowaność obliczen bedzie wynosic O(n*m)."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -419,13 +453,14 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 19,
|
||||
"execution_count": 48,
|
||||
"id": "original-tunisia",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def glossary_lookup(sentence):\n",
|
||||
" return ''"
|
||||
" sentence_words = [ element.lower() for element in sentence.split()]\n",
|
||||
" return [entry for entry in glossary if entry[0].lower() in sentence_words]"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -438,13 +473,27 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 20,
|
||||
"execution_count": 49,
|
||||
"id": "f69a873a",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"glossary = [('komputer', 'computer'), ('przycisk', 'button'), ('drukarka', 'printer')]\n",
|
||||
"glossary = { k:v for k,v in glossary}\n",
|
||||
"translated_words = list(glossary.keys())"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 50,
|
||||
"id": "adolescent-semiconductor",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def glossary_lookup(sentence):\n",
|
||||
" return ''"
|
||||
" words = sentence.split()\n",
|
||||
" \n",
|
||||
" return [(word, glossary[word]) for word in words if word in translated_words]"
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -467,7 +516,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.10"
|
||||
"version": "3.11.0"
|
||||
},
|
||||
"subtitle": "1. Podstawowe techniki wspomagania tłumaczenia",
|
||||
"title": "Komputerowe wspomaganie tłumaczenia",
|
||||
|
111
lab/lab_02.ipynb
111
lab/lab_02.ipynb
@ -67,7 +67,9 @@
|
||||
" ('Sprawdź ustawienia sieciowe', 'Check the network settings'),\n",
|
||||
" ('Drukarka jest wyłączona', 'The printer is switched off'),\n",
|
||||
" ('Wymagane ponowne uruchomienie komputera', 'System restart required')\n",
|
||||
" ]"
|
||||
" ]\n",
|
||||
"\n",
|
||||
"translation_memory = { k:v for k,v in translation_memory}"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -86,7 +88,11 @@
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def ice_lookup(sentence, prev_sentence, next_sentence):\n",
|
||||
" return []"
|
||||
" s_t = translation_memory.get(sentence, False)\n",
|
||||
" p_s = translation_memory.get(prev_sentence, False)\n",
|
||||
" n_s = translation_memory.get(next_sentence, False)\n",
|
||||
" if s_t and p_s and n_s:\n",
|
||||
" return s_t "
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -141,7 +147,7 @@
|
||||
"id": "graduate-theorem",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Odpowiedź:"
|
||||
"Odpowiedź: Nie jest to poprawna funkcja dystansu fuzzy match. Warunki 1,3,4 sa spelnione. 2 warunek jest nie spełniony poniewaz odleglosc pomiedzy dwoma zdaniami/slowami o tej samej dlugosci ale innych znakach bedzie rowna zero."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -179,7 +185,7 @@
|
||||
"id": "metallic-leave",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Odpowiedź:"
|
||||
"Odpowiedź: Tak, jest to poprawna funkcja dystansu. Wszystkie warunki sa spelnione."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -201,12 +207,54 @@
|
||||
"### Ćwiczenie 5: Czy dystans Levenshteina jest poprawną funkcją dystansu? Uzasadnij krótko swoją odpowiedź sprawdzając każdy z warunków."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 22,
|
||||
"id": "79e4deef",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"True"
|
||||
]
|
||||
},
|
||||
"execution_count": 22,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"from Levenshtein import distance as levenshtein_distance\n",
|
||||
"\n",
|
||||
"# warunek 1\n",
|
||||
"levenshtein_distance(\"smthn\", \"nothin\")\n",
|
||||
"# Output: 3\n",
|
||||
"# zawsze nieujemne\n",
|
||||
"\n",
|
||||
"# warunek 2\n",
|
||||
"levenshtein_distance(\"and\", \"and\")\n",
|
||||
"# Output: 0\n",
|
||||
"# dwa takie same zdania ktore sa w odleglosci 0 od siebie\n",
|
||||
"\n",
|
||||
"# warunek 3\n",
|
||||
"levenshtein_distance(\"zombie\", \"mombie\") == levenshtein_distance(\"mombie\", \"zombie\")\n",
|
||||
"# Output: True\n",
|
||||
"# zamiennosc zdan\n",
|
||||
"\n",
|
||||
"# warunek 4\n",
|
||||
"x,y,z = 'zombie', 'glombie', 'mombie'\n",
|
||||
"levenshtein_distance(x,y) + levenshtein_distance(y,z) >= levenshtein_distance(x,z)\n",
|
||||
"#Output: True\n",
|
||||
"# miara każdej odleglosci musi być mniejsza lub równa sumie miar dwóch pozostałych"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "bibliographic-stopping",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Odpowiedź:"
|
||||
"Odpowiedź: Tak jest poprawną funkcją dystansu."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -223,7 +271,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 7,
|
||||
"id": "secondary-wrist",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -233,7 +281,7 @@
|
||||
"2"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -254,7 +302,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 8,
|
||||
"id": "associate-tuner",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -273,7 +321,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": 9,
|
||||
"id": "focal-pathology",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -283,7 +331,7 @@
|
||||
"0.9166666666666666"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -294,7 +342,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": 10,
|
||||
"id": "roman-ceiling",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -304,7 +352,7 @@
|
||||
"0.9428571428571428"
|
||||
]
|
||||
},
|
||||
"execution_count": 8,
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -315,7 +363,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": 11,
|
||||
"id": "invisible-cambodia",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -325,7 +373,7 @@
|
||||
"0.631578947368421"
|
||||
]
|
||||
},
|
||||
"execution_count": 9,
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -344,14 +392,43 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"execution_count": 12,
|
||||
"id": "genetic-cradle",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def fuzzy_lookup(sentence, threshold):\n",
|
||||
" return []"
|
||||
" return [ v for k,v in translation_memory.items() if levenshtein_similarity(sentence, k) > threshold ]"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 17,
|
||||
"id": "2e72b54a",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"['Press the ENTER button', 'System restart required']"
|
||||
]
|
||||
},
|
||||
"execution_count": 17,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"fuzzy_lookup('Spróbuj wyłączyć i włączyć komputer', 0.25)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "e1f15316",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@ -373,7 +450,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.10"
|
||||
"version": "3.11.0"
|
||||
},
|
||||
"subtitle": "2. Zaawansowane użycie pamięci tłumaczeń",
|
||||
"title": "Komputerowe wspomaganie tłumaczenia",
|
||||
|
225
lab/lab_03.ipynb
225
lab/lab_03.ipynb
@ -63,7 +63,9 @@
|
||||
"id": "diverse-sunglasses",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"Odpowiedź:"
|
||||
"Odpowiedź: Narzędzie DeepL: https://www.deepl.com/translator\n",
|
||||
"\n",
|
||||
"przetłumaczyło tekst \"prowadnice szaf metalowych\" na \"metal cabinet slides\""
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -87,6 +89,16 @@
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "8f6b6fa9",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import re"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "loving-prince",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -110,7 +122,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"execution_count": 3,
|
||||
"id": "bound-auction",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -128,13 +140,40 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"execution_count": 4,
|
||||
"id": "cognitive-cedar",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"def terminology_lookup():\n",
|
||||
" return []"
|
||||
"count_dictionary = {}\n",
|
||||
"\n",
|
||||
"def terminology_lookup(text, tags):\n",
|
||||
" return [(tag, [[m.start(), m.end()] \n",
|
||||
" for m in re.finditer(tag, text)])\n",
|
||||
" for tag in tags if tag in text]\n",
|
||||
" "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "9fe3b66f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[('program', [[14, 21], [291, 298], [468, 475], [516, 523], [533, 540]]),\n",
|
||||
" ('application', [[80, 91], [164, 175], [322, 333]])]"
|
||||
]
|
||||
},
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"terminology_lookup(text, dictionary)"
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -161,116 +200,74 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "tribal-attention",
|
||||
"execution_count": 6,
|
||||
"id": "7b7b7569",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Requirement already satisfied: spacy in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (3.7.4)\n",
|
||||
"Requirement already satisfied: spacy-legacy<3.1.0,>=3.0.11 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (3.0.12)\n",
|
||||
"Requirement already satisfied: spacy-loggers<2.0.0,>=1.0.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (1.0.5)\n",
|
||||
"Requirement already satisfied: murmurhash<1.1.0,>=0.28.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (1.0.10)\n",
|
||||
"Requirement already satisfied: cymem<2.1.0,>=2.0.2 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (2.0.8)\n",
|
||||
"Requirement already satisfied: preshed<3.1.0,>=3.0.2 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (3.0.9)\n",
|
||||
"Requirement already satisfied: thinc<8.3.0,>=8.2.2 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (8.2.3)\n",
|
||||
"Requirement already satisfied: wasabi<1.2.0,>=0.9.1 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (1.1.2)\n",
|
||||
"Requirement already satisfied: srsly<3.0.0,>=2.4.3 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (2.4.8)\n",
|
||||
"Requirement already satisfied: catalogue<2.1.0,>=2.0.6 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (2.0.10)\n",
|
||||
"Requirement already satisfied: weasel<0.4.0,>=0.1.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (0.3.4)\n",
|
||||
"Requirement already satisfied: typer<0.10.0,>=0.3.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (0.9.4)\n",
|
||||
"Requirement already satisfied: smart-open<7.0.0,>=5.2.1 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (6.4.0)\n",
|
||||
"Requirement already satisfied: tqdm<5.0.0,>=4.38.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (4.66.2)\n",
|
||||
"Requirement already satisfied: requests<3.0.0,>=2.13.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (2.31.0)\n",
|
||||
"Requirement already satisfied: pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (2.7.0)\n",
|
||||
"Requirement already satisfied: jinja2 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (3.1.3)\n",
|
||||
"Requirement already satisfied: setuptools in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (65.5.0)\n",
|
||||
"Requirement already satisfied: packaging>=20.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (24.0)\n",
|
||||
"Requirement already satisfied: langcodes<4.0.0,>=3.2.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (3.3.0)\n",
|
||||
"Requirement already satisfied: numpy>=1.19.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from spacy) (1.26.4)\n",
|
||||
"Requirement already satisfied: annotated-types>=0.4.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy) (0.6.0)\n",
|
||||
"Requirement already satisfied: pydantic-core==2.18.1 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy) (2.18.1)\n",
|
||||
"Requirement already satisfied: typing-extensions>=4.6.1 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from pydantic!=1.8,!=1.8.1,<3.0.0,>=1.7.4->spacy) (4.11.0)\n",
|
||||
"Requirement already satisfied: charset-normalizer<4,>=2 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from requests<3.0.0,>=2.13.0->spacy) (3.3.2)\n",
|
||||
"Requirement already satisfied: idna<4,>=2.5 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from requests<3.0.0,>=2.13.0->spacy) (3.7)\n",
|
||||
"Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from requests<3.0.0,>=2.13.0->spacy) (2.2.1)\n",
|
||||
"Requirement already satisfied: certifi>=2017.4.17 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from requests<3.0.0,>=2.13.0->spacy) (2024.2.2)\n",
|
||||
"Requirement already satisfied: blis<0.8.0,>=0.7.8 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from thinc<8.3.0,>=8.2.2->spacy) (0.7.11)\n",
|
||||
"Requirement already satisfied: confection<1.0.0,>=0.0.1 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from thinc<8.3.0,>=8.2.2->spacy) (0.1.4)\n",
|
||||
"Requirement already satisfied: click<9.0.0,>=7.1.1 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from typer<0.10.0,>=0.3.0->spacy) (8.1.7)\n",
|
||||
"Requirement already satisfied: cloudpathlib<0.17.0,>=0.7.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from weasel<0.4.0,>=0.1.0->spacy) (0.16.0)\n",
|
||||
"Requirement already satisfied: MarkupSafe>=2.0 in /Users/potoato/.pyenv/versions/3.11.0/lib/python3.11/site-packages (from jinja2->spacy) (2.1.5)\n",
|
||||
"\n",
|
||||
"for\n",
|
||||
"all\n",
|
||||
"Java\n",
|
||||
"programmer\n",
|
||||
":\n",
|
||||
"this\n",
|
||||
"section\n",
|
||||
"explain\n",
|
||||
"how\n",
|
||||
"to\n",
|
||||
"compile\n",
|
||||
"and\n",
|
||||
"run\n",
|
||||
"a\n",
|
||||
"swing\n",
|
||||
"application\n",
|
||||
"from\n",
|
||||
"the\n",
|
||||
"command\n",
|
||||
"line\n",
|
||||
".\n",
|
||||
"for\n",
|
||||
"information\n",
|
||||
"on\n",
|
||||
"compile\n",
|
||||
"and\n",
|
||||
"run\n",
|
||||
"a\n",
|
||||
"swing\n",
|
||||
"application\n",
|
||||
"use\n",
|
||||
"NetBeans\n",
|
||||
"IDE\n",
|
||||
",\n",
|
||||
"see\n",
|
||||
"Running\n",
|
||||
"Tutorial\n",
|
||||
"Examples\n",
|
||||
"in\n",
|
||||
"NetBeans\n",
|
||||
"IDE\n",
|
||||
".\n",
|
||||
"the\n",
|
||||
"compilation\n",
|
||||
"instruction\n",
|
||||
"work\n",
|
||||
"for\n",
|
||||
"all\n",
|
||||
"swing\n",
|
||||
"program\n",
|
||||
"—\n",
|
||||
"applet\n",
|
||||
",\n",
|
||||
"as\n",
|
||||
"well\n",
|
||||
"as\n",
|
||||
"application\n",
|
||||
".\n",
|
||||
"here\n",
|
||||
"be\n",
|
||||
"the\n",
|
||||
"step\n",
|
||||
"-PRON-\n",
|
||||
"need\n",
|
||||
"to\n",
|
||||
"follow\n",
|
||||
":\n",
|
||||
"install\n",
|
||||
"the\n",
|
||||
"late\n",
|
||||
"release\n",
|
||||
"of\n",
|
||||
"the\n",
|
||||
"Java\n",
|
||||
"SE\n",
|
||||
"platform\n",
|
||||
",\n",
|
||||
"if\n",
|
||||
"-PRON-\n",
|
||||
"have\n",
|
||||
"not\n",
|
||||
"already\n",
|
||||
"do\n",
|
||||
"so\n",
|
||||
".\n",
|
||||
"create\n",
|
||||
"a\n",
|
||||
"program\n",
|
||||
"that\n",
|
||||
"use\n",
|
||||
"Swing\n",
|
||||
"component\n",
|
||||
".\n",
|
||||
"compile\n",
|
||||
"the\n",
|
||||
"program\n",
|
||||
".\n",
|
||||
"run\n",
|
||||
"the\n",
|
||||
"program\n",
|
||||
".\n"
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip available: \u001b[0m\u001b[31;49m22.3\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.0\u001b[0m\n",
|
||||
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!pip install spacy"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "tribal-attention",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "OSError",
|
||||
"evalue": "[E050] Can't find model 'en_core_web_sm'. It doesn't seem to be a Python package or a valid path to a data directory.",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
||||
"\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)",
|
||||
"Cell \u001b[0;32mIn[8], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mspacy\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m nlp \u001b[38;5;241m=\u001b[39m \u001b[43mspacy\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43men_core_web_sm\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m doc \u001b[38;5;241m=\u001b[39m nlp(text)\n\u001b[1;32m 6\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m token \u001b[38;5;129;01min\u001b[39;00m doc:\n",
|
||||
"File \u001b[0;32m~/.pyenv/versions/3.11.0/lib/python3.11/site-packages/spacy/__init__.py:51\u001b[0m, in \u001b[0;36mload\u001b[0;34m(name, vocab, disable, enable, exclude, config)\u001b[0m\n\u001b[1;32m 27\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mload\u001b[39m(\n\u001b[1;32m 28\u001b[0m name: Union[\u001b[38;5;28mstr\u001b[39m, Path],\n\u001b[1;32m 29\u001b[0m \u001b[38;5;241m*\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 34\u001b[0m config: Union[Dict[\u001b[38;5;28mstr\u001b[39m, Any], Config] \u001b[38;5;241m=\u001b[39m util\u001b[38;5;241m.\u001b[39mSimpleFrozenDict(),\n\u001b[1;32m 35\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m Language:\n\u001b[1;32m 36\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Load a spaCy model from an installed package or a local path.\u001b[39;00m\n\u001b[1;32m 37\u001b[0m \n\u001b[1;32m 38\u001b[0m \u001b[38;5;124;03m name (str): Package name or model path.\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 49\u001b[0m \u001b[38;5;124;03m RETURNS (Language): The loaded nlp object.\u001b[39;00m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 51\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mutil\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload_model\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 52\u001b[0m \u001b[43m \u001b[49m\u001b[43mname\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 53\u001b[0m \u001b[43m \u001b[49m\u001b[43mvocab\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mvocab\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 54\u001b[0m \u001b[43m \u001b[49m\u001b[43mdisable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdisable\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 55\u001b[0m \u001b[43m \u001b[49m\u001b[43menable\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43menable\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 56\u001b[0m \u001b[43m \u001b[49m\u001b[43mexclude\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexclude\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 57\u001b[0m \u001b[43m \u001b[49m\u001b[43mconfig\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mconfig\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 58\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
|
||||
"File \u001b[0;32m~/.pyenv/versions/3.11.0/lib/python3.11/site-packages/spacy/util.py:472\u001b[0m, in \u001b[0;36mload_model\u001b[0;34m(name, vocab, disable, enable, exclude, config)\u001b[0m\n\u001b[1;32m 470\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m name \u001b[38;5;129;01min\u001b[39;00m OLD_MODEL_SHORTCUTS:\n\u001b[1;32m 471\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mIOError\u001b[39;00m(Errors\u001b[38;5;241m.\u001b[39mE941\u001b[38;5;241m.\u001b[39mformat(name\u001b[38;5;241m=\u001b[39mname, full\u001b[38;5;241m=\u001b[39mOLD_MODEL_SHORTCUTS[name])) \u001b[38;5;66;03m# type: ignore[index]\u001b[39;00m\n\u001b[0;32m--> 472\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mIOError\u001b[39;00m(Errors\u001b[38;5;241m.\u001b[39mE050\u001b[38;5;241m.\u001b[39mformat(name\u001b[38;5;241m=\u001b[39mname))\n",
|
||||
"\u001b[0;31mOSError\u001b[0m: [E050] Can't find model 'en_core_web_sm'. It doesn't seem to be a Python package or a valid path to a data directory."
|
||||
]
|
||||
}
|
||||
],
|
||||
@ -302,7 +299,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": null,
|
||||
"id": "surgical-demonstration",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -337,7 +334,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": null,
|
||||
"id": "superb-butterfly",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -356,7 +353,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"execution_count": null,
|
||||
"id": "acting-tolerance",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -374,7 +371,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"execution_count": null,
|
||||
"id": "eight-redhead",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -393,7 +390,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"execution_count": null,
|
||||
"id": "monetary-mambo",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
@ -422,7 +419,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.8.10"
|
||||
"version": "3.11.0"
|
||||
},
|
||||
"subtitle": "3. Terminologia",
|
||||
"title": "Komputerowe wspomaganie tłumaczenia",
|
||||
|
Loading…
Reference in New Issue
Block a user