Compare commits

...

14 Commits
main ... main

Author SHA1 Message Date
Adam Stelmaszyk
6040e09fa8 finished 2024-06-22 12:22:36 +02:00
Adam Stelmaszyk
0c0ef36a89 new code 2024-06-22 12:05:36 +02:00
Adam Stelmaszyk
018f891442 changes 2024-06-21 18:51:23 +02:00
Adam Stelmaszyk
8c41ad1dbe code 2024-06-21 18:19:28 +02:00
Adam Stelmaszyk
58e6e90d2a added notes 2024-05-05 08:21:38 +02:00
Adam Stelmaszyk
d824ffc9d9 updated 2024-05-05 08:06:32 +02:00
Adam Stelmaszyk
efcac35c9e updated 2024-05-05 08:05:59 +02:00
Adam Stelmaszyk
7fd3eb01b3 updated 2024-05-04 14:59:05 +02:00
Adam Stelmaszyk
ff131152f6 finished 6-7 2024-04-28 07:18:57 +03:00
Adam Stelmaszyk
622860d71e finshed 4 exercices 2024-04-27 19:50:45 +03:00
Adam Stelmaszyk
add3f3c9c2 not finished labs 2024-04-26 22:32:35 +03:00
Adam Stelmaszyk
854b3629df added lab3 2024-04-15 22:09:49 +02:00
Adam Stelmaszyk
e343070e32 finished second 2 ipynb 2024-04-13 14:23:30 +02:00
Adam Stelmaszyk
22d22f8f7f first task 2024-04-13 13:33:54 +02:00
36 changed files with 4068 additions and 250 deletions

BIN
.DS_Store vendored Normal file

Binary file not shown.

BIN
lab/.DS_Store vendored Normal file

Binary file not shown.

1
lab/.gitignore vendored
View File

@ -1 +1,2 @@
.ipynb_checkpoints/ .ipynb_checkpoints/
./local_data/**

BIN
lab/data/.DS_Store vendored Normal file

Binary file not shown.

View File

@ -0,0 +1,29 @@
/**
 * This is a simple Java class demonstrating grammatical errors in comments.
 */
public class SampleClass {

    // This is a incorrect comment
    private int sampleField;

    /**
     * This method does something very simple.
     * @param value This is a parameter that take a integer value.
     */
    public void doSomething(int value) {
        // This is a method that does something
        this.sampleField = value;
    }

    /**
     * This method returns the value of sample field.
     * It should be a integer value.
     * @return the value of sampleField
     */
    public int getSampleField() {
        // Return the value of the sample field
        return this.sampleField;
    }

    // Another bad comment here
}

View File

@ -52,7 +52,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 2,
"id": "narrow-romantic", "id": "narrow-romantic",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -71,7 +71,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 3,
"id": "indonesian-electron", "id": "indonesian-electron",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -82,7 +82,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 4,
"id": "compact-trinidad", "id": "compact-trinidad",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -92,7 +92,7 @@
"['Press the ENTER button']" "['Press the ENTER button']"
] ]
}, },
"execution_count": 3, "execution_count": 4,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -119,7 +119,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 5,
"id": "exposed-daniel", "id": "exposed-daniel",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -139,7 +139,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 6,
"id": "serial-velvet", "id": "serial-velvet",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -149,7 +149,7 @@
"['Press the ENTER button', 'Press the ENTER key']" "['Press the ENTER button', 'Press the ENTER key']"
] ]
}, },
"execution_count": 5, "execution_count": 6,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -176,7 +176,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 9,
"id": "every-gibson", "id": "every-gibson",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -186,7 +186,7 @@
"[]" "[]"
] ]
}, },
"execution_count": 6, "execution_count": 9,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -213,13 +213,26 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 11,
"id": "protected-rings", "id": "protected-rings",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"data": {
"text/plain": [
"['Press the ENTER button', 'Press the ENTER key']"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"def tm_lookup(sentence):\n", "def tm_lookup(sentence):\n",
" return ''" " return [entry[1] for entry in translation_memory if entry[0].lower() == sentence.lower()]\n",
"\n",
"tm_lookup('Wciśnij przycisk ENTER')"
] ]
}, },
{ {
@ -232,17 +245,17 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 18, "execution_count": 8,
"id": "severe-alloy", "id": "severe-alloy",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"''" "[]"
] ]
}, },
"execution_count": 18, "execution_count": 8,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -261,13 +274,29 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 11, "execution_count": 16,
"id": "structural-diesel", "id": "structural-diesel",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"data": {
"text/plain": [
"['Press the ENTER button', 'Press the ENTER key']"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"import string\n",
"\n",
"def tm_lookup(sentence):\n", "def tm_lookup(sentence):\n",
" return ''" " sentence = sentence.translate(str.maketrans('', '', string.punctuation))\n",
" return [entry[1] for entry in translation_memory if entry[0].lower() == sentence.lower()]\n",
"\n",
"tm_lookup('Wciśnij przycisk [ENTER]')"
] ]
}, },
{ {
@ -280,17 +309,17 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 12, "execution_count": 17,
"id": "brief-senegal", "id": "brief-senegal",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"data": { "data": {
"text/plain": [ "text/plain": [
"''" "[]"
] ]
}, },
"execution_count": 12, "execution_count": 17,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -317,13 +346,66 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 14, "execution_count": 26,
"id": "mathematical-customs", "id": "mathematical-customs",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Suggestion:\n"
]
},
{
"data": {
"text/plain": [
"['System restart required']"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"def differenceThanNotBiggerThanOneElement(firstSentence, secondSentence):\n",
" firstSentenceList = firstSentence.lower().split()\n",
" secondSentenceList = secondSentence.lower().split()\n",
"\n",
" diffNumber = 0\n",
"\n",
" for i in range(len(firstSentenceList)):\n",
" if(firstSentenceList[i] != secondSentenceList[i]):\n",
" diffNumber=diffNumber+1\n",
" if(diffNumber > 2):\n",
" return False\n",
"\n",
" return True\n",
"\n",
"def tm_lookup(sentence):\n", "def tm_lookup(sentence):\n",
" return ''" " sentence = sentence.translate(str.maketrans('', '', string.punctuation))\n",
"\n",
" exactMatchList = [entry[1] for entry in translation_memory if entry[0].lower() == sentence.lower()]\n",
"\n",
" if(len(exactMatchList) == 0):\n",
" diffMatchList = [entry[1] for entry in translation_memory if differenceThanNotBiggerThanOneElement(entry[0], sentence)]\n",
"\n",
" if(len(diffMatchList) > 0):\n",
" print('Suggestion:')\n",
" return diffMatchList\n",
"\n",
" else:\n",
" return exactMatchList\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
" \n",
"\n",
"tm_lookup('Wymagane ponowne uruchomienie maszyny')"
] ]
}, },
{ {
@ -344,7 +426,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 15, "execution_count": 27,
"id": "humanitarian-wrong", "id": "humanitarian-wrong",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -362,7 +444,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 16, "execution_count": 28,
"id": "located-perception", "id": "located-perception",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -374,7 +456,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 17, "execution_count": 29,
"id": "advised-casting", "id": "advised-casting",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -384,7 +466,7 @@
"[('przycisk', 'button'), ('drukarka', 'printer')]" "[('przycisk', 'button'), ('drukarka', 'printer')]"
] ]
}, },
"execution_count": 17, "execution_count": 29,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -406,7 +488,7 @@
"id": "defensive-fifteen", "id": "defensive-fifteen",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Odpowiedź:" "Odpowiedź: O(n * m)"
] ]
}, },
{ {
@ -419,13 +501,27 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 19, "execution_count": 34,
"id": "original-tunisia", "id": "original-tunisia",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"def glossary_lookup(sentence):\n", "def glossary_lookup(sentence):\n",
" return ''" " sentence_words = sentence.lower().split()\n",
" return [entry for entry in glossary if entry[0] in sentence_words]\n",
"\n",
"glossary_lookup('Każda Drukarka posiada Przycisk wznowienia drukowania')"
] ]
}, },
{ {
@ -438,13 +534,27 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 20, "execution_count": 35,
"id": "adolescent-semiconductor", "id": "adolescent-semiconductor",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"data": {
"text/plain": [
"[('przycisk', 'button'), ('drukarka', 'printer')]"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"def glossary_lookup(sentence):\n", "def glossary_lookup(sentence):\n",
" return ''" " sentence_words = set(sentence.lower().split())\n",
" return [entry for entry in glossary if entry[0] in sentence_words]\n",
"\n",
"glossary_lookup('Każda Drukarka posiada Przycisk wznowienia drukowania')"
] ]
} }
], ],
@ -467,7 +577,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.10" "version": "3.11.7"
}, },
"subtitle": "1. Podstawowe techniki wspomagania tłumaczenia", "subtitle": "1. Podstawowe techniki wspomagania tłumaczenia",
"title": "Komputerowe wspomaganie tłumaczenia", "title": "Komputerowe wspomaganie tłumaczenia",

View File

@ -57,7 +57,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 22,
"id": "confident-prison", "id": "confident-prison",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -80,13 +80,44 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 43,
"id": "continental-submission", "id": "continental-submission",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"data": {
"text/plain": [
"['The printer is switched off',\n",
" 'Check the network settings',\n",
" 'System restart required']"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"\n",
"def ice_lookup(sentence, prev_sentence, next_sentence):\n", "def ice_lookup(sentence, prev_sentence, next_sentence):\n",
" return []" " for index in range(len(translation_memory)):\n",
" if index == 0:\n",
" continue\n",
" elif index + 1 >= len(translation_memory):\n",
" return []\n",
" else:\n",
" middleText = translation_memory[index]\n",
" prevText = translation_memory[index-1]\n",
" nextText = translation_memory[index+1]\n",
" if(sentence == middleText[0] and prev_sentence == prevText[0] and next_sentence == nextText[0]):\n",
" return [middleText[1], prevText[1], nextText[1]]\n",
" \n",
" return []\n",
" \n",
"\n",
" \n",
" \n",
"ice_lookup('Drukarka jest wyłączona','Sprawdź ustawienia sieciowe','Wymagane ponowne uruchomienie komputera') \n"
] ]
}, },
{ {
@ -119,7 +150,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 44,
"id": "fourth-pillow", "id": "fourth-pillow",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -141,7 +172,7 @@
"id": "graduate-theorem", "id": "graduate-theorem",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Odpowiedź:" "Odpowiedź: Tak, Spełnia warunki dla 1,2,3,4, "
] ]
}, },
{ {
@ -179,7 +210,7 @@
"id": "metallic-leave", "id": "metallic-leave",
"metadata": {}, "metadata": {},
"source": [ "source": [
"Odpowiedź:" "Odpowiedź: Nie, Spełnia dla warunku 3, poniewaz x = 4, y = 4, to wychodzi d(x,y) = 3, a nie 0"
] ]
}, },
{ {
@ -223,7 +254,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 45,
"id": "secondary-wrist", "id": "secondary-wrist",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -233,7 +264,7 @@
"2" "2"
] ]
}, },
"execution_count": 5, "execution_count": 45,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -254,7 +285,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 46,
"id": "associate-tuner", "id": "associate-tuner",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -273,7 +304,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 7, "execution_count": 47,
"id": "focal-pathology", "id": "focal-pathology",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -283,7 +314,7 @@
"0.9166666666666666" "0.9166666666666666"
] ]
}, },
"execution_count": 7, "execution_count": 47,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -294,7 +325,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 48,
"id": "roman-ceiling", "id": "roman-ceiling",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -304,7 +335,7 @@
"0.9428571428571428" "0.9428571428571428"
] ]
}, },
"execution_count": 8, "execution_count": 48,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -315,7 +346,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 9, "execution_count": 49,
"id": "invisible-cambodia", "id": "invisible-cambodia",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -325,7 +356,7 @@
"0.631578947368421" "0.631578947368421"
] ]
}, },
"execution_count": 9, "execution_count": 49,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -344,13 +375,26 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 10, "execution_count": 51,
"id": "genetic-cradle", "id": "genetic-cradle",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"data": {
"text/plain": [
"['Press the ENTER button']"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"def fuzzy_lookup(sentence, threshold):\n", "def fuzzy_lookup(sentence, threshold):\n",
" return []" " return [entry[1] for entry in translation_memory if levenshtein_similarity(entry[0],sentence ) > threshold]\n",
"\n",
"fuzzy_lookup('Wciśnij przycisk Enter', 0.5)"
] ]
} }
], ],
@ -373,7 +417,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.10" "version": "3.11.7"
}, },
"subtitle": "2. Zaawansowane użycie pamięci tłumaczeń", "subtitle": "2. Zaawansowane użycie pamięci tłumaczeń",
"title": "Komputerowe wspomaganie tłumaczenia", "title": "Komputerowe wspomaganie tłumaczenia",

View File

@ -86,7 +86,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 55,
"id": "loving-prince", "id": "loving-prince",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
@ -100,6 +100,14 @@
"text += \" Create a program that uses Swing components. Compile the program. Run the program.\"" "text += \" Create a program that uses Swing components. Compile the program. Run the program.\""
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "05436dad",
"metadata": {},
"outputs": [],
"source": []
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "extreme-cycling", "id": "extreme-cycling",
@ -110,12 +118,12 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 56,
"id": "bound-auction", "id": "bound-auction",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"dictionary = ['program', 'application', 'applet' 'compile']" "dictionary = ['program', 'application', 'applet', 'compile']"
] ]
}, },
{ {
@ -128,13 +136,41 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 17,
"id": "cognitive-cedar", "id": "cognitive-cedar",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"data": {
"text/plain": [
"{'program': [(468, 475), (516, 523), (533, 540)],\n",
" 'application': [(80, 91), (164, 175)],\n",
" 'compile': [(56, 63), (504, 511)]}"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"def terminology_lookup():\n", "import re\n",
" return []" "\n",
"def terminology_lookup(dictionary, text):\n",
" termValues = dict()\n",
" for element in dictionary:\n",
" values = []\n",
" pattern = re.compile(r'\\b{}\\b'.format(re.escape(element)))\n",
" for match in pattern.finditer(text.lower()):\n",
" values.append((match.start(), match.end()))\n",
" \n",
" if len(values) != 0:\n",
" termValues[element] = values\n",
" \n",
" return termValues\n",
"\n",
"terminology_lookup(dictionary, text)\n",
"\n"
] ]
}, },
{ {
@ -161,7 +197,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 18,
"id": "tribal-attention", "id": "tribal-attention",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -205,7 +241,7 @@
"IDE\n", "IDE\n",
",\n", ",\n",
"see\n", "see\n",
"Running\n", "run\n",
"Tutorial\n", "Tutorial\n",
"Examples\n", "Examples\n",
"in\n", "in\n",
@ -218,7 +254,7 @@
"work\n", "work\n",
"for\n", "for\n",
"all\n", "all\n",
"swing\n", "Swing\n",
"program\n", "program\n",
"—\n", "—\n",
"applet\n", "applet\n",
@ -232,7 +268,7 @@
"be\n", "be\n",
"the\n", "the\n",
"step\n", "step\n",
"-PRON-\n", "you\n",
"need\n", "need\n",
"to\n", "to\n",
"follow\n", "follow\n",
@ -248,7 +284,7 @@
"platform\n", "platform\n",
",\n", ",\n",
"if\n", "if\n",
"-PRON-\n", "you\n",
"have\n", "have\n",
"not\n", "not\n",
"already\n", "already\n",
@ -260,7 +296,7 @@
"program\n", "program\n",
"that\n", "that\n",
"use\n", "use\n",
"Swing\n", "swing\n",
"component\n", "component\n",
".\n", ".\n",
"compile\n", "compile\n",
@ -302,13 +338,48 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 43,
"id": "surgical-demonstration", "id": "surgical-demonstration",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"data": {
"text/plain": [
"{'program': [(291, 299), (468, 475), (516, 523), (533, 540)],\n",
" 'application': [(80, 91), (164, 175), (322, 334)],\n",
" 'applet': [(302, 309)],\n",
" 'compile': [(56, 63), (134, 143), (504, 511)]}"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"def terminology_lookup():\n", "def terminology_lookup(dictionary, text):\n",
" return []" " termValues = dict()\n",
" lowerText = text.lower()\n",
" nlp = spacy.load(\"en_core_web_sm\")\n",
"\n",
" splitText = nlp(lowerText)\n",
" for findingWord in dictionary:\n",
" values = []\n",
" startFromIndex = 0\n",
"\n",
" for word in splitText:\n",
" if word.lemma_ == findingWord:\n",
" textBegining = lowerText.index(word.text,startFromIndex)\n",
" textEnding = textBegining + len(word)\n",
" startFromIndex = textEnding\n",
" values.append((textBegining,textEnding))\n",
" \n",
" if len(values) != 0:\n",
" termValues[findingWord] = values\n",
" \n",
" return termValues\n",
"\n",
"terminology_lookup(dictionary, text)"
] ]
}, },
{ {
@ -337,13 +408,31 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 54,
"id": "superb-butterfly", "id": "superb-butterfly",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"data": {
"text/plain": [
"set()"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"import spacy\n",
"\n",
"def get_nouns(text):\n", "def get_nouns(text):\n",
" return []" " nlp = spacy.load(\"en_core_web_sm\")\n",
" doc = nlp(text)\n",
" nouns = [token.text for token in doc if token.pos_ == \"NOUN\"]\n",
" return set(nouns)\n",
"\n",
"get_nouns(text)"
] ]
}, },
{ {
@ -374,13 +463,66 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 8, "execution_count": 71,
"id": "eight-redhead", "id": "eight-redhead",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"data": {
"text/plain": [
"{'line': 1,\n",
" 'release': 1,\n",
" 'compilation': 1,\n",
" 'component': 1,\n",
" 'section': 1,\n",
" 'information': 1,\n",
" 'program': 4,\n",
" 'command': 1,\n",
" 'platform': 1,\n",
" 'applet': 1,\n",
" 'application': 3,\n",
" 'swing': 4,\n",
" 'instruction': 1,\n",
" 'step': 1,\n",
" 'programmer': 1}"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"import spacy\n",
"\n",
"def get_nouns(text):\n",
" nlp = spacy.load(\"en_core_web_sm\")\n",
" doc = nlp(text)\n",
" nouns = [token.lemma_ for token in doc if token.pos_ == \"NOUN\"]\n",
" return set(nouns)\n",
"\n",
"def getElementsNumbers(dictionary, text):\n",
" termValues = dict()\n",
" lowerText = text.lower()\n",
" nlp = spacy.load(\"en_core_web_sm\")\n",
"\n",
" splitText = nlp(lowerText)\n",
" for findingWord in dictionary:\n",
" elementNumber = 0\n",
"\n",
" for word in splitText:\n",
" if word.lemma_ == findingWord:\n",
" elementNumber = elementNumber +1\n",
" \n",
" if elementNumber != 0:\n",
" termValues[findingWord] = elementNumber\n",
" \n",
" return termValues\n",
"\n",
"def extract_terms(text):\n", "def extract_terms(text):\n",
" return []" " return getElementsNumbers(get_nouns(text), text)\n",
"\n",
"extract_terms(text)"
] ]
}, },
{ {
@ -393,13 +535,75 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 9, "execution_count": 86,
"id": "monetary-mambo", "id": "monetary-mambo",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"def extract_terms(text):\n", "def get_dictonery_by_type(text, type):\n",
" return []" " nlp = spacy.load(\"en_core_web_sm\")\n",
" doc = nlp(text)\n",
" nouns = [token.lemma_ for token in doc if token.pos_ == type]\n",
" return set(nouns)\n",
"\n",
"\n",
"def extract_terms(text, type):\n",
" return getElementsNumbers(get_dictonery_by_type(text, type), text)\n",
"\n",
"\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 87,
"id": "8f7eeb73",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'compile': 3,\n",
" 'work': 1,\n",
" 'install': 1,\n",
" 'create': 1,\n",
" 'explain': 1,\n",
" 'run': 4,\n",
" 'see': 1,\n",
" 'need': 1,\n",
" 'do': 1,\n",
" 'follow': 1,\n",
" 'use': 2}"
]
},
"execution_count": 87,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"extract_terms(text, 'VERB')"
]
},
{
"cell_type": "code",
"execution_count": 93,
"id": "71c14cab",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'late': 1}"
]
},
"execution_count": 93,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"extract_terms(text, 'ADJ')"
] ]
} }
], ],
@ -422,7 +626,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.10" "version": "3.1.undefined"
}, },
"subtitle": "3. Terminologia", "subtitle": "3. Terminologia",
"title": "Komputerowe wspomaganie tłumaczenia", "title": "Komputerowe wspomaganie tłumaczenia",

File diff suppressed because one or more lines are too long

View File

@ -55,13 +55,39 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 91,
"id": "documented-hacker", "id": "documented-hacker",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"data": {
"text/plain": [
"[{'<root>': (0, 6)},\n",
" {'<name>': (6, 12)},\n",
" {'</name>': (16, 23)},\n",
" {'<age>': (23, 28)},\n",
" {'</age>': (30, 36)},\n",
" {'</root>': (36, 43)}]"
]
},
"execution_count": 91,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"import re \n",
"\n",
"text = \"<root><name>John</name><age>30</age></root>\"\n",
"\n",
"def find_tags(text):\n", "def find_tags(text):\n",
" return []" " xml_tag_pattern = r\"<[^>]+>\"\n",
"\n",
" tags_positions = [{match.group(): (match.start(), match.end())} for match in re.finditer(xml_tag_pattern, text)]\n",
" \n",
" return tags_positions\n",
"\n",
"find_tags(text)\n"
] ]
}, },
{ {
@ -74,13 +100,58 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 92,
"id": "unauthorized-study", "id": "unauthorized-study",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"test: True\n",
"<4.2.1>: False\n",
"<text>: False\n",
"<1.2.4>: False\n",
"test: True\n",
"test324234: True\n",
"test32443242: True\n",
"1.2.4: True\n",
"4.2.1: True\n",
">: True\n",
"<: True\n",
"><: True\n"
]
}
],
"source": [ "source": [
"\n",
"import re\n",
"\n",
"def is_translatable(text):\n", "def is_translatable(text):\n",
" return True" " non_translatable_pattern = r'<[^>]+>'\n",
"\n",
" if re.match(non_translatable_pattern, text.strip()):\n",
" return False \n",
" return True\n",
"\n",
"texts = [\n",
" \"test\",\n",
" \"<4.2.1>\",\n",
" \"<text>\",\n",
" \"<1.2.4>\",\n",
" \"test\",\n",
" \"test324234\",\n",
" \"test32443242\",\n",
" \"1.2.4\",\n",
" \"4.2.1\",\n",
" \">\",\n",
" \"<\",\n",
" \"><\",\n",
"]\n",
"\n",
"for text in texts:\n",
" print(f\"{text}: {is_translatable(text)}\")\n",
"\n"
] ]
}, },
{ {
@ -93,13 +164,100 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 93,
"id": "beautiful-mathematics", "id": "beautiful-mathematics",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"data": {
"text/plain": [
"[{'position': (13, 23),\n",
" 'date_format': 'DD/MM/RRRR',\n",
" 'day': 12,\n",
" 'month': 4,\n",
" 'year': 2024},\n",
" {'position': (61, 71),\n",
" 'date_format': 'DD/MM/RRRR',\n",
" 'day': 12,\n",
" 'month': 4,\n",
" 'year': 2024},\n",
" {'position': (41, 51),\n",
" 'date_format': 'DD-MM-RRRR',\n",
" 'day': 2,\n",
" 'month': 12,\n",
" 'year': 2023},\n",
" {'position': (13, 23),\n",
" 'date_format': 'DD.MM.RRRR',\n",
" 'day': 12,\n",
" 'month': 4,\n",
" 'year': 2024},\n",
" {'position': (41, 51),\n",
" 'date_format': 'DD.MM.RRRR',\n",
" 'day': 2,\n",
" 'month': 12,\n",
" 'year': 2023},\n",
" {'position': (61, 71),\n",
" 'date_format': 'DD.MM.RRRR',\n",
" 'day': 12,\n",
" 'month': 4,\n",
" 'year': 2024},\n",
" {'position': (13, 21),\n",
" 'date_format': 'DD/MM/RR',\n",
" 'day': 12,\n",
" 'month': 4,\n",
" 'year': 20},\n",
" {'position': (61, 69),\n",
" 'date_format': 'DD/MM/RR',\n",
" 'day': 12,\n",
" 'month': 4,\n",
" 'year': 20},\n",
" {'position': (88, 96),\n",
" 'date_format': 'DD/MM/RR',\n",
" 'day': 20,\n",
" 'month': 4,\n",
" 'year': 24}]"
]
},
"execution_count": 93,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"def find_dates(text):\n", "def find_dates(text):\n",
" return []" " patterns = [\n",
" r'(\\d{2})/(\\d{2})/(\\d{4})', # DD/MM/RRRR\n",
" r'(\\d{2})-(\\d{2})-(\\d{4})', # DD-MM-RRRR\n",
" r'(\\d{2}).(\\d{2}).(\\d{4})', # DD.MM.RRRR\n",
" r'(\\d{2}) (\\d{2}) (\\d{4})', # DD MM RRRR\n",
" r'(\\d{2})/(\\d{2})/(\\d{2})' # DD/MM/RR\n",
" ]\n",
"\n",
" date_formats = [\n",
" \"DD/MM/RRRR\",\n",
" \"DD-MM-RRRR\",\n",
" \"DD.MM.RRRR\",\n",
" \"DD MM RRRR\",\n",
" \"DD/MM/RR\"\n",
" ]\n",
"\n",
" results = []\n",
" for pattern, date_format in zip(patterns, date_formats):\n",
" for match in re.finditer(pattern, text):\n",
" day, month, year = match.groups()\n",
" results.append({\n",
" \"position\": match.span(),\n",
" \"date_format\": date_format,\n",
" \"day\": int(day),\n",
" \"month\": int(month),\n",
" \"year\": int(year)\n",
" })\n",
"\n",
" return results\n",
"\n",
"text = \"Data dsadasdj12/04/2024 oraz dnasjdjasndj02-12-2023, jeszcze 12/04/2024 i 04.12.24 oraz 20/04/24.\"\n",
"\n",
"find_dates(text)\n"
] ]
}, },
{ {
@ -124,14 +282,109 @@
] ]
}, },
{ {
"cell_type": "code", "cell_type": "markdown",
"execution_count": 4, "id": "dc46baa6",
"id": "finished-essex",
"metadata": {}, "metadata": {},
"outputs": [], "source": []
},
{
"cell_type": "code",
"execution_count": 94,
"id": "4ee148d5",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'position': (54, 64), 'date_format': '\\\\1/\\\\2/\\\\3', 'day': 21, 'month': 4, 'year': 2024}, {'position': (22, 32), 'date_format': '\\\\1.\\\\2.\\\\3', 'day': 21, 'month': 4, 'year': 2021}, {'position': (54, 64), 'date_format': '\\\\1.\\\\2.\\\\3', 'day': 21, 'month': 4, 'year': 2024}, {'position': (22, 32), 'date_format': '\\\\1 \\\\2 \\\\3', 'day': 21, 'month': 4, 'year': 2021}, {'position': (54, 62), 'date_format': '\\\\1/\\\\2/\\\\3', 'day': 21, 'month': 4, 'year': 20}]\n",
"\n",
"\n",
"Translated text (Europe format): Aujourd'hui, c'est le 21.04.2021 et demain ce sera le 21.04.2024.\n"
]
}
],
"source": [ "source": [
"def correct_dates(source_segment, target_segment, date_format):\n", "from queue import Full\n",
" return ''" "import re\n",
"\n",
"date_formats = [\n",
" r'\\1/\\2/\\3',\n",
" r'\\1-\\2-\\3',\n",
" r'\\1.\\2.\\3',\n",
" r'\\1 \\2 \\3',\n",
" r'\\1/\\2/\\3',\n",
" ]\n",
"\n",
"patterns = [\n",
" r'(\\d{2})/(\\d{2})/(\\d{4})', # DD/MM/RRRR\n",
" r'(\\d{2})-(\\d{2})-(\\d{4})', # DD-MM-RRRR\n",
" r'(\\d{2}).(\\d{2}).(\\d{4})', # DD.MM.RRRR\n",
" r'(\\d{2}) (\\d{2}) (\\d{4})', # DD MM RRRR\n",
" r'(\\d{2})/(\\d{2})/(\\d{2})' # DD/MM/RR\n",
" ]\n",
"\n",
"\n",
"def find_dates(text):\n",
" \n",
" results = []\n",
" for pattern, date_format in zip(patterns, date_formats):\n",
" for match in re.finditer(pattern, text):\n",
" day, month, year = match.groups()\n",
" results.append({\n",
" \"position\": match.span(),\n",
" \"date_format\": date_format,\n",
" \"day\": int(day),\n",
" \"month\": int(month),\n",
" \"year\": int(year)\n",
" })\n",
"\n",
" return results\n",
"\n",
"\n",
"def translate_dates(source_text, target_text, target_format):\n",
" source_dates = find_dates(source_text)\n",
" target_dates = find_dates(target_text)\n",
" print(target_dates)\n",
" print('\\n')\n",
"\n",
" if len(source_dates) != len(target_dates):\n",
" print(\"Uwaga: Rózna liczba dat\")\n",
" return\n",
"\n",
" for source_date, target_date in zip(source_dates, target_dates):\n",
" if f\"{source_date['day']}/{source_date['month']}/{source_date['year']}\" != f\"{target_date['day']}/{target_date['month']}/{target_date['year']}\":\n",
" print(\"Uwaga: Daty są rózne\")\n",
" return\n",
" \n",
" replacement = ''\n",
" if target_format == \"Europe\":\n",
" replacement = r'\\1/\\2/\\3'\n",
" elif target_format == \"US\":\n",
" replacement = r'\\1/\\2/\\3'\n",
" elif target_format == \"digit-dot\":\n",
" replacement = r'\\1.\\2.\\3'\n",
" else:\n",
" print(\"Uwaga: Niewspierany format.\")\n",
" return\n",
"\n",
" \n",
"\n",
" final_text = target_text\n",
"\n",
" for pattern in patterns:\n",
" final_text = re.sub(pattern, replacement, final_text)\n",
"\n",
" return final_text\n",
"\n",
"# Example usage:\n",
"source_text = \"Today is 21 04 2021 and tomorrow will be 21/04/2024.\"\n",
"target_text = \"Aujourd'hui, c'est le 21 04 2021 et demain ce sera le 21/04/2024.\"\n",
"translated_text = translate_dates(source_text, target_text, \"digit-dot\")\n",
"print(\"Translated text (Europe format):\", translated_text)\n",
"\n",
"\n",
"\n"
] ]
}, },
{ {
@ -176,13 +429,63 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 153,
"id": "romance-judge", "id": "romance-judge",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"data": {
"text/plain": [
"'<greeting>Witaj </greeting><name>Ania! </name></name></name></name></name>John </name>'"
]
},
"execution_count": 153,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"import re\n",
"\n",
"def split_text(text):\n",
" parts = re.split(r'(<\\w+>|<\\/\\w+>| )', text)\n",
" \n",
" split_list = []\n",
" \n",
" for part in parts:\n",
" if part.strip(): \n",
" split_list.append((part, part.startswith(\"<\") and part.endswith(\">\")))\n",
" \n",
" return split_list\n",
"\n",
"def transfer_tags(source_segment, target_segment):\n", "def transfer_tags(source_segment, target_segment):\n",
" return ''" " source_interation_index = 0\n",
"\n",
" split_source_segment = split_text(source_segment)\n",
"\n",
" target_text_with_tags = ''\n",
"\n",
" for text in target_segment.split():\n",
" source_element = split_source_segment[source_interation_index]\n",
" \n",
" while(source_element[1]):\n",
" target_text_with_tags += source_element[0]\n",
" source_interation_index = source_interation_index + 1\n",
" source_element = split_source_segment[source_interation_index]\n",
"\n",
" target_text_with_tags += text + ' '\n",
" source_interation_index = source_interation_index + 1\n",
"\n",
" for index in range(source_interation_index, len(split_source_segment)):\n",
" target_text_with_tags += split_source_segment[index][0]\n",
"\n",
" return target_text_with_tags\n",
"\n",
"\n",
"source_segment = \"<greeting>Hello</greeting> <name>Ania!</name></name></name></name></name>John</name>\"\n",
"target_segment = \"Witaj Ania! John\"\n",
"\n",
"transfer_tags(source_segment,target_segment)\n"
] ]
} }
], ],
@ -205,7 +508,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.10" "version": "3.11.7"
}, },
"subtitle": "6,7. Preprocessing i postprocessing", "subtitle": "6,7. Preprocessing i postprocessing",
"title": "Komputerowe wspomaganie tłumaczenia", "title": "Komputerowe wspomaganie tłumaczenia",

View File

@ -57,13 +57,59 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 2,
"id": "moving-clothing", "id": "d4f068df",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/anaconda3/lib/python3.11/site-packages/nltk/translate/bleu_score.py:552: UserWarning: \n",
"The hypothesis contains 0 counts of 3-gram overlaps.\n",
"Therefore the BLEU score evaluates to 0, independently of\n",
"how many N-gram overlaps of lower order it contains.\n",
"Consider using lower n-gram order or use SmoothingFunction()\n",
" warnings.warn(_msg)\n",
"/opt/anaconda3/lib/python3.11/site-packages/nltk/translate/bleu_score.py:552: UserWarning: \n",
"The hypothesis contains 0 counts of 4-gram overlaps.\n",
"Therefore the BLEU score evaluates to 0, independently of\n",
"how many N-gram overlaps of lower order it contains.\n",
"Consider using lower n-gram order or use SmoothingFunction()\n",
" warnings.warn(_msg)\n"
]
},
{
"data": {
"text/plain": [
"3.984587822441638e-156"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"import zipfile\n",
"import nltk.translate.bleu_score as bleu\n",
"import string\n",
"\n",
"def remove_punctuation(text):\n",
" text_without_punctuations = text.translate(str.maketrans('', '', string.punctuation))\n",
" sentences = text_without_punctuations.split('\\n')\n",
" return [[word.lower() for word in sentence.split()] for sentence in sentences if sentence != '']\n",
"\n",
"def calculate_bleu():\n", "def calculate_bleu():\n",
" return 0" " zip = zipfile.ZipFile('data/corpus_corrected.zip')\n",
" files = {name: remove_punctuation(zip.read(name).decode('utf-8'))\n",
" for name in zip.namelist()}\n",
" \n",
" corpus_de_human, corpus_de_nmt = files['corpus_de_human.txt'], files['corpus_de_nmt.txt']\n",
" \n",
" return bleu.corpus_bleu(corpus_de_human, corpus_de_nmt)\n",
"\n",
"calculate_bleu()"
] ]
}, },
{ {
@ -76,13 +122,50 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 3,
"id": "lasting-rolling", "id": "lasting-rolling",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 to 100 - 4.97555004481153e-232\n",
"500 to 600 - 5.956707985683837e-232\n",
"800 to 900 - 4.774461089627919e-232\n",
"200 to 300 - 5.56331772444502e-232\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/opt/anaconda3/lib/python3.11/site-packages/nltk/translate/bleu_score.py:552: UserWarning: \n",
"The hypothesis contains 0 counts of 2-gram overlaps.\n",
"Therefore the BLEU score evaluates to 0, independently of\n",
"how many N-gram overlaps of lower order it contains.\n",
"Consider using lower n-gram order or use SmoothingFunction()\n",
" warnings.warn(_msg)\n"
]
}
],
"source": [ "source": [
"def analyze_bleu():\n", "\n",
" return []" "def analyze_bleu(start_sentence_index, finish_sentence_index):\n",
" zip = zipfile.ZipFile('data/corpus_corrected.zip')\n",
" files = {name: remove_punctuation(zip.read(name).decode('utf-8'))\n",
" for name in zip.namelist()}\n",
" \n",
" corpus_de_human, corpus_de_nmt = files['corpus_de_human.txt'][start_sentence_index:finish_sentence_index], files['corpus_de_nmt.txt'][start_sentence_index:finish_sentence_index]\n",
" \n",
" return bleu.corpus_bleu(corpus_de_human, corpus_de_nmt)\n",
"\n",
"\n",
"print(\"0 to 100 - \"+str(analyze_bleu(0, 100)))\n",
"print(\"500 to 600 - \"+str(analyze_bleu(500, 600)))\n",
"print(\"800 to 900 - \"+str(analyze_bleu(800, 900)))\n",
"print(\"200 to 300 - \"+str(analyze_bleu(200, 300)))\n",
"\n"
] ]
}, },
{ {
@ -102,6 +185,12 @@
" * N - liczba słów w tłumaczeniu referencyjnym (N=S+D+C)" " * N - liczba słów w tłumaczeniu referencyjnym (N=S+D+C)"
] ]
}, },
{
"cell_type": "markdown",
"id": "fb4f02ae",
"metadata": {},
"source": []
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "conscious-cookbook", "id": "conscious-cookbook",
@ -120,13 +209,39 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 25,
"id": "occupied-swing", "id": "occupied-swing",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"data": {
"text/plain": [
"0.17355216569308377"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"from jiwer import wer\n",
"import zipfile\n",
"\n",
"def calculate_wer():\n", "def calculate_wer():\n",
" return 0" " ourZip = zipfile.ZipFile('data/corpus_corrected.zip')\n",
" files = {name: remove_punctuation(ourZip.read(name).decode('utf-8'))\n",
" for name in ourZip.namelist()}\n",
" \n",
" corpus_de_human, corpus_de_nmt = files['corpus_de_human.txt'], files['corpus_de_nmt.txt']\n",
"\n",
" sum_wer = 0\n",
" for human_sent, nmt_sent in zip(corpus_de_human, corpus_de_nmt):\n",
" sum_wer+= wer(\" \".join(human_sent), \" \".join(nmt_sent))\n",
"\n",
" return sum_wer/(len(corpus_de_human))\n",
"\n",
"calculate_wer()"
] ]
}, },
{ {
@ -147,13 +262,38 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 35,
"id": "immediate-element", "id": "immediate-element",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"data": {
"text/plain": [
"2.653"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"import Levenshtein\n",
"\n",
"def calculate_levenshtein():\n", "def calculate_levenshtein():\n",
" return 0" " ourZip = zipfile.ZipFile('data/corpus_corrected.zip')\n",
" files = {name: remove_punctuation(ourZip.read(name).decode('utf-8'))\n",
" for name in ourZip.namelist()}\n",
" \n",
" corpus_de_human, corpus_de_nmt = files['corpus_de_human.txt'], files['corpus_de_nmt.txt']\n",
"\n",
" sum_disatnce = 0\n",
" for human_element, nmt_element in zip(corpus_de_human, corpus_de_nmt):\n",
" sum_disatnce+= Levenshtein.distance(human_element, nmt_element)\n",
"\n",
" return sum_disatnce/(len(corpus_de_human))\n",
"\n",
"calculate_levenshtein()\n"
] ]
}, },
{ {
@ -177,28 +317,65 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 4,
"id": "descending-easter", "id": "descending-easter",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"from PyDictionary import PyDictionary\n",
"import zipfile\n",
"import re\n",
"\n",
"def transalate(word_list):\n",
" transalted_words = {}\n",
" for word in word_list:\n",
" try:\n",
" translation = PyDictionary().translate(word, 'German')\n",
" if translation:\n",
" transalted_words[word] = translation\n",
" except Exception as e:\n",
" print('Exception')\n",
"\n",
" return transalted_words\n",
"\n",
"def analyze_translations():\n", "def analyze_translations():\n",
" return []" " ourZip = zipfile.ZipFile('data/corpus_corrected.zip')\n",
" files = {name: remove_punctuation(ourZip.read(name).decode('utf-8'))\n",
" for name in ourZip.namelist()}\n",
" \n",
" corpus_de_human, corpus_de_nmt, corpus_en = files['corpus_de_human.txt'], files['corpus_de_nmt.txt'], files['corpus_en.txt']\n",
"\n",
" nmt_sum = 0\n",
" human_sum = 0\n",
"\n",
" for human_element, nmt_element, element in zip(corpus_de_human, corpus_de_nmt, corpus_en):\n",
" transalted_words = transalate(element)\n",
"\n",
" nmt_sum += sum(1 for word in nmt_element if transalted_words.get(word.lower()))\n",
"\n",
" human_sum += sum(1 for word in human_element if transalted_words.get(word.lower()))\n",
"\n",
"\n",
" print(nmt_sum)\n",
" print(human_sum)\n",
"\n",
"#I think the PyDictionary mode doesn't work, the info from https://github.com/geekpradd/PyDictionary\n",
"#NOTE: Mainintaing this module requires constantly changing the scrapping endpoints which unfortunately I no longer have the bandwidth to do so, so this module is DEPRECATED. Kindly use other substitutes available on PyPI. Thanks!\n",
"#PyDictionary is a Dictionary Module for Python 2/3 to get meanings, translations, synonyms and Antonyms of words. It uses WordNet for getting meanings, Google for translations, and synonym.com for getting synonyms and antonyms.\n",
"#This module uses Python Requests, BeautifulSoup4 and goslate as dependencies\n",
" \n"
] ]
} }
], ],
"metadata": { "metadata": {
"author": "Rafał Jaworski", "author": "Rafał Jaworski",
"email": "rjawor@amu.edu.pl", "email": "rjawor@amu.edu.pl",
"lang": "pl",
"subtitle": "8. Wykorzystanie tłumaczenia automatycznego we wspomaganiu tłumaczenia",
"title": "Komputerowe wspomaganie tłumaczenia",
"year": "2021",
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "Python 3",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },
"lang": "pl",
"language_info": { "language_info": {
"codemirror_mode": { "codemirror_mode": {
"name": "ipython", "name": "ipython",
@ -209,8 +386,11 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.10" "version": "3.11.7"
} },
"subtitle": "8. Wykorzystanie tłumaczenia automatycznego we wspomaganiu tłumaczenia",
"title": "Komputerowe wspomaganie tłumaczenia",
"year": "2021"
}, },
"nbformat": 4, "nbformat": 4,
"nbformat_minor": 5 "nbformat_minor": 5

File diff suppressed because one or more lines are too long

View File

@ -52,13 +52,39 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 2,
"id": "german-dispute", "id": "german-dispute",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"To jest próba.\n",
"Test musi byc wykonany.\n",
"Wszystko jest okey.\n",
"Źródło wody.\n",
"Ósemka\n"
]
}
],
"source": [ "source": [
"def sentence_split(text):\n", "import regex as re\n",
" return []" "\n",
"def segment_text(text):\n",
" regex = re.compile(r'\\p{Lu}')\n",
" positions = [match.start() for match in regex.finditer(text)]\n",
" positions.insert(0, 0)\n",
" positions.append(len(text))\n",
" segments = [text[positions[i]:positions[i+1]].strip() for i in range(len(positions)-1)]\n",
"\n",
" return segments\n",
"\n",
"text = \"To jest próba. Test musi byc wykonany. Wszystko jest okey. Źródło wody. Ósemka\"\n",
"segments = segment_text(text)\n",
"for segment in segments:\n",
" print(segment)\n"
] ]
}, },
{ {
@ -71,13 +97,739 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 6,
"id": "guilty-morocco", "id": "guilty-morocco",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"data": {
"text/plain": [
"['epoznan.pl - pierwszy portal',\n",
" 'Poznania',\n",
" 'Wyszukaj',\n",
" 'Zaloguj',\n",
" 'Login',\n",
" 'Hasło',\n",
" 'Zapomniałeś hasła?',\n",
" 'Zaloguj',\n",
" 'Nie masz konta?',\n",
" 'Zarejestruj się',\n",
" 'Wiadomości kronika policyjna inwestycje nieruchomości biznes polityka komunikacja wywiady kultura historia life style region pogoda',\n",
" 'Pogoda',\n",
" 'Sport',\n",
" 'Kultura i rozrywka',\n",
" 'Korki',\n",
" 'Forum',\n",
" 'Kontakt 1 godzina temu',\n",
" 'Zbierają niezbędne rzeczy dla pogorzelców z ul.',\n",
" 'Promienistej.',\n",
" 'Ogień strawił dorobek życia 31 1 godzina temu',\n",
" 'Korek na',\n",
" 'A2.',\n",
" 'Blokada jednego pasa ruchu z powodu stłuczki 28 1 godzina temu',\n",
" 'Spore utrudnienia za węzłem autostradowym.',\n",
" 'Kierowca samochodu wjechał w bariery.',\n",
" 'Ranna jedna osoba 11 2 godziny temu \"',\n",
" 'Cisza dzieciaki, cichutko!\".',\n",
" 'Idzie',\n",
" 'Magda',\n",
" 'Gessler.',\n",
" 'Kolejne rewolucje w',\n",
" 'Wielkopolsce 41 2 godziny temu',\n",
" 'Poszukujesz rehabilitacji - przyjęcie w 72 godziny',\n",
" 'Reklama 4 3 godziny temu',\n",
" 'Wielki turniej piłki siatkowej nad',\n",
" 'Rusałką.',\n",
" 'Kibice zobaczą 90 najlepszych polskich zespołów 63 3 godziny temu',\n",
" 'Z',\n",
" 'Tesli zniknęły kołpaki,',\n",
" 'Policja szuka mężczyzny.',\n",
" 'Sprawcę uwieczniły kamery zamontowane w aucie 95 1 3 godziny temu',\n",
" 'Sprzedają lemoniadę na poznańskim osiedlu, by zarobić na piłkę \"jak z',\n",
" 'E',\n",
" 'U',\n",
" 'R',\n",
" 'O 2024\" 81 3 godziny temu',\n",
" 'Mieszkańcy wielkopolskiej miejscowości od miesięcy bez lekarza rodzinnego.',\n",
" 'Dotychczasowy się rozchorował 22 3 godziny temu',\n",
" 'Zabarykadował się w domu, groził wysadzeniem budynku.',\n",
" 'Interweniowały służby 21 4 godziny temu',\n",
" 'Od poniedziałku zmiana organizacji ruchu na',\n",
" 'A2 pod',\n",
" 'Poznaniem! 14 2 4 godziny temu',\n",
" 'Policja próbuje ustalić tożsamość zmarłego mężczyzny.',\n",
" 'Zrekonstruowano jego wizerunek 20 2 4 godziny temu',\n",
" 'Budują nowe drogi rowerowe w centrum, będą utrudnienia dla kierowców 35 2 5 godzin temu',\n",
" 'Przetrwał pandemię i remont, od 30 lat prowadzi kram z pamiątkami na',\n",
" 'Starym',\n",
" 'Rynku 37 3 5 godzin temu',\n",
" 'Rodzina',\n",
" 'Med przyjazne miejsce dla',\n",
" 'Ciebie i rodziny - nowa placówka lekarza rodzinnego w',\n",
" 'Poznaniu',\n",
" 'Reklama 8 5 godzin temu',\n",
" 'Najpierw w kościele stworzył salkę zabaw dla dzieci na czas mszy, teraz postawił ofiaromat.',\n",
" 'Nie wszystkim się to podoba 99 2 6 godzin temu',\n",
" 'Uniewinniono policjantów, którzy w',\n",
" 'Antoninku postrzelili 39-latka chorego na schizofrenię 51 7 godzin temu',\n",
" 'Sprawdzają czy kierowcy pojazdów \"na aplikację\" mają stosowne dokumenty.',\n",
" 'Posypały się mandaty 71 4 7 godzin temu',\n",
" 'Koziołek od',\n",
" 'Kawu także w',\n",
" 'Berlinie. \"',\n",
" 'Walczymy dalej\".',\n",
" 'W piątek mecz o wszystko',\n",
" 'Polaków 72 8 godzin temu',\n",
" 'Praga i',\n",
" 'Kopenhaga z',\n",
" 'Ryanair - kierunki na letnią wycieczkę #',\n",
" 'Prostoz',\n",
" 'Poznania',\n",
" 'Reklama 5 8 godzin temu',\n",
" 'Były zastępca prezydenta',\n",
" 'Poznania ma nową pracę.',\n",
" 'Dość zaskakującą 106 9 godzin temu',\n",
" 'Onkolog z poznańskiego szpitala zatrzymana przez policję.',\n",
" 'Za opłatą masowo wystawiała recepty na opioidy, także na nieżyjących pacjentów 64 9 godzin temu',\n",
" 'Przebudowali perony, będzie parking dla pasażerów 33 10 godzin temu',\n",
" 'Miało jej nie być, ale jednak się odbędzie.',\n",
" 'Parada',\n",
" 'Sobótkowa w piątek na',\n",
" 'Warcie! 42 10 godzin temu 5 pomysłów na wyjątkowy prezent dla aktywnego taty - nasze propozycje',\n",
" 'Reklama 10 godzin temu',\n",
" 'Przed nami gorący piątek, który zakończy się burzami.',\n",
" 'Może być bardzo niebezpiecznie! 26 10 godzin temu',\n",
" 'Mieszkania w komfortowych lokalizacjach od',\n",
" 'Grupy',\n",
" 'Inwest',\n",
" 'Reklama 5 18 godzin temu',\n",
" 'Klub we',\n",
" 'Wtórku wydał kolejne oświadczenie w związku ze śmiercią 25-latka.',\n",
" 'Mają poprawić bezpieczeństwo 22 19 godzin temu',\n",
" 'Zauważyła starszą panią szukającą jedzenia w śmietniku, chciała jej pomóc 61 19 godzin temu',\n",
" 'W jednej z poznańskich \"',\n",
" 'Biedronek\" zmarnowała się żywność? \"',\n",
" 'Padły szafy chłodnicze\" 39',\n",
" 'Więcej wiadomości',\n",
" 'Na poznańskim osiedlu od rana skargi na intensywny zapach wanilii i karmelu. \"',\n",
" 'Nie można otworzyć okna\"',\n",
" 'Były proboszcz podpoznańskiej miejscowości skazany za molestowanie 13-latka.',\n",
" 'Ma trafić do więzienia',\n",
" 'Wydał 10 złotych, wygrał pół miliona!',\n",
" 'Kalendarium',\n",
" 'W najbliższym czasie:',\n",
" 'Zobacz więcej wydarzeń',\n",
" 'Imprezy',\n",
" 'R',\n",
" 'O',\n",
" 'N',\n",
" 'N',\n",
" 'I',\n",
" 'E',\n",
" 'F',\n",
" 'E',\n",
" 'R',\n",
" 'R',\n",
" 'A',\n",
" 'R',\n",
" 'I |',\n",
" 'X-',\n",
" 'D',\n",
" 'E',\n",
" 'M',\n",
" 'O',\n",
" 'N',\n",
" 'P',\n",
" 'O',\n",
" 'Z',\n",
" 'N',\n",
" 'A',\n",
" 'Ń',\n",
" 'X-',\n",
" 'Demon',\n",
" 'Poznań dzisiaj',\n",
" 'Kup bilet',\n",
" 'Spektakl',\n",
" 'E',\n",
" 'T',\n",
" 'H',\n",
" 'N',\n",
" 'O',\n",
" 'P',\n",
" 'O',\n",
" 'R',\n",
" 'T 2024/',\n",
" 'Małe',\n",
" 'Ethno:',\n",
" 'D',\n",
" 'A',\n",
" 'W',\n",
" 'N',\n",
" 'E',\n",
" 'Z',\n",
" 'A',\n",
" 'B',\n",
" 'A',\n",
" 'W',\n",
" 'K',\n",
" 'I',\n",
" 'L',\n",
" 'U',\n",
" 'D',\n",
" 'O',\n",
" 'W',\n",
" 'E',\n",
" 'W',\n",
" 'P',\n",
" 'O',\n",
" 'L',\n",
" 'S',\n",
" 'C',\n",
" 'E',\n",
" 'Dziedziniec',\n",
" 'Zamkowy jutro',\n",
" 'Kup bilet',\n",
" 'Spektakl',\n",
" 'Trzy akordy gniewu',\n",
" 'Republika',\n",
" 'Sztuki',\n",
" 'Tłusta',\n",
" 'Langusta jutro',\n",
" 'Kup bilet',\n",
" 'Hard',\n",
" 'Rock /',\n",
" 'Metal',\n",
" 'W',\n",
" 'I',\n",
" 'S',\n",
" 'D',\n",
" 'O',\n",
" 'M',\n",
" 'I',\n",
" 'N',\n",
" 'C',\n",
" 'H',\n",
" 'A',\n",
" 'I',\n",
" 'N',\n",
" 'S |',\n",
" 'P',\n",
" 'O',\n",
" 'Z',\n",
" 'N',\n",
" 'A',\n",
" 'Ń',\n",
" 'Pod',\n",
" 'Minogą za 33 dni',\n",
" 'Kup bilet',\n",
" 'Hip',\n",
" 'Hop /',\n",
" 'Rap',\n",
" 'Hip',\n",
" 'Hop',\n",
" 'Festival',\n",
" 'Poznań 2024',\n",
" 'Jezioro',\n",
" 'Maltańskie za 64 dni',\n",
" 'Kup bilet',\n",
" 'Spektakl',\n",
" 'Piękna i',\n",
" 'Bestia',\n",
" 'Teatr',\n",
" 'Cortiqué',\n",
" 'Anny',\n",
" 'Niedźwiedź za 109 dni',\n",
" 'Kup bilet',\n",
" 'Koncert',\n",
" 'Marcin',\n",
" 'Kydryński prezentuje:',\n",
" 'S',\n",
" 'I',\n",
" 'E',\n",
" 'S',\n",
" 'T',\n",
" 'A w drodze/',\n",
" 'N',\n",
" 'A',\n",
" 'N',\n",
" 'C',\n",
" 'Y',\n",
" 'V',\n",
" 'I',\n",
" 'E',\n",
" 'I',\n",
" 'R',\n",
" 'A wokalistka z',\n",
" 'Wysp',\n",
" 'Zielonego',\n",
" 'Przylądka',\n",
" 'Aula',\n",
" 'Nova',\n",
" 'Akademii',\n",
" 'Muzycznej za 114 dni',\n",
" 'Kup bilet',\n",
" 'Spektakl',\n",
" 'D',\n",
" 'E',\n",
" 'A',\n",
" 'R',\n",
" 'E',\n",
" 'V',\n",
" 'A',\n",
" 'N',\n",
" 'H',\n",
" 'A',\n",
" 'N',\n",
" 'S',\n",
" 'E',\n",
" 'N',\n",
" 'Teatr',\n",
" 'Muzyczny w',\n",
" 'Poznaniu za 162 dni',\n",
" 'Kup bilet',\n",
" 'W kinie',\n",
" 'Pełny repertuar',\n",
" 'Czytelnicy poszukują',\n",
" 'Zgłoś zaginięcie 18.06.2024',\n",
" 'Zgubiono portfelik \"',\n",
" 'W poniedziałek (17.06) na parkingu w starym korycie',\n",
" 'Warty znaleziono portfelik.',\n",
" 'Właściciela zguby proszę o kontakt\"',\n",
" 'K',\n",
" 'O',\n",
" 'N',\n",
" 'T',\n",
" 'A',\n",
" 'K',\n",
" 'T 601775589',\n",
" 'Dziś w',\n",
" 'Poznaniu 27℃ 14℃',\n",
" 'Poziom opadów: 15.5 mm',\n",
" 'Wiatr do: 19 km',\n",
" 'Stan powietrza',\n",
" 'P',\n",
" 'M2.5 11.80 μg/m3',\n",
" 'Bardzo dobry',\n",
" 'Zobacz pogodę na jutro',\n",
" 'Poznań',\n",
" 'Poznań',\n",
" 'Poznań',\n",
" 'Poznań',\n",
" 'Waszym zdaniem',\n",
" 'Były zastępca prezydenta',\n",
" 'Poznania ma nową pracę.',\n",
" 'Dość zaskakującą ~hahaha 8 godzin temu',\n",
" 'Tusk nie był komisarzem, pajacu.',\n",
" 'Komisarzem był',\n",
" 'Pi',\n",
" 'Sowiec',\n",
" 'Janusz',\n",
" 'Wojciechowski, który w imieniu',\n",
" 'Pi',\n",
" 'S przygotował',\n",
" 'Zielony',\n",
" 'Ład dla polskich rolników :-) 204 12',\n",
" 'Na poznańskim osiedlu od rana skargi na intensywny zapach wanilii i karmelu. \"',\n",
" 'Nie można otworzyć okna\" ~śmieszek 6 godzin temu',\n",
" 'Lepsza fabryka i zapach karmelu niż biuro',\n",
" 'Pi',\n",
" 'Su i zapach kupy. ;) 211 23',\n",
" 'Miało jej nie być, ale jednak się odbędzie.',\n",
" 'Parada',\n",
" 'Sobótkowa w piątek na',\n",
" 'Warcie! ~',\n",
" 'Autor 8 godzin temu',\n",
" 'Najpierw pi',\n",
" 'S',\n",
" 'Smani skomleli w komentarzach że parady nie będzie, a teraz skomlą że będzie x',\n",
" 'D 156 1',\n",
" 'Przebudowali perony, będzie parking dla pasażerów ~',\n",
" 'Autor 6 godzin temu',\n",
" 'Masz rację od kiedy odszedł',\n",
" 'P',\n",
" 'I',\n",
" 'S, żyje nam się lepiej i więcej pieniędzy mamy bo nie kradną 133 4',\n",
" 'Koziołek od',\n",
" 'Kawu także w',\n",
" 'Berlinie. \"',\n",
" 'Walczymy dalej\".',\n",
" 'W piątek mecz o wszystko',\n",
" 'Polaków ~##~ 7 godzin temu',\n",
" 'Na efekty złodziejstwa',\n",
" 'P',\n",
" 'O jeszcze',\n",
" 'P',\n",
" 'Oczekamy.',\n",
" 'Aktualny deficyt w budżecie to wina złodziejskich rządów',\n",
" 'Prawa i',\n",
" 'Sprawiedliwości. 122 2',\n",
" 'Zabarykadował się w domu, groził wysadzeniem budynku.',\n",
" 'Interweniowały służby',\n",
" 'Czuqakadra 3 godziny temu',\n",
" 'Ale was pisowcy bez mydła z tymi imigrantami.',\n",
" 'Naściągali \"ekipy filmowe\" które już \"zakończyły zdjęcia\" na zachodzie',\n",
" 'Europy i ich nam zwracają :',\n",
" 'D 88 3',\n",
" 'Zauważyła starszą panią szukającą jedzenia w śmietniku, chciała jej pomóc ~sas 19 godzin temu \"napatoczyła się na przykry widok\" - normalnie poezja. 84 3',\n",
" 'Pojedynek mistrzów',\n",
" 'Wyślij zdjęcie vs',\n",
" 'Głosuj 88%',\n",
" 'Głosuj 12%',\n",
" 'Termy',\n",
" 'Dziś zagłosowały 982 osoby!',\n",
" 'Spotted środa, 19.06.2024 r., godz. 18.18',\n",
" 'K:',\n",
" 'Urocza blondynko o pięknych brązowych oczach....',\n",
" 'Widywaliśmy się niemal codziennie w autobusie linii 322 po godz 7..',\n",
" 'Uśmiechaliśmy się do siebie kiedy wsiadałem i kiedy',\n",
" 'Ty wysiadałaś na',\n",
" 'Serbskiej albo na',\n",
" 'Alejach....',\n",
" 'Ostatnio kiedy się spotkaliśmy miałaś różowe słuchawki...',\n",
" 'Nie jechałaś już ze mną od miesiąca , nie zdążyłem choćby poznać',\n",
" 'Twojego imienia...........',\n",
" 'Kontakt środa, 19.06.2024 r., godz. 10.44',\n",
" 'Marcin:',\n",
" 'Pozdrawiam sympatyczną kobietę, której wymieniłem żarówkę w jej aucie przed',\n",
" 'M1.',\n",
" 'Chciałbym',\n",
" 'Cię zaprosić na kawę.',\n",
" 'Kontakt wtorek, 18.06.2024 r., godz. 19.23',\n",
" 'Darek:',\n",
" 'Pozdrawiam przemiłą panią z rejestracji(',\n",
" 'Termedica)za pomoc!',\n",
" 'Różni nas tylko cztery miesiące!',\n",
" 'Może kawa?:)',\n",
" 'Kontakt wtorek, 18.06.2024 r., godz. 19.21',\n",
" 'Max:',\n",
" 'Serdecznie pozdrawiam sliczna brunetka,kierowcę',\n",
" 'M',\n",
" 'P',\n",
" 'K ,która dziś na dworcu',\n",
" 'Rataje ślicznie się uśmiechnęła',\n",
" 'Kontakt wtorek, 18.06.2024 r., godz. 17.30',\n",
" 'Maciej:',\n",
" 'Widzieliśmy się 16 czerwca na koncercie ... w okolicy wejścia',\n",
" 'R3 ( trybuna 1 ) , około 21 mijaliśmy się przy barze , byłaś z koleżanką ( blondynka ).',\n",
" 'Zapamiętałem',\n",
" 'Twój uśmiech , ubrana byłaś w białą bluzkę z napisem , jeśli to czytasz ... odezwij się , może na następny koncert pójdziemy razem :)',\n",
" 'Kontakt wtorek, 18.06.2024 r., godz. 00.54',\n",
" 'Maja:',\n",
" 'Hej, szukam wysokiego bruneta.',\n",
" 'Stałeś dzisiaj na przystanku pętli 611 na',\n",
" 'Dębcu i machałeś do mnie.',\n",
" 'Kiedy się do',\n",
" 'Ciebie zbliżyłam podjechał',\n",
" 'Twój autobus, powiedziałeś tylko: \"',\n",
" 'Nie pamiętasz mnie?',\n",
" 'Odezwij się do mnie. \"',\n",
" 'Nie mam pojęcia kim jesteś, jeśli to przeczytasz to',\n",
" 'Ty się odezwij.',\n",
" 'Powiedz do jakiego autobusu wchodziłeś i z kim byłam dla pewności',\n",
" 'Kontakt poniedziałek, 17.06.2024 r., godz. 12.35',\n",
" 'Tylko ja:',\n",
" 'Pozdrawiam sympatyczną dziewczynę z porannego autobusu 193 )) ładne pazurki ))',\n",
" 'Kontakt niedziela, 16.06.2024 r., godz. 22.59',\n",
" 'Grzegorz:',\n",
" 'Kasia.',\n",
" 'Szukam dziewczyny o imieniu',\n",
" 'Kasia którą poznałem na koncercie',\n",
" 'Dawida',\n",
" 'Podsiadło 15 czerwca.',\n",
" 'Siedzieliśmy obok siebie w sektorze',\n",
" 'A1 na rogu stadionu.',\n",
" 'Jestem pod wrażeniem',\n",
" 'Twoich oczu i chciałbym zaprosić',\n",
" 'Cię na kawę.',\n",
" 'Kontakt niedziela, 16.06.2024 r., godz. 20.33',\n",
" 'Ania:',\n",
" 'Bardzo dziękuję parze przemiłych ludzi którzy pomogli mi z opona koło',\n",
" 'Lidla .',\n",
" 'Kontakt sobota, 15.06.2024 r., godz. 12.58',\n",
" 'Marcin:',\n",
" 'Piątek, około godziny 19.00,',\n",
" 'Rosmann',\n",
" 'Druskiennicka.',\n",
" 'Mówiłaś synowi żeby z nikim nie rozmawiał jak wejdziesz do',\n",
" 'Rossmana:) nawiazalismy kontakt jednak się spieszyłem.',\n",
" 'Odezwij się.',\n",
" 'Kontakt piątek, 14.06.2024 r., godz. 22.36',\n",
" 'Beza:',\n",
" 'Widziałam',\n",
" 'Ciebie parę razy w',\n",
" 'Bezie.',\n",
" 'Ty ubrany cały na czarno, ja z koleżanką na kawie.',\n",
" 'Może zjemy razem tort bezowy?',\n",
" 'Kontakt piątek, 14.06.2024 r., godz. 14.47',\n",
" 'Brunetka.:',\n",
" 'Wsiadałaś dziś też na',\n",
" 'Al.',\n",
" 'Marcinkowskiego tramwaj',\n",
" 'Nr9.',\n",
" 'Biało czerwona koszulka czerwone buty i te',\n",
" 'Twoje tatuaże kolorowe na prawej ręce.',\n",
" 'Twoje spojrzenie aż mnie przeszywa.',\n",
" 'Odezwij się do mnie na @.',\n",
" 'Może pogadamy o tatuażach też mam ale nie widać albo może spacer.',\n",
" 'Brunetka.',\n",
" 'Kontakt piątek, 14.06.2024 r., godz. 09.21',\n",
" 'Maleństwo:',\n",
" 'Gdy w środę zobaczyłam',\n",
" 'Cię pierwszy, moje serducho zaczęło szybciej bić - jakby strzała amora trafiła ...',\n",
" 'Patrzyłam jak wychodziłeś, czekając czy się odwrócisz i odwróciłeś się, a nasz wzrok ponownie nie mógł się od siebie oderwać.',\n",
" 'Wiem, że pracujesz w',\n",
" 'Lednogorze.',\n",
" 'Mam nadzieję, że się spotkamy, chociażby na korytarzu, bo bywasz u mnie w pracy.',\n",
" 'Kontakt czwartek, 13.06.2024 r., godz. 15.15',\n",
" 'M:',\n",
" 'Jechałaś dzisiaj tramwajem na królowej',\n",
" 'Jadwigi i pięknie uśmiechała się przez okno.',\n",
" 'Nasz wzrok spotkał się kilka razy.',\n",
" 'Może kawa?',\n",
" 'Ten z',\n",
" 'B',\n",
" 'M',\n",
" 'W.',\n",
" 'Kontakt czwartek, 13.06.2024 r., godz. 06.40',\n",
" 'Brunetka:',\n",
" 'Jeździsz prawie codziennie tramwajem',\n",
" 'Nr9 wsiadasz na',\n",
" 'Al.',\n",
" 'Marcinkowskiego.',\n",
" 'Masz bardzo fajną fryzurę i tatuaże na prawej ręce.',\n",
" 'Pare razy nasze spojrzenia się spotkały masz coś wyjątkowego w oczach.',\n",
" 'Może masz ochotę na spacer?',\n",
" 'Kontakt',\n",
" 'Dodaj',\n",
" 'Wyznaję',\n",
" 'Wyślij do',\n",
" 'Szczera czyli szczera nie jesteś, skoro trzymasz język za zębami ;)',\n",
" 'Łżesz w żywe oczy a swoje myślisz?',\n",
" 'To się nazywa dwulicowość ;)',\n",
" 'Szczera',\n",
" 'Każdy ma jakiegoś mola co go gryzie, ale ja wolę trzymać język za zębami, a',\n",
" 'Ty nie gryziesz się w język. :)',\n",
" 'Miłego dnia, niebieskooki',\n",
" 'Promyczku słońca',\n",
" 'J',\n",
" 'Dziękuję za te piękne lata mojemu mężowi :)',\n",
" 'Życzę nam kolejnych takich :)',\n",
" 'Malinka',\n",
" 'Wciąż cię kocham i tęsknię',\n",
" 'Z ...chcę się z tobą zestarzeć.',\n",
" 'Uwielbiam ... karel',\n",
" 'Madlaine, kiedy rowerki?',\n",
" 'Myszka',\n",
" 'Nie żałuję żadnej z naszych chwil.',\n",
" 'Warto było kochać każdą z nich...',\n",
" 'Iwo',\n",
" 'Pozdrowienia dla ślicznych pielęgniarek z oddziału wewnętrznego .',\n",
" 'Do dlaczego',\n",
" 'Niestety',\n",
" 'Ci po zdalnym nie zrozumieją.',\n",
" 'Polska mowa być trudna mowa.',\n",
" 'Aluśka',\n",
" 'Kocham',\n",
" 'Cię',\n",
" 'Robciu..miłego dnia',\n",
" 'Newsletter',\n",
" 'Oszczędź sobie czasu na poszukiwania!',\n",
" 'Wybierz jakie informacje',\n",
" 'Cię interesują, by być na bieżąco każdego dnia!',\n",
" 'Poranna gazeta',\n",
" 'Codziennie rano',\n",
" 'Informator kulturalny',\n",
" 'W każdy czwartek',\n",
" 'Dodaj',\n",
" 'Potwierdzenie subskrypcji zostało wysłane na',\n",
" 'Twój adres e-mail.',\n",
" 'Potwierdź i jako pierwszy dowiaduj się, co słychać w',\n",
" 'Poznaniu!',\n",
" 'Zdjęcie dnia',\n",
" 'Wyślij zdjęcie',\n",
" \"Efekt 'halo' widziany z poznańskiej\",\n",
" 'Śródki',\n",
" 'Autor:',\n",
" 'Adam',\n",
" 'Słociński',\n",
" 'Nekrologi',\n",
" 'Dodaj nekrolog',\n",
" 'Mirosława',\n",
" 'Kosicka lat 79',\n",
" 'Halina',\n",
" 'Teresa',\n",
" 'Filas lat 77',\n",
" 'Regina',\n",
" 'Dajksler lat 72',\n",
" 'Mirosława',\n",
" 'Maria',\n",
" 'Kaczmarek lat 82',\n",
" 'Ignacy',\n",
" 'Augustyn',\n",
" 'Nowak lat 71',\n",
" 'Wojciech',\n",
" 'Franciszek',\n",
" 'Schmidt lat 84',\n",
" 'Alodia',\n",
" 'Donata',\n",
" 'Witaszek-',\n",
" 'Napierała lat 86',\n",
" 'Katarzyna',\n",
" 'Plenzler lat 55',\n",
" 'Ewa',\n",
" 'Urszula',\n",
" 'Czajka-',\n",
" 'Bolek lat 78',\n",
" 'Piotr',\n",
" 'Kurmiel lat 62',\n",
" 'Mariusz',\n",
" 'Stanisław',\n",
" 'Kaźmierczak lat 62',\n",
" 'Wanda',\n",
" 'Anna',\n",
" 'Pałgan lat 77',\n",
" 'Andrzej',\n",
" 'Kaczmarek lat 41',\n",
" 'Urszula',\n",
" 'Franciszka',\n",
" 'Kubasińska lat 79',\n",
" 'Mateusz',\n",
" 'Strzelczyk lat 31',\n",
" 'Kondolencje',\n",
" 'Formularz kontaktowy',\n",
" 'Zdjęcie/',\n",
" 'Video',\n",
" 'Załącz plik',\n",
" 'Wysyłając niniejszy formularz niniejszym oświadczam, że jestem autorem przesyłanego zdjęcia/video/tekstu i zgadzam się na nieodpłatne wykorzystanie tego materiału na łamach epoznan.pl, wtkplay.pl i',\n",
" 'Telewizji',\n",
" 'W',\n",
" 'T',\n",
" 'K.',\n",
" \"Aby wysłać formularz, konieczne jest zaznaczenie opcji '\",\n",
" \"Nie jestem robotem'.\",\n",
" 'Anuluj',\n",
" 'Wyślij',\n",
" 'Jeśli zapomniałeś hasła podaj adres e-mail na który zarejestrowałeś swoje konto aby wygenerować nowe hasło.',\n",
" 'Anuluj',\n",
" 'Wyślij',\n",
" 'Zgłoszenie “',\n",
" 'Spotted” wysłane.',\n",
" 'Po akceptacji redakcji, pojawi się na stronie.',\n",
" 'Zgłoszenie “',\n",
" 'Wyznaję” wysłane.',\n",
" 'Po akceptacji redakcji, pojawi się na stronie.',\n",
" 'K',\n",
" 'O',\n",
" 'N',\n",
" 'T',\n",
" 'A',\n",
" 'K',\n",
" 'T:',\n",
" 'Zgłoszenie “',\n",
" 'Spotted” wysłane.',\n",
" 'Po akceptacji redakcji, pojawi się na stronie.',\n",
" 'Uzasadnij zgłoszenie komentarza do usunięcia',\n",
" 'Anuluj',\n",
" 'Wyślij',\n",
" 'Odpowiedz na komentarz',\n",
" 'Anuluj',\n",
" 'Wyślij',\n",
" 'Komentarz został dodany.',\n",
" 'Co powinniśmy w tym newsie poprawić?',\n",
" 'Anuluj',\n",
" 'Wyślij',\n",
" 'Login',\n",
" 'Hasło',\n",
" 'Zapomniałeś hasła?',\n",
" 'Zaloguj',\n",
" 'Nie masz konta?',\n",
" 'Zarejestruj się',\n",
" 'Materiał zawiera treści przeznaczone tylko dla osób dorosłych.',\n",
" 'Jeśli nie ukończyłeś 18 roku życia i tym samym w świetle polskiego prawa nie jesteś osobą pełnoletnią, nie wolno',\n",
" 'Ci wchodzić dalej.',\n",
" 'Wchodząc dalej oświadczasz, iż jesteś osobą pełnoletnią.',\n",
" 'Tak',\n",
" 'Nie',\n",
" 'Nr 1',\n",
" 'W',\n",
" 'Poznaniu 2,4 mln',\n",
" 'Czytelników',\n",
" 'Siła miliona',\n",
" 'Czytelników',\n",
" 'Oferta reklamowa',\n",
" 'Nasz patronat',\n",
" 'Polityka prywatności',\n",
" 'Regulamin serwisu',\n",
" 'R',\n",
" 'S',\n",
" 'S',\n",
" 'Kontakt',\n",
" 'Kontakt 24/7: [email protected]',\n",
" 'M',\n",
" 'M',\n",
" 'S/',\n",
" 'S',\n",
" 'M',\n",
" 'S: 537 133 133',\n",
" 'Formularz kontaktowy',\n",
" 'Redaktor naczelny:',\n",
" 'Witold',\n",
" 'Kundzewicz',\n",
" 'Redaktor prowadząca:',\n",
" 'Katarzyna',\n",
" 'Żurowska',\n",
" 'Redakcja:',\n",
" 'Joanna',\n",
" 'Wachowska,',\n",
" 'Paulina',\n",
" 'Zych',\n",
" 'I',\n",
" 'T:',\n",
" 'Tomasz',\n",
" 'Płuciennik']"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"def sentence_split_enhanced(text):\n", "import requests\n",
" return []" "from bs4 import BeautifulSoup\n",
"\n",
"\n",
"def segment_text(text): \n",
"\n",
" pattern = r'(\\p{Lu}\\p{Ll}*|\\b\\p{Lu}(?=\\p{Ll}))'\n",
" regex = re.compile(pattern, re.UNICODE)\n",
"\n",
" matches = list(regex.finditer(text))\n",
"\n",
" segments = []\n",
" start = 0\n",
" for match in matches:\n",
" end = match.start()\n",
" if start < end:\n",
" segments.append(text[start:end].strip())\n",
" start = end\n",
"\n",
" segments.append(text[start:].strip())\n",
"\n",
" segments = [segment for segment in segments if segment]\n",
"\n",
" return segments\n",
"\n",
"def sentence_split_enhanced(url):\n",
" soup = BeautifulSoup(requests.get(url).content, 'html.parser')\n",
" text = re.sub(r\"\\s+\", \" \", soup.get_text())\n",
" segments = segment_text(text)\n",
" return segments\n",
"\n",
"sentence_split_enhanced(\"https://epoznan.pl\")\n",
"\n",
"# problem z wyrazami, które sa pisane duza litera \n",
"# problem z nazwami ulic"
] ]
}, },
{ {
@ -93,6 +845,12 @@
"* przetłumaczenia tekstu źródłowego tylko częściowo" "* przetłumaczenia tekstu źródłowego tylko częściowo"
] ]
}, },
{
"cell_type": "markdown",
"id": "149d1a7e",
"metadata": {},
"source": []
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "australian-hundred", "id": "australian-hundred",
@ -125,6 +883,43 @@
"XLIFF jest formatem do przechowywania pamięci tłumaczeń, który opiera się na XML-u. Przykładowy plik XLIFF wygląda następująco:" "XLIFF jest formatem do przechowywania pamięci tłumaczeń, który opiera się na XML-u. Przykładowy plik XLIFF wygląda następująco:"
] ]
}, },
{
"cell_type": "code",
"execution_count": 11,
"id": "7cb45435",
"metadata": {},
"outputs": [],
"source": [
"def create_hunaligna_file(text_en, text_pl, output_file):\n",
" sentences_en = text_en.split('. ')\n",
" sentences_pl = text_pl.split('. ')\n",
"\n",
" with open(output_file, \"w\", encoding=\"utf-8\") as f:\n",
" f.write(\"# Sentence pairs\\n\")\n",
" for i, (en, pl) in enumerate(zip(sentences_en, sentences_pl)):\n",
" f.write(f\"{i}\\n\")\n",
" f.write(\"# Source\\n\")\n",
" f.write(f\"{en.strip()}\\n\")\n",
" f.write(\"# Target\\n\")\n",
" f.write(f\"{pl.strip()}\\n\")\n",
"\n",
"\n",
"en_url = \"https://www.apple.com\"\n",
"pl_url = \"https://www.apple.com/pl/\"\n",
"\n",
"en_text = ' '.join(sentence_split_enhanced(en_url))\n",
"pl_text = ' '.join(sentence_split_enhanced(pl_url))\n",
"\n",
"with open(\"txt/en.txt\", \"w\", encoding=\"utf-8\") as f_en:\n",
" f_en.write(en_text)\n",
"\n",
"with open(\"txt/pl.txt\", \"w\", encoding=\"utf-8\") as f_pl:\n",
" f_pl.write(pl_text)\n",
"\n",
"create_hunaligna_file(en_text, pl_text, \"txt/hunaligna.txt\")\n",
"\n"
]
},
{ {
"cell_type": "raw", "cell_type": "raw",
"id": "appropriate-timber", "id": "appropriate-timber",
@ -174,28 +969,61 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 18,
"id": "remarkable-pillow", "id": "remarkable-pillow",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"def convert2xliff(hunalign_file_name):\n", "import xml.etree.ElementTree as ET\n",
" return 0" "\n",
"def convert2xliff():\n",
" segments = []\n",
" with open('txt/hunaligna.txt', 'r', encoding='utf-8') as file:\n",
" segment = {}\n",
" for line in file:\n",
" line = line.strip()\n",
" if line.startswith(\"# Source\"):\n",
" segment['source'] = next(file).strip()\n",
" elif line.startswith(\"# Target\"):\n",
" segment['target'] = next(file).strip()\n",
" segments.append(segment)\n",
" segment = {}\n",
" \n",
" xliff = ET.Element('xliff', version=\"1.2\")\n",
" file_elem = ET.SubElement(xliff, 'file', {\n",
" 'datatype': \"plaintext\",\n",
" 'original': \"file.txt\",\n",
" 'source-language': 'en',\n",
" 'target-language': 'pl'\n",
" })\n",
" body = ET.SubElement(file_elem, 'body')\n",
"\n",
" for i, segment in enumerate(segments):\n",
" trans_unit = ET.SubElement(body, 'trans-unit', id=str(i))\n",
" source = ET.SubElement(trans_unit, 'source')\n",
" source.text = segment['source']\n",
" target = ET.SubElement(trans_unit, 'target')\n",
" target.text = segment['target']\n",
"\n",
" tree = ET.ElementTree(xliff)\n",
" tree.write('txt/output.xliff', encoding='UTF-8', xml_declaration=True)\n",
"\n",
"\n",
"convert2xliff()\n",
"\n",
"\n"
] ]
} }
], ],
"metadata": { "metadata": {
"author": "Rafał Jaworski", "author": "Rafał Jaworski",
"email": "rjawor@amu.edu.pl", "email": "rjawor@amu.edu.pl",
"lang": "pl",
"subtitle": "11. Urównoleglanie",
"title": "Komputerowe wspomaganie tłumaczenia",
"year": "2021",
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "Python 3",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },
"lang": "pl",
"language_info": { "language_info": {
"codemirror_mode": { "codemirror_mode": {
"name": "ipython", "name": "ipython",
@ -206,8 +1034,11 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.10" "version": "3.11.9"
} },
"subtitle": "11. Urównoleglanie",
"title": "Komputerowe wspomaganie tłumaczenia",
"year": "2021"
}, },
"nbformat": 4, "nbformat": 4,
"nbformat_minor": 5 "nbformat_minor": 5

View File

@ -65,19 +65,34 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 6,
"id": "broken-workstation", "id": "broken-workstation",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[6], line 10\u001b[0m\n\u001b[1;32m 6\u001b[0m f\u001b[38;5;241m.\u001b[39mwrite(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdatetime\u001b[38;5;241m.\u001b[39mnow()\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m - \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mkey\u001b[38;5;241m.\u001b[39mname\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 8\u001b[0m keyboard\u001b[38;5;241m.\u001b[39mon_press(log_key)\n\u001b[0;32m---> 10\u001b[0m \u001b[43mkeyboard\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mwait\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m/opt/homebrew/lib/python3.11/site-packages/keyboard/__init__.py:886\u001b[0m, in \u001b[0;36mwait\u001b[0;34m(hotkey, suppress, trigger_on_release)\u001b[0m\n\u001b[1;32m 884\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 885\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[0;32m--> 886\u001b[0m \u001b[43m_time\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msleep\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m1e6\u001b[39;49m\u001b[43m)\u001b[49m\n",
"\u001b[0;31mKeyboardInterrupt\u001b[0m: "
]
}
],
"source": [ "source": [
"import keyboard\n", "import keyboard\n",
"from datetime import datetime\n",
"\n", "\n",
"def log_key(key):\n",
" with open(\"txt/keylog.txt\", \"a\") as f:\n",
" f.write(f\"{datetime.now()} - {key.name}\\n\")\n",
"\n", "\n",
"def report_key(event):\n", "keyboard.on_press(log_key)\n",
" print(event)\n",
"\n", "\n",
"keyboard.on_release(callback=report_key)\n", "keyboard.wait()\n"
"keyboard.wait()"
] ]
}, },
{ {
@ -96,6 +111,36 @@
"### Ćwiczenie 1: Wykorzystując powyższy kod napisz keylogger, który zapisuje wszystkie uderzenia w klawisze do pliku. Format pliku jest dowolny, każdy wpis musi zawierać precyzyjną godzinę uderzenia oraz uderzony klawisz. Uruchom program i przepisz paragraf dowolnie wybranego tekstu." "### Ćwiczenie 1: Wykorzystując powyższy kod napisz keylogger, który zapisuje wszystkie uderzenia w klawisze do pliku. Format pliku jest dowolny, każdy wpis musi zawierać precyzyjną godzinę uderzenia oraz uderzony klawisz. Uruchom program i przepisz paragraf dowolnie wybranego tekstu."
] ]
}, },
{
"cell_type": "code",
"execution_count": 5,
"id": "e1fd5d69",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<function keyboard.hook.<locals>.remove_()>"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import keyboard\n",
"import time\n",
"\n",
"def log_keystroke(event):\n",
" with open(\"txt/keystroke_log.txt\", \"a\") as log_file:\n",
" log_time = time.strftime(\"%Y-%m-%d %H:%M:%S\", time.localtime())\n",
" log_file.write(f\"{log_time} - {event.name}\\n\")\n",
"\n",
"keyboard.on_press(log_keystroke)\n",
"keyboard.wait()"
]
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "valuable-bearing", "id": "valuable-bearing",
@ -117,12 +162,76 @@
"execution_count": 1, "execution_count": 1,
"id": "possible-holder", "id": "possible-holder",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"This process is not trusted! Input event monitoring will not be possible until it is added to accessibility clients.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Rozpocznij pisanie. Naciśnij 'esc' aby zakończyć i zobaczyć wyniki.\n",
"Średnia prędkość pisania: 733.09 znaków na minutę\n",
"Średnia prędkość pisania: 146.62 słów na minutę\n"
]
}
],
"source": [ "source": [
"def calculate_typing_speed():\n", "from pynput import keyboard\n",
" return 0" "from datetime import datetime, timedelta\n",
"\n",
"key_times = []\n",
"last_time = None\n",
"\n",
"def on_press(key):\n",
" global last_time\n",
" current_time = datetime.now()\n",
" \n",
" if last_time and (current_time - last_time) > timedelta(seconds=5):\n",
" key_times.clear()\n",
" \n",
" key_times.append(current_time)\n",
" last_time = current_time\n",
"\n",
"def calculate_speed():\n",
" if len(key_times) < 2:\n",
" print(\"Zbyt mało danych do obliczenia prędkości pisania.\")\n",
" return\n",
" \n",
" total_time = (key_times[-1] - key_times[0]).total_seconds() / 60\n",
" \n",
" num_chars = len(key_times)\n",
" num_words = num_chars / 5\n",
" \n",
" chars_per_minute = num_chars / total_time\n",
" words_per_minute = num_words / total_time\n",
" \n",
" print(f\"Średnia prędkość pisania: {chars_per_minute:.2f} znaków na minutę\")\n",
" print(f\"Średnia prędkość pisania: {words_per_minute:.2f} słów na minutę\")\n",
"\n",
"def on_release(key):\n",
" if key == keyboard.Key.esc:\n",
" calculate_speed()\n",
" return False\n",
"\n",
"with keyboard.Listener(on_press=on_press, on_release=on_release) as listener:\n",
" print(\"Rozpocznij pisanie. Naciśnij 'esc' aby zakończyć i zobaczyć wyniki.\")\n",
" listener.join()\n",
"\n"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "03ba2685",
"metadata": {},
"outputs": [],
"source": []
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "ceramic-birth", "id": "ceramic-birth",
@ -141,28 +250,100 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 7,
"id": "close-riverside", "id": "close-riverside",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"This process is not trusted! Input event monitoring will not be possible until it is added to accessibility clients.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Rozpocznij pisanie. Naciśnij 'esc' aby zakończyć i zobaczyć wyniki.\n",
"Przerwa: 3.82 sekund\n",
"Kontekst przed przerwą: ...fsdjfdnsjfjdnfjdfnj[Key.esc]\n",
"Kontekst po przerwie: ...\n",
"\n"
]
}
],
"source": [ "source": [
"def find_pauses():\n", "from pynput import keyboard\n",
" return []" "from datetime import datetime, timedelta\n",
"\n",
"key_times = []\n",
"key_text = []\n",
"last_time = None\n",
"breaks = []\n",
"\n",
"def key_to_string(key):\n",
" try:\n",
" return key.char\n",
" except AttributeError:\n",
" if key == keyboard.Key.space:\n",
" return ' '\n",
" return f'[{str(key)}]'\n",
"\n",
"def on_press(key):\n",
" global last_time\n",
" current_time = datetime.now()\n",
" \n",
" key_times.append(current_time)\n",
" key_text.append(key_to_string(key))\n",
" \n",
" if last_time and (current_time - last_time) > timedelta(seconds=3):\n",
" context_start = max(0, len(key_text) - 20)\n",
" context_end = len(key_text)\n",
" context_before = ''.join(key_text[context_start:context_end])\n",
" \n",
" context_start = len(key_text)\n",
" context_end = min(len(key_text) + 20, len(key_text))\n",
" context_after = ''.join(key_text[context_start:context_end])\n",
" \n",
" breaks.append((current_time - last_time, context_before, context_after))\n",
" \n",
" last_time = current_time\n",
"\n",
"def report_breaks():\n",
" if not breaks:\n",
" print(\"Nie wykryto przerw dłuższych niż 3 sekundy.\")\n",
" return\n",
" \n",
" breaks.sort(reverse=True, key=lambda x: x[0])\n",
" \n",
" for duration, context_before, context_after in breaks:\n",
" print(f\"Przerwa: {duration.total_seconds():.2f} sekund\")\n",
" print(f\"Kontekst przed przerwą: ...{context_before}\")\n",
" print(f\"Kontekst po przerwie: {context_after}...\")\n",
" print()\n",
"\n",
"def on_release(key):\n",
" if key == keyboard.Key.esc:\n",
" report_breaks()\n",
" return False\n",
"\n",
"with keyboard.Listener(on_press=on_press, on_release=on_release) as listener:\n",
" print(\"Rozpocznij pisanie. Naciśnij 'esc' aby zakończyć i zobaczyć wyniki.\")\n",
" listener.join()\n",
"\n"
] ]
} }
], ],
"metadata": { "metadata": {
"author": "Rafał Jaworski", "author": "Rafał Jaworski",
"email": "rjawor@amu.edu.pl", "email": "rjawor@amu.edu.pl",
"lang": "pl",
"subtitle": "12. Key logging",
"title": "Komputerowe wspomaganie tłumaczenia",
"year": "2021",
"kernelspec": { "kernelspec": {
"display_name": "Python 3", "display_name": "Python 3",
"language": "python", "language": "python",
"name": "python3" "name": "python3"
}, },
"lang": "pl",
"language_info": { "language_info": {
"codemirror_mode": { "codemirror_mode": {
"name": "ipython", "name": "ipython",
@ -173,8 +354,11 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.10" "version": "3.11.9"
} },
"subtitle": "12. Key logging",
"title": "Komputerowe wspomaganie tłumaczenia",
"year": "2021"
}, },
"nbformat": 4, "nbformat": 4,
"nbformat_minor": 5 "nbformat_minor": 5

View File

@ -44,7 +44,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": null,
"id": "familiar-terrace", "id": "familiar-terrace",
"metadata": { "metadata": {
"scrolled": true "scrolled": true
@ -91,6 +91,14 @@
" print(line.rstrip())" " print(line.rstrip())"
] ]
}, },
{
"cell_type": "code",
"execution_count": null,
"id": "bee601d9",
"metadata": {},
"outputs": [],
"source": []
},
{ {
"cell_type": "markdown", "cell_type": "markdown",
"id": "dominant-insurance", "id": "dominant-insurance",
@ -120,13 +128,41 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": 11,
"id": "economic-southeast", "id": "economic-southeast",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[('auto', 1)]\n",
"[('adam', 1), ('jest', 1), ('fajny', 1)]\n",
"[('brak', 1), ('słów', 1)]\n",
"[('chrzaszcz', 0)]\n"
]
}
],
"source": [ "source": [
"def correct_text(text):\n", "from zipfile import ZipFile\n",
" return []" "\n",
"dictionary = set\n",
"\n",
"with ZipFile('data/hunspell_pl.zip') as zip_f:\n",
" with zip_f.open('hunspell_pl.txt') as f:\n",
" dictionary = set([line.strip().lower() for line in f.read().decode('utf-8').splitlines()])\n",
"\n",
"def correct_text(phrase):\n",
" return [(word, 1) if word in dictionary\n",
" else (word, 0)\n",
" for word in phrase.lower().split()]\n",
"\n",
"# 0 - źle\n",
"# 1 - dobrze\n",
"print(correct_text('Auto'))\n",
"print(correct_text('Adam jest fajny'))\n",
"print(correct_text('Brak słów'))\n",
"print(correct_text('Chrzaszcz'))"
] ]
}, },
{ {
@ -168,13 +204,34 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 1, "execution_count": 5,
"id": "built-sally", "id": "built-sally",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"def L1(w):\n", "import numpy as np\n",
" return []" "\n",
"def levenshtein_distance(a, b):\n",
" n, m = len(a), len(b)\n",
" if n > m:\n",
" a, b = b, a\n",
" n, m = m, n\n",
"\n",
" current_row = np.arange(n + 1)\n",
" for i in range(1, m + 1):\n",
" previous_row, current_row = current_row, np.zeros(n + 1, dtype=int)\n",
" current_row[0] = i\n",
" for j in range(1, n + 1):\n",
" insertions = previous_row[j] + 1\n",
" deletions = current_row[j - 1] + 1\n",
" substitutions = previous_row[j - 1] + (a[j - 1] != b[i - 1])\n",
" current_row[j] = min(insertions, deletions, substitutions)\n",
"\n",
" return current_row[n]\n",
"\n",
"\n",
"def L1(word, dictionary, max_distance=1):\n",
" return [dict_word for dict_word in dictionary if levenshtein_distance(word, dict_word) <= max_distance]"
] ]
}, },
{ {
@ -187,13 +244,45 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 6,
"id": "coordinated-cooperation", "id": "coordinated-cooperation",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
"def generate_suggestions(w):\n", "def generate_suggestions(word, dictionary):\n",
" return []" " return L1(word, dictionary)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "0c7843bb",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['ato', 'alto', 'huto', 'luto', 'autko', 'autor', 'auto', 'aut']\n",
"['adams', 'adm', 'dam', 'ada', 'edam', 'adad', 'adym', 'asam', 'adaś', 'adat', 'aram', 'adam']\n",
"['fajny', 'fajno', 'tajny', 'farny']\n",
"[]\n"
]
}
],
"source": [
"from zipfile import ZipFile\n",
"\n",
"dictionary = set\n",
"\n",
"with ZipFile('data/hunspell_pl.zip') as zip_f:\n",
" with zip_f.open('hunspell_pl.txt') as f:\n",
" dictionary = set([line.strip().lower() for line in f.read().decode('utf-8').splitlines()])\n",
" \n",
"print(generate_suggestions('auto', dictionary))\n",
"print(generate_suggestions('adam', dictionary))\n",
"print(generate_suggestions('fajny', dictionary))\n",
"print(generate_suggestions('chrzazszcz', dictionary))"
] ]
} }
], ],
@ -216,7 +305,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.10" "version": "3.11.9"
}, },
"subtitle": "13,14. Korekta pisowni", "subtitle": "13,14. Korekta pisowni",
"title": "Komputerowe wspomaganie tłumaczenia", "title": "Komputerowe wspomaganie tłumaczenia",

View File

@ -79,16 +79,25 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 2,
"id": "relative-anaheim", "id": "relative-anaheim",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Downloading LanguageTool 6.4: 100%|██████████| 246M/246M [00:03<00:00, 66.2MB/s] \n",
"Unzipping /var/folders/x3/mzn7dtf55q5355g3c5003g_h0000gn/T/tmp5m6kri4j.zip to /Users/adamstelmaszyk/.cache/language_tool_python.\n",
"Downloaded https://www.languagetool.org/download/LanguageTool-6.4.zip to /Users/adamstelmaszyk/.cache/language_tool_python.\n"
]
},
{ {
"name": "stdout", "name": "stdout",
"output_type": "stream", "output_type": "stream",
"text": [ "text": [
"[Match({'ruleId': 'EN_A_VS_AN', 'message': 'Use “an” instead of a if the following word starts with a vowel sound, e.g. an article, an hour.', 'replacements': ['an'], 'offsetInContext': 16, 'context': 'A sentence with a error in the Hitchhikers Guide tot he ...', 'offset': 16, 'errorLength': 1, 'category': 'MISC', 'ruleIssueType': 'misspelling', 'sentence': 'A sentence with a error in the Hitchhikers Guide tot he Galaxy'}),\n", "[Match({'ruleId': 'EN_A_VS_AN', 'message': 'Use “an” instead of a if the following word starts with a vowel sound, e.g. an article, an hour.', 'replacements': ['an'], 'offsetInContext': 16, 'context': 'A sentence with a error in the Hitchhikers Guide tot he ...', 'offset': 16, 'errorLength': 1, 'category': 'MISC', 'ruleIssueType': 'misspelling', 'sentence': \"A sentence with a error in the Hitchhiker's Guide tot he Galaxy\"}),\n",
" Match({'ruleId': 'TOT_HE', 'message': 'Did you mean “to the”?', 'replacements': ['to the'], 'offsetInContext': 43, 'context': '... with a error in the Hitchhikers Guide tot he Galaxy', 'offset': 50, 'errorLength': 6, 'category': 'TYPOS', 'ruleIssueType': 'misspelling', 'sentence': 'A sentence with a error in the Hitchhikers Guide tot he Galaxy'})]\n" " Match({'ruleId': 'TOT_HE', 'message': 'Did you mean “to the”?', 'replacements': ['to the'], 'offsetInContext': 43, 'context': '... with a error in the Hitchhikers Guide tot he Galaxy', 'offset': 50, 'errorLength': 6, 'category': 'TYPOS', 'ruleIssueType': 'misspelling', 'sentence': \"A sentence with a error in the Hitchhiker's Guide tot he Galaxy\"})]\n"
] ]
} }
], ],
@ -122,13 +131,77 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 2, "execution_count": 5,
"id": "sound-teaching", "id": "sound-teaching",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Error: Prawdopodobna literówka: wiele spacji z rzędu\n",
" Specjalistk...rce znajdziesz\n",
"\n",
"Error: Prawdopodobna literówka: wiele spacji z rzędu\n",
" Specjalistka ds. mediów społecznośc...\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: ...absolwentka Wydziału Anglistyki. UAM na Pyrkonie 2024. Zapraszamy do obejrzenia fotorela...\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: ...topnia oraz jednolite magisterskie. . . Scientific Advisory Board UAM obradowało po raz ko...\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: ... jednolite magisterskie. . . Scientific Advisory Board UAM obradowało po raz kolejny. Po...\n",
"\n",
"Error: Nietypowa kombinacja małych i dużych liter. Czy nie powinno być: \"beststudentcamp\"?\n",
"Context: ... laureatów konkursu Szkoła Letnia UAM BESTStudentCAMP 2024. Poznaj listę nagrodzonych! Stworz...\n",
"\n",
"Error: Spacje wokół dywizu (w przeciwieństwie do myślnika) są zbędne: \"UAM-Pyrkon\"; jeśli to miał być myślnik, to należy napisać \"UAM — Pyrkon\".\n",
"Context: ...wszy etap wdrażania planu. Fantastyczny UAM - Pyrkon 2024 „Człowiek w rzeczywistości AI: roz...\n",
"\n",
"Error: Prawdopodobna literówka: wiele spacji z rzędu\n",
"Context: ...rs recytatorski wierszy Czesława Miłosza . Uniwe...\n",
"\n",
"Error: Prawdopodobna literówka: wiele spacji z rzędu\n",
"Context: ...a i nauczanie wzajemnie się przenikają. . Jedna z inicjatyw UAM, w ramach której...\n",
"\n",
"Error: Nie wstawiamy spacji przed kropką\n",
"Context: ...zej uczelni Zobacz nasze ciekawe obiekty . Uniwer...\n",
"\n",
"Error: Prawdopodobna literówka: wiele spacji z rzędu\n",
"Context: ...asopismach ogólnopolskich i światowych. . Na UA...\n",
"\n",
"Error: Prawdopodobna literówka: wiele spacji z rzędu\n",
"Context: ...tecie im. Adama Mickiewicza w Poznaniu. . Na UA...\n",
"\n",
"Error: Prawdopodobna literówka: wiele spacji z rzędu\n",
"Context: ...a także ogłoszone zamówienia publiczne. . Uniwersytet im. Adama Mickiewicza w Po...\n",
"\n"
]
}
],
"source": [ "source": [
"import requests\n",
"from bs4 import BeautifulSoup\n",
"from language_tool_python import LanguageTool\n",
"\n",
"def find_errors(website_url):\n", "def find_errors(website_url):\n",
" return []" " tool = LanguageTool('pl-PL')\n",
"\n",
" response = requests.get(website_url)\n",
" soup = BeautifulSoup(response.content, 'html.parser')\n",
"\n",
" text = ' '.join([p.get_text() for p in soup.find_all('p')])\n",
"\n",
" matches = tool.check(text)\n",
" \n",
" errors = [match for match in matches if 'Prawidłowa' not in match.ruleIssueType]\n",
" \n",
" return errors\n",
"\n",
"website_url = 'https://amu.edu.pl' \n",
"\n"
] ]
}, },
{ {
@ -141,13 +214,215 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 3, "execution_count": 7,
"id": "settled-armor", "id": "settled-armor",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: // This is a incorrect comment\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: // This is a incorrect comment\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: // This is a incorrect comment\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: // This is a method that does something\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: // This is a method that does something\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: // This is a method that does something\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: // This is a method that does something\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: // This is a method that does something\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: // This is a method that does something\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: // Return the value of the sample field\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: // Return the value of the sample field\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: // Return the value of the sample field\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: // Return the value of the sample field\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: // Another bad comment here\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: // Another bad comment here\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: /** * This is a simple Java class demonstrating gr...\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: /** * This is a simple Java class demonstrating gramm...\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: /** * This is a simple Java class demonstrating grammatical er...\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: /** * This is a simple Java class demonstrating grammatical errors in com...\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: /** * This is a simple Java class demonstrating grammatical errors in comments. */\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: ...is is a simple Java class demonstrating grammatical errors in comments. */\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: ...le Java class demonstrating grammatical errors in comments. */\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: ...ass demonstrating grammatical errors in comments. */\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: /** * This method does something very simple. ...\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: /** * This method does something very simple. * @par...\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: /** * This method does something very simple. * @param va...\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: /** * This method does something very simple. * @param value This i...\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: /** * This method does something very simple. * @param value This is a p...\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: ... * This method does something very simple. * @param value This is a paramete...\n",
"\n",
"Error: Prawdopodobna literówka: wiele spacji z rzędu\n",
"Context: ...This method does something very simple. * @param value This is a parameter that ...\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: ...thod does something very simple. * @param value This is a parameter that take a i...\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: ...es something very simple. * @param value This is a parameter that take a integer...\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: ...ething very simple. * @param value This is a parameter that take a integer valu...\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: ...g very simple. * @param value This is a parameter that take a integer value. ...\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: ...y simple. * @param value This is a parameter that take a integer value. */\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: ... * @param value This is a parameter that take a integer value. */\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: ...* @param value This is a parameter that take a integer value. */\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: ...m value This is a parameter that take a integer value. */\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: ...This is a parameter that take a integer value. */\n",
"\n",
"Error: Prawdopodobna literówka: wiele spacji z rzędu\n",
"Context: ... a parameter that take a integer value. */\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: /** * This method returns the value of sample fiel...\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: /** * This method returns the value of sample field. ...\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: /** * This method returns the value of sample field. * It sh...\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: /** * This method returns the value of sample field. * It should...\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: /** * This method returns the value of sample field. * It should be a ...\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: ...** * This method returns the value of sample field. * It should be a int...\n",
"\n",
"Error: Prawdopodobna literówka: wiele spacji z rzędu\n",
"Context: ...thod returns the value of sample field. * It should be a integer value. * @...\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: ...turns the value of sample field. * It should be a integer value. * @retu...\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: ...ns the value of sample field. * It should be a integer value. * @return the ...\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: ... of sample field. * It should be a integer value. * @return the value of samp...\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: ...le field. * It should be a integer value. * @return the value of sampleFiel...\n",
"\n",
"Error: Prawdopodobna literówka: wiele spacji z rzędu\n",
"Context: ...d. * It should be a integer value. * @return the value of sampleField ...\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: ... * It should be a integer value. * @return the value of sampleField */\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: ...ould be a integer value. * @return the value of sampleField */\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: ... be a integer value. * @return the value of sampleField */\n",
"\n",
"Error: Wykryto prawdopodobny błąd pisowni\n",
"Context: ...integer value. * @return the value of sampleField */\n",
"\n",
"Error: Nietypowa kombinacja małych i dużych liter. Czy nie powinno być: \"samplefield\"?\n",
"Context: ...eger value. * @return the value of sampleField */\n",
"\n"
]
}
],
"source": [ "source": [
"import re\n",
"from language_tool_python import LanguageTool\n",
"\n",
"def correct_java_grammar(java_file_path):\n", "def correct_java_grammar(java_file_path):\n",
" return []" " tool = LanguageTool('pl-PL')\n",
"\n",
" with open(java_file_path, 'r', encoding='utf-8') as file:\n",
" java_code = file.read()\n",
"\n",
" single_line_comments = re.findall(r'//.*', java_code)\n",
" javadocs = re.findall(r'/\\*\\*.*?\\*/', java_code, re.DOTALL)\n",
" \n",
" all_comments = single_line_comments + javadocs\n",
"\n",
" errors = []\n",
" for comment in all_comments:\n",
" matches = tool.check(comment)\n",
" errors.extend(matches)\n",
"\n",
" return errors\n",
"\n",
"java_file_path = 'java/sample_class.java'\n",
"errors = correct_java_grammar(java_file_path)\n",
"\n",
"for error in errors:\n",
" print(f\"Error: {error.message}\\nContext: {error.context}\\n\")\n"
] ]
} }
], ],
@ -170,7 +445,7 @@
"name": "python", "name": "python",
"nbconvert_exporter": "python", "nbconvert_exporter": "python",
"pygments_lexer": "ipython3", "pygments_lexer": "ipython3",
"version": "3.8.10" "version": "3.11.9"
}, },
"subtitle": "15. Korekta gramatyczna", "subtitle": "15. Korekta gramatyczna",
"title": "Komputerowe wspomaganie tłumaczenia", "title": "Komputerowe wspomaganie tłumaczenia",

BIN
lab/local_data/.DS_Store vendored Normal file

Binary file not shown.

View File

@ -0,0 +1,63 @@
B. ASSISTANCE OF A LAWYER /RIGHT TO PROVISION OF LEGAL ADVICE
You have the right to speak confidentially to a lawyer.
A lawyer is independent from the police.
If you need help to get in contact with a lawyer, the police must help you.
Ask the police for more information.
According to the Rights of Persons who are Arrested and Detained Law (sections 3(1)(c), 3(2)(a) and (4), 8(3)(b), 10, 12(1) and (3), 14(2):
• Immediately after arrest, a person arrested by a member of the Police has the right to call personally a lawyer of their own choice, without any other person being present during the phonecall.
• In the event that a person is arrested, who is obviously not able to exercise the above-mentioned right to communication due to any intellectual or physical disability, such person shall be entitled to exercise said right with the assistance and/or in the presence of an official from the medical or social services of the state; such assistance must be made available to the arrested person immediately once this is practically feasible after the arrest.
• In any case, immediately after arrest or, in the event that the arrest did not take place inside the police station, immediately after the person's admission to the station, a list which includes the names and telephone numbers of all lawyers registered in the “Register of Practicing Advocates” must be made available to the person arrested.
• Any person arrested may submit a written request to the Court, which shall examine whether the person is entitled to provision of pro-bono services of a lawyer.
C. INTERPRETATION AND TRANSLATION
If you do not speak or understand the language spoken by the police or other competent authorities, you have the right to be assisted by an interpreter, free of charge.
The interpreter may help you to talk to your lawyer and must keep the content of that communication confidential.
Moreover, you have the right to translation, free of charge, of all essential documents (arrest and/or detention warrant, bill of charge or indictment, any court judgment and order within the framework of the proceedings and any other document the competent authority may deem as essential either of their own motion or following a justified request made by you or your lawyer).
In some circumstances, you may be provided with an oral translation and/or summary of the essential documents.
According to the Rights of Persons who are Arrested and Detained Law (sections 7(2), 12(4) and 27(3):
D. RIGHT TO REMAIN SILENT
While being questioned by the police or other competent authorities, you do not have to answer questions about the alleged offence.
Your lawyer can help you to decide on that.
E. ACCESS TO DOCUMENTS AND CHALLENGE OF THE LEGALITY OF THE ARREST OR DETENTION.
When you are arrested and detained, you (or your lawyer) have the right to access essential documents (copy of the arrest and detention warrant, copy of the application and the affidavit on the basis of which the warrant was issued) which you need to challenge the legality of the arrest or detention.
If your case goes to court, you (or your lawyer) have the right to access the material evidence and documents gathered during the investigation of the case concerning the criminal offence brought before the Court.
F. INFORMING SOMEONE ELSE ABOUT YOUR ARREST OR DETENTION / INFORMING YOUR CONSULATE OR EMBASSY
When you are arrested or detained, you should tell the police if you wish to call someone personally in order to inform them of your detention, for example a family member or your employer.
In certain cases, the right to inform another person of your detention may be temporarily restricted.
In such cases, the police will inform you of this.
According to the Rights of Persons who are Arrested and Detained Law (sections 3(2)(b),(3) and (4), 5(1)(α) and (2):
When you are arrested or detained, you have the right to urgent medical assistance.
Please let the police know if you are in need of such assistance.
According to the Rights of Persons who are Arrested and Detained Law (sections 23, 24, 26, 27):
The costs for the medical examination, treatment and surveillance to be carried out by a doctor of the detained person's choice, shall be borned by the latter.
• In the event that the detained person desires to exercise the abovementioned right, he/she may do so by filling in and signing a relevant form and handing it over to a member of the police.
The detained person shall be entitled to keep a copy of said form, once the member of the police confirms receipt by signing it (Annex B).
H. PERIOD OF DEPRIVATION OF LIBERTY
After your arrest, you may be deprived of liberty or detained for a maximum period of twenty four (24) hours from the arrest, until you are brought before a judge.
At the end of that period, you must either be released or be heard by a judge who will decide on the continuation of your detention.
Ask your lawyer or the judge for information about the possibility to challenge the legality of your arrest, or the submission of application for provisional release.
I. VISITING RIGHTS
According to the Rights of Persons who are Arrested and Detained Law (sections 12(1) and 16):
• Every detained person and any relative or other person of their choice and, in the case of a detained person under the age of eighteen, his/her parents or guardians are entitled to meet daily for up to one hour overall in a private area in the detention centre, in the presence of a member of the police.
(c) live in a cell of a reasonable size, where the basic comforts and hygiene conditions are provided, sufficient lighting and ventilation, and appropriate resting equipment.
• In any case, a detained woman shall have the right to:
(a) if she is breastfeeding, continue breastfeeding in a private room in the detention centre and, if she so desires, the baby may stay with her in the cell at her own expenses;
(b) the necessary personal hygiene products or, she may purchase at her own expenses personal hygiene products of her choice;
N. EXERCISE OF RIGHTS
In the event that you do not ask to exercise some of your rights mentioned above, you shall not be deprived of the right to do so at a later point in time during your detention.
Time……………………………..
Signature of the detainee…………………...
PART Β
• Breakfast 7am - 9am
• Lunch 12.00 - 2 pm
• Dinner
• Bed time by 10 pm
• Mobile phones, money, jewellery, medicine, matches, lighters, laces, belts, ties, razors and any other dangerous items are not permitted within the cells.
• You may be provided with medicines only upon prescription or instructions given by a doctor.
• Alcoholic beverages are not permitted.
• Should you not wish to have the food provided by the police, you may obtain food at your own expenses provided that you have informed the manager of the detention centre in a timely manner and following inspection of the food for security purposes.
• It is your obligation to keep the space of your detention clean.
• It is not permitted to cause nuisance.
• In the event of deliberately causing damage in the detention centre, you shall be subject to criminal prosecution and you shall bear any restoration costs.
This shall be displayed in every cell in the Greek, English, and Turkish language, and a copy must be given to every detained person in a language they understand.
Date …………………… Time…………………….

View File

@ -0,0 +1,63 @@
B. POMOC ADWOKATA / PRAWO DO PORADY PRAWNEJ
Masz prawo do poufnej rozmowy z adwokatem.
Adwokat jest niezależny od policji.
Policja udzieli Ci pomocy w skontaktowaniu się z adwokatem.
Poproś policję o dodatkowe informacje.
Zgodnie z Ustawą dot. praw osób zatrzymanych i przebywających w areszcie (Art. 3(1)(c), 3(2)(a) oraz (4), 8(3)(b), 10, 12(1) oraz (3), 14(2):
Osoba zatrzymana przez funkcjonariusza Policji, zaraz po jej zatrzymaniu, ma prawo do kontaktu telefonicznego osobiście z wybranym przez siebie adwokatem, na osobności, bez obecności jakiejkolwiek osoby trzeciej.
W przypadku gdy zatrzymywana jest osoba, która w sposób ewidentny, nie jest w stanie z powodu jakiejkolwiek niepełnosprawności fizycznej lub umysłowej, skorzystać samodzielnie z prawa do kontaktu, o którym mowa powyżej, może z niego skorzystać z pomocą lub w obecności pracownika publicznych służb medycznych lub społecznych, którego powinna mieć do swej dyspozycji po jej aresztowaniu, zaraz jak tylko będzie to praktycznie możliwe.
W każdym przypadku udostępnia się zatrzymanemu, zaraz po jego zatrzymaniu, a w przypadku, gdy zatrzymania dokonuje się poza posterunkiem policji zaraz po wejściu na teren posterunku, listę nazwisk i numerów telefonów wszystkich adwokatów, którzy figurują w Rejestrze adwokatów wykonujących zawód .
Osoba zatrzymana może złożyć pisemny wniosek w Sądzie, który rozpatrzy czy osoba ta ma prawo do nieodpłatnie świadczonych usług adwokackich.
C. TŁUMACZENIE USTNE I PISEMNE
Jeżeli nie mówisz językiem, którym posługuje się policja lub inne właściwe organy, lub go nie rozumiesz, masz prawo do bezpłatnej pomocy tłumacza ustnego.
Tłumacz ustny może Ci pomóc w rozmowie z adwokatem i musi zachować poufność tej rozmowy.
Ponadto, masz prawo do nieodpłatnego tłumaczenia pisemnego wszystkich istotnych dokumentów (nakazu zatrzymania i/lub aresztowania, aktu oskarżenia, wszelkich decyzji sądowych i wyroku w ramach postępowania oraz jakiegokolwiek innego dokumentu, ocenionego jako istotny przez właściwy organ lub z urzędu, lub na uzasadniony wniosek Twój lub Twojego adwokata).
W niektórych okolicznościach możesz uzyskać tłumaczenie ustne i/lub streszczenie w formie ustnej istotnych dokumentów.
Zgodnie z Ustawą dotyczącą praw osób zatrzymanych i przebywających w areszcie (Art. 7(2), 12(4)oraz 27(3):
D. PRAWO DO ODMOWY SKŁADANIA WYJAŚNIEŃ
Podczas przesłuchania przez policję lub inne właściwe organy nie musisz odpowiadać na pytania dotyczące zarzucanego przestępstwa.
Twój adwokat pomoże Ci w podjęciu decyzji.
E. DOSTĘP DO DOKUMENTÓW ORAZ ZAKWESTIONOWANIE ZGODNOŚCI Z PRAWEM ZATRZYMANIA LUB ARESZTOWANIA
Gdy jesteś zatrzymany i aresztowany, Ty lub Twój adwokat macie prawo dostępu do istotnych dokumentów (kopii nakazu zatrzymania i/lub aresztowania, kopii wniosku i zaprzysiężonego oświadczenia, na podstawie których został wydany nakaz), które Ci potrzebne, aby zakwestionować zgodność z prawem Twojego zatrzymania lub aresztowania.
Jeżeli Twoja sprawa zostanie przekazana do sądu, Ty lub Twój adwokat będziecie mieli prawo dostępu do zeznań i dokumentów zebranych w trakcie przeprowadzonego śledztwa w sprawie toczącego się rozpoznawania czynu karalnego.
F. POINFORMOWANIE OSÓB TRZECICH O TWOIM ZATRZYMANIU LUB ARESZTOWANIU/POINFORMOWANIE TWOJEGO KONSULATU LUB AMBASADY
Gdy jesteś zatrzymany lub aresztowany, powinieneś zgłosić policji, jeżeli chcesz, aby ktoś - na przykład członek rodziny lub Twój pracodawca - został poinformowany o Twoim aresztowaniu.
W niektórych przypadkach prawo do poinformowania osoby trzeciej o Twoim aresztowaniu może być tymczasowo ograniczone.
W takich przypadkach policja poinformuje Cię o tym.
Zgodnie z Ustawą dotyczącą praw osób zatrzymanych i przebywających w areszcie (Art. 3(2)(b), (3) oraz (4), 5(1)(a) i (2):
Gdy jesteś zatrzymany lub aresztowany, masz prawo do pilnej pomocy medycznej.
Poinformuj policję, jeżeli potrzebujesz takiej pomocy.
Zgodnie z Ustawą dotyczącą praw osób zatrzymanych i przebywających w areszcie (Art. 23, 24, 26 i 27):
Koszty badania lekarskiego, opieki lub kontroli lekarskiej przez wybranego przez siebie lekarza ponosi zatrzymany.
W przypadku, kiedy zatrzymany chce skorzystać z przysługującego mu, wyżej wymienionego prawa, może to uczynić wypełniając i podpisując sporządzony w tym celu formularz i przekazując go funkcjonariuszowi policji.
Zatrzymany ma prawo do zachowania kopii tego formularza, którego odbiór powinien uprzednio potwierdzić funkcjonariusz policji przez złożenie na nim swojego podpisu (Załącznik B).
H. OKRES POZBAWIENIA WOLNOŚCI
Po zatrzymaniu możesz być pozbawiony wolności przez maksymalny okres 24 godzin (od chwili zatrzymania do momentu stanięcia przed sędzią).
Po upływie tego okresu musisz zostać zwolniony albo przesłuchany przez sędziego, który podejmie decyzję o dalszym okresie aresztowania.
Poproś adwokata lub sędziego o informacje dotyczące możliwości zakwestionowania zatrzymania lub wystąpienia o zwolnienie tymczasowe.
I. PRAWO DO WIDZEŃ
Zgodnie z Ustawą dotyczącą praw osób zatrzymanych i przebywających w areszcie (Art. 12(1) I 16):
Każdy zatrzymany ma prawo do widzeń z jakimkolwiek krewnym lub inną wybraną przez siebie osobą, a w przypadku zatrzymanego w wieku poniżej lat osiemnastu, z jego rodzicami lub opiekunami, codziennie, przez maksymalnie jedną godzinę łącznie w odosobnionym miejscu aresztu, w obecności funkcjonariusza Policji.
Zgodnie z Ustawą dotyczącą praw osób zatrzymanych i przebywających w areszcie (Art. 19, 20, 21 I 22) każdy aresztowany ma prawo do:
Zatrzymani przebywają w oddzielnych celach od tych, w których przebywają zatrzymani płci odmiennej.
Każda zatrzymana kobieta ma prawo do traktowania i zachowania nie stanowiącego bezpośredniego lub pośredniego molestowania seksualnego lub uwłaczania jej godności.
W każdym przypadku zatrzymana kobieta ma prawo:
N. KORZYSTANIE Z PRAW
W przypadku, jeżeli nie poprosiłeś/ o skorzystanie z któregokolwiek z wymienionych powyżej przysługujących Ci praw, nie tracisz prawa do skorzystania z niego później, w jakimkolwiek czasie, w trakcie twojego pobytu w areszcie.
Godzina ..........................
Podpis osoby aresztowanej ............................
CZĘŚĆ Β'
Godziny widzeń - od: godz.
9.00 rano do najpóźniej na godzinę przed zachodem słońca.
Śniadanie:
Cisza nocna:
Wewnątrz celi niedozwolone są: telefony komórkowe, pieniądze, biżuteria, leki, zapałki, zapalniczki, sznurówki, paski, krawaty, golarki i inne niebezpieczne przedmioty.
Leki mogą być podawane wyłączenie na receptę lub polecenie lekarza.
Niedozwolone napoje alkoholowe.
Jeżeli nie chesz spożywać posiłków oferowanych przez policję, możliwe jest zaopatrzenie się w żywność za własne środki, pod warunkiem zgłoszenia z odpowiednim wyprzedzeniem i sprawdzenia tej żywności, dla celów bezpieczeństwa.
Nie zanieczyszczanie pomieszczenia aresztu stanowi Twój obowiązek.
Nie zezwala się na uciążliwe zachowanie.
Celowe spowodowanie szkody w pomieszczeniu aresztu powoduje wszczęcie postępowania karnego i obciążenie kosztami naprawy.
Να αναρτάται σε κάθε κελί στην Ελληνική , Τουρκική και Αγγλική γλώσσα και αντίγραφο να δίδεται στον κρατούμενο σε γλώσσα καταληπτή από τον ίδιο.
131 - Załącznik

View File

@ -0,0 +1,69 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE cesAlign PUBLIC "-//CES//DTD XML cesAlign//EN" "http://www.xces.org/dtd/xcesAlign.dtd">
<cesAlign version="1.0">
<linkGrp targType="s" fromDoc="en/17.xml.gz" toDoc="pl/17.xml.gz">
<link xtargets="1;1" />
<link xtargets="2;2" />
<link xtargets="3;3" />
<link xtargets="4;4" />
<link xtargets="5;5" />
<link xtargets="6;6" />
<link xtargets="7;7" />
<link xtargets="8;8" />
<link xtargets="9;9" />
<link xtargets="10;10" />
<link xtargets="11;11" />
<link xtargets="12;12" />
<link xtargets="13;13" />
<link xtargets="14;14" />
<link xtargets="15;15" />
<link xtargets="16;16" />
<link xtargets="17;17" />
<link xtargets="18;18" />
<link xtargets="19;19" />
<link xtargets="20;20" />
<link xtargets="21;21" />
<link xtargets="22;22" />
<link xtargets="23;23" />
<link xtargets="24;24" />
<link xtargets="25;25" />
<link xtargets="26;26" />
<link xtargets="27;27" />
<link xtargets="28;28" />
<link xtargets="29;29" />
<link xtargets="30;30" />
<link xtargets="31;31" />
<link xtargets="32;32" />
<link xtargets="33;33" />
<link xtargets="34;34" />
<link xtargets="35;35" />
<link xtargets="36;36" />
<link xtargets="37;37" />
<link xtargets="38;38" />
<link xtargets="39;39" />
<link xtargets="40;40" />
<link xtargets="41;41" />
<link xtargets="42;42" />
<link xtargets="43;43" />
<link xtargets="44;44" />
<link xtargets="45;45" />
<link xtargets="46;46" />
<link xtargets="47;47" />
<link xtargets="48;48" />
<link xtargets="49;49" />
<link xtargets="50;50" />
<link xtargets="51;51" />
<link xtargets="52;52" />
<link xtargets="53;53" />
<link xtargets="54;54" />
<link xtargets="55;55" />
<link xtargets="56;56" />
<link xtargets="57;57" />
<link xtargets="58;58" />
<link xtargets="59;59" />
<link xtargets="60;60" />
<link xtargets="61;61" />
<link xtargets="62;62" />
</linkGrp>
</cesAlign>

View File

@ -0,0 +1,7 @@
We do not own any of the text from which the data has been extracted.
We only offer files that we believe we are free to redistribute.
If any doubt occurs about the legality of any of our file downloads
we will take them down right away once we have been contacted.
For more information, please check the information
given on the corpus website: http://opus.nlpl.eu/ELRC-648-Letter_rights_person-v1.php

View File

@ -0,0 +1,15 @@
Corpus Name: ELRC-648-Letter_rights_person
Package: ELRC-648-Letter_rights_person.en-pl in Moses format
Website: http://opus.nlpl.eu/ELRC-648-Letter_rights_person-v1.php
Release: v1
Release date: Sat Nov 26 18:30:52 EET 2022
License: CC-BY-4.0
Copyright: Check details at <a href=https://elrc-share.eu/repository/browse/letter-of-rights-for-persons-arrested-and-or-detained-processed/0102395e050811e8b7d400155d026706483f8695e5e94dc5beb5b835e17725bb/>ELRC share</a>Check details at <a href=https://www.elrc-share.eu>ELRC share</a>
This package is part of OPUS - the open collection of parallel corpora
OPUS Website: http://opus.nlpl.eu
Please acknowledge the <a href=https://elrc-share.eu/repository/browse/letter-of-rights-for-persons-arrested-and-or-detained-processed/0102395e050811e8b7d400155d026706483f8695e5e94dc5beb5b835e17725bb/>original sources and providers</a> of the data and also <a href=http://opus.lingfil.uu.se/LREC2012.txt>cite the following article</a> if you use the OPUS packages and downloads in your own work:<br/> J. Tiedemann, 2012, <a href=http://www.lrec-conf.org/proceedings/lrec2012/pdf/463_Paper.pdf><i>Parallel Data, Tools and Interfaces in OPUS.</i></a> In Proceedings of the 8th International Conference on Language Resources and Evaluation (LREC 2012)
Letter of rights for persons arrested and or detained (Processed)
ELRC-648-Letter_rights_persons_arrested_or_detained is a public data set distributed by the https://www.elrc-share.eu

View File

@ -0,0 +1,24 @@
Your consent would speed up the proceedings.
ASSISTANCE OF A LAWYER
A lawyer is independent from the police.
It may be difficult or even impossible to change this decision at a later stage.
POSSIBILITY TO CONSENT
PERIOD OF DEPRIVATION OF LIBERTY
Ask the police for more information.
INTERPRETATION AND TRANSLATION
You may in some circumstances be provided with an oral translation or summary.
INFORMATION ABOUT THE EUROPEAN ARREST WARRANT
If you do not speak or understand the language spoken by the police or other competent authorities, you have the right to be assisted by an interpreter, free of charge.
You may consent or not consent to being surrendered to the State seeking you.
You have been arrested on the basis of a European Arrest Warrant and you have the following rights:
Ask the authorities or your lawyer for more information.
You have the right to a translation of the European Arrest Warrant in a language you understand.
The interpreter may help you to talk to your lawyer and must keep the content of that communication confidential.
You have the right to speak confidentially to a lawyer.
If you do not consent to your surrender, you have the right to be heard by a judicial authority.
Following your arrest, you will be brought as soon as possible and in any case within twenty four (24) hours before the competent District Judge.
You have the right to be informed about the content of the European Arrest Warrant on the basis of which you have been arrested.
LETTER OF RIGHTS FOR PERSONS ARRESTED ON THE BASIS OF A EUROPEAN ARREST WARRANT
Ask the police if you need help to get in contact with a lawyer, the police shall help you.
HEARING

View File

@ -0,0 +1,24 @@
Twoja zgoda przyspieszyłaby postępowanie.
POMOC ZE STRONY ADWOKATA
Adwokat jest niezależny od policji.
Zaznacza się, że niemożliwa jest zmiana tej decyzji na późniejszym etapie.
MOŻLIWOŚĆ WYRAŻENIA ZGODY
OKRES POZBAWIENIA WOLNOŚCI
Poproś policję o dodatkowe informacje.
TŁUMACZENIE USTNE I PISEMNE
W niektórych okolicznościach możesz uzyskać tłumaczenie lub streszczenie w formie ustnej.
INFORMACJE O EUROPEJSKIM NAKAZIE ARESZTOWANIA
Jeżeli nie mówisz danym językiem, którym posługuje się policja lub inne właściwe organy, lub go nie rozumiesz, masz prawo do bezpłatnej pomocy tłumacza ustnego.
Możesz wyrazić zgodę lub odmówić wyrażenia zgody na przekazanie państwu członkowskiemu, które Cię poszukuje.
Zostałeś zatrzymany na podstawie europejskiego nakazu aresztowania i masz następujące prawa:
Poproś organy lub adwokata o dodatkowe informacje.
Masz prawo do tłumaczenia pisemnego europejskiego nakazu aresztowania w języku, który rozumiesz.
Tłumacz ustny może Ci pomóc w rozmowie z adwokatem i musi zachować poufność tej rozmowy.
Masz prawo do poufnej rozmowy z adwokatem.
Jeżeli nie zgadzasz się na wydanie, masz prawo do przesłuchania przez organ sądowy.
24 godzin, zostaniesz doprowadzony i staniesz przed właściwym Sędzią Okręgowym.
Masz prawo do otrzymania informacji o treści europejskiego nakazu aresztowania, na podstawie którego zostałeś zatrzymany.
POUCZENIE O PRAWACH DLA OSÓB ZATRZYMANYCH NA PODSTAWIE EUROPEJSKIEGO NAKAZU ARESZTOWANIA
Policja ma obowiązek pomóc Ci , jeżeli potrzebujesz pomocy w skontaktowaniu się z adwokatem.
E. PRZESŁUCHANIE

View File

@ -0,0 +1,30 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE cesAlign PUBLIC "-//CES//DTD XML cesAlign//EN" "http://www.xces.org/dtd/xcesAlign.dtd">
<cesAlign version="1.0">
<linkGrp targType="s" fromDoc="en/33.xml.gz" toDoc="pl/33.xml.gz">
<link xtargets="1;1" />
<link xtargets="2;2" />
<link xtargets="3;3" />
<link xtargets="4;4" />
<link xtargets="5;5" />
<link xtargets="6;6" />
<link xtargets="7;7" />
<link xtargets="8;8" />
<link xtargets="9;9" />
<link xtargets="10;10" />
<link xtargets="11;11" />
<link xtargets="12;12" />
<link xtargets="13;13" />
<link xtargets="14;14" />
<link xtargets="15;15" />
<link xtargets="16;16" />
<link xtargets="17;17" />
<link xtargets="18;18" />
<link xtargets="19;19" />
<link xtargets="20;20" />
<link xtargets="21;21" />
<link xtargets="22;22" />
<link xtargets="23;23" />
</linkGrp>
</cesAlign>

View File

@ -0,0 +1,7 @@
We do not own any of the text from which the data has been extracted.
We only offer files that we believe we are free to redistribute.
If any doubt occurs about the legality of any of our file downloads
we will take them down right away once we have been contacted.
For more information, please check the information
given on the corpus website: http://opus.nlpl.eu/ELRC-403-Rights_Arrested-v1.php

View File

@ -0,0 +1,15 @@
Corpus Name: ELRC-403-Rights_Arrested
Package: ELRC-403-Rights_Arrested.en-pl in Moses format
Website: http://opus.nlpl.eu/ELRC-403-Rights_Arrested-v1.php
Release: v1
Release date: Sat Nov 26 15:15:14 EET 2022
License: CC-BY-4.0
Copyright: Check details at <a href=https://elrc-share.eu/repository/browse/letter-of-rights-for-persons-arrested-on-the-basis-of-a-european-arrest-warrant-processed/4bc6efc86d1b11e7b7d400155d02670635cb9c208fbd46439728df5d8d651a85/>ELRC share</a>Check details at <a href=https://elrc-share.eu/repository/browse/letter-of-rights-for-persons-arrested-on-the-basis-of-a-european-arrest-warrant-processed/4bc6efc86d1b11e7b7d400155d02670635cb9c208fbd46439728df5d8d651a85/>ELRC share</a>
This package is part of OPUS - the open collection of parallel corpora
OPUS Website: http://opus.nlpl.eu
Please acknowledge the <a href=https://elrc-share.eu/repository/browse/letter-of-rights-for-persons-arrested-on-the-basis-of-a-european-arrest-warrant-processed/4bc6efc86d1b11e7b7d400155d02670635cb9c208fbd46439728df5d8d651a85/>original sources and providers</a> of the data and also <a href=http://opus.lingfil.uu.se/LREC2012.txt>cite the following article</a> if you use the OPUS packages and downloads in your own work:<br/> J. Tiedemann, 2012, <a href=http://www.lrec-conf.org/proceedings/lrec2012/pdf/463_Paper.pdf><i>Parallel Data, Tools and Interfaces in OPUS.</i></a> In Proceedings of the 8th International Conference on Language Resources and Evaluation (LREC 2012)
Letter of rights for persons arrested on the basis of a European Arrest Warrant (Processed)
ELRC-403-Rights_Arrested is a public data set distributed by the https://www.elrc-share.eu

View File

@ -0,0 +1,21 @@
You have been arrested on the basis of a European Arrest Warrant and you have the following rights:
PERIOD OF DEPRIVATION OF LIBERTY
INFORMATION ABOUT THE EUROPEAN ARREST WARRANT
You have the right to be informed about the content of the European Arrest Warrant on the basis of which you have been arrested.
ASSISTANCE OF A LAWYER
You have the right to speak confidentially to a lawyer.
A lawyer is independent from the police.
Ask the police if you need help to get in contact with a lawyer, the police shall help you.
Ask the police for more information.
INTERPRETATION AND TRANSLATION
If you do not speak or understand the language spoken by the police or other competent authorities, you have the right to be assisted by an interpreter, free of charge.
The interpreter may help you to talk to your lawyer and must keep the content of that communication confidential.
You have the right to a translation of the European Arrest Warrant in a language you understand.
You may in some circumstances be provided with an oral translation or summary.
POSSIBILITY TO CONSENT
You may consent or not consent to being surrendered to the State seeking you.
Your consent would speed up the proceedings.
It may be difficult or even impossible to change this decision at a later stage.
Ask the authorities or your lawyer for more information.
HEARING
If you do not consent to your surrender, you have the right to be heard by a judicial authority.

View File

@ -0,0 +1,21 @@
ΕΓΓΡΑΦΟ ΙΚΑΙΩΜΑΤΩΝ ΓΙΑ ΣΥΛΛΗΦΘΕΝΤΕΣ ΒΑΣΕΙ ΕΥΡΩΠΑΪΚΟΥ ΕΝΤΑΛΜΑΤΟΣ ΣΥΛΛΗΨΗΣ
OKRES POZBAWIENIA WOLNOŚCI
INFORMACJE O EUROPEJSKIM NAKAZIE ARESZTOWANIA
Masz prawo do otrzymania informacji o treści europejskiego nakazu aresztowania, na podstawie którego zostałeś zatrzymany.
POMOC ZE STRONY ADWOKATA
Masz prawo do poufnej rozmowy z adwokatem.
Adwokat jest niezależny od policji.
Policja ma obowiązek pomóc Ci , jeżeli potrzebujesz pomocy w skontaktowaniu się z adwokatem.
Poproś policję o dodatkowe informacje.
TŁUMACZENIE USTNE I PISEMNE
Jeżeli nie mówisz danym językiem, którym posługuje się policja lub inne właściwe organy, lub go nie rozumiesz, masz prawo do bezpłatnej pomocy tłumacza ustnego.
Tłumacz ustny może Ci pomóc w rozmowie z adwokatem i musi zachować poufność tej rozmowy.
Masz prawo do tłumaczenia pisemnego europejskiego nakazu aresztowania w języku, który rozumiesz.
W niektórych okolicznościach możesz uzyskać tłumaczenie lub streszczenie w formie ustnej.
MOŻLIWOŚĆ WYRAŻENIA ZGODY
Możesz wyrazić zgodę lub odmówić wyrażenia zgody na przekazanie państwu członkowskiemu, które Cię poszukuje.
Twoja zgoda przyspieszyłaby postępowanie.
Zaznacza się, że niemożliwa jest zmiana tej decyzji na późniejszym etapie.
Poproś organy lub adwokata o dodatkowe informacje.
E. PRZESŁUCHANIE
Jeżeli nie zgadzasz się na wydanie, masz prawo do przesłuchania przez organ sądowy.

View File

@ -0,0 +1,27 @@
<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE cesAlign PUBLIC "-//CES//DTD XML cesAlign//EN" "">
<cesAlign version="1.0">
<linkGrp targType="s" fromDoc="en/112.en-pl.xml.gz" toDoc="pl/112.en-pl.xml.gz">
<link xtargets="1;1" />
<link xtargets="2;2" />
<link xtargets="3;3" />
<link xtargets="4;4" />
<link xtargets="5;5" />
<link xtargets="6;6" />
<link xtargets="7;7" />
<link xtargets="8;8" />
<link xtargets="9;9" />
<link xtargets="10;10" />
<link xtargets="11;11" />
<link xtargets="12;12" />
<link xtargets="13;13" />
<link xtargets="14;14" />
<link xtargets="15;15" />
<link xtargets="16;16" />
<link xtargets="17;17" />
<link xtargets="18;18" />
<link xtargets="19;19" />
<link xtargets="20;20" />
<link xtargets="21;21" />
</linkGrp>
</cesAlign>

View File

@ -0,0 +1,247 @@
Attribution 4.0 International
Official translations of this license are available in other languages.
Creative Commons Corporation (“Creative Commons”) is not a law firm and does not provide
legal services or legal advice. Distribution of Creative Commons public licenses does not create
a lawyer-client or other relationship. Creative Commons makes its licenses and related
information available on an “as-is” basis. Creative Commons gives no warranties regarding its
licenses, any material licensed under their terms and conditions, or any related information.
Creative Commons disclaims all liability for damages resulting from their use to the fullest
extent possible.
Using Creative Commons Public Licenses
Creative Commons public licenses provide a standard set of terms and conditions that creators
and other rights holders may use to share original works of authorship and other material subject
to copyright and certain other rights specified in the public license below. The following
considerations are for informational purposes only, are not exhaustive, and do not form part of
our licenses.
Considerations for licensors: Our public licenses are intended for use by those authorized to give the
public permission to use material in ways otherwise restricted by copyright and certain other rights. Our
licenses are irrevocable. Licensors should read and understand the terms and conditions of the license
they choose before applying it. Licensors should also secure all rights necessary before applying our
licenses so that the public can reuse the material as expected. Licensors should clearly mark any
material not subject to the license. This includes other CC-licensed material, or material used under an
exception or limitation to copyright. More considerations for licensors.
Considerations for the public: By using one of our public licenses, a licensor grants the public permission
to use the licensed material under specified terms and conditions. If the licensor's permission is not
necessary for any reason–for example, because of any applicable exception or limitation to copyright–
then that use is not regulated by the license. Our licenses grant only permissions under copyright and
certain other rights that a licensor has authority to grant. Use of the licensed material may still be
restricted for other reasons, including because others have copyright or other rights in the material. A
licensor may make special requests, such as asking that all changes be marked or described. Although
not required by our licenses, you are encouraged to respect those requests where reasonable. More
considerations for the public.
Creative Commons Attribution 4.0 International Public License
By exercising the Licensed Rights (defined below), You accept and agree to be bound by the
terms and conditions of this Creative Commons Attribution 4.0 International Public License
("Public License"). To the extent this Public License may be interpreted as a contract, You are
granted the Licensed Rights in consideration of Your acceptance of these terms and conditions,
and the Licensor grants You such rights in consideration of benefits the Licensor receives from
making the Licensed Material available under these terms and conditions.
Section 1 – Definitions.
a. Adapted Material means material subject to Copyright and Similar Rights that is derived from or
based upon the Licensed Material and in which the Licensed Material is translated, altered,
arranged, transformed, or otherwise modified in a manner requiring permission under the
Copyright and Similar Rights held by the Licensor. For purposes of this Public License, where the
Licensed Material is a musical work, performance, or sound recording, Adapted Material is
always produced where the Licensed Material is synched in timed relation with a moving image.
b. Adapter's License means the license You apply to Your Copyright and Similar Rights in Your
contributions to Adapted Material in accordance with the terms and conditions of this Public
License.
c. Copyright and Similar Rights means copyright and/or similar rights closely related to copyright
including, without limitation, performance, broadcast, sound recording, and Sui Generis
Database Rights, without regard to how the rights are labeled or categorized. For purposes of
this Public License, the rights specified in Section 2(b)(1)-(2) are not Copyright and Similar Rights.
d. Effective Technological Measures means those measures that, in the absence of proper
authority, may not be circumvented under laws fulfilling obligations under Article 11 of the
WIPO Copyright Treaty adopted on December 20, 1996, and/or similar international
agreements.
e. Exceptions and Limitations means fair use, fair dealing, and/or any other exception or limitation
to Copyright and Similar Rights that applies to Your use of the Licensed Material.
f. Licensed Material means the artistic or literary work, database, or other material to which the
Licensor applied this Public License.
g. Licensed Rights means the rights granted to You subject to the terms and conditions of this
Public License, which are limited to all Copyright and Similar Rights that apply to Your use of the
Licensed Material and that the Licensor has authority to license.
h. Licensor means the individual(s) or entity(ies) granting rights under this Public License.
i. Share means to provide material to the public by any means or process that requires permission
under the Licensed Rights, such as reproduction, public display, public performance,
distribution, dissemination, communication, or importation, and to make material available to
the public including in ways that members of the public may access the material from a place
and at a time individually chosen by them.
j. Sui Generis Database Rights means rights other than copyright resulting from Directive 96/9/EC
of the European Parliament and of the Council of 11 March 1996 on the legal protection of
databases, as amended and/or succeeded, as well as other essentially equivalent rights
anywhere in the world.
k. You means the individual or entity exercising the Licensed Rights under this Public License. Your
has a corresponding meaning.
Section 2 – Scope.
a. License grant.
1. Subject to the terms and conditions of this Public License, the Licensor hereby grants
You a worldwide, royalty-free, non-sublicensable, non-exclusive, irrevocable license to
exercise the Licensed Rights in the Licensed Material to:
A. reproduce and Share the Licensed Material, in whole or in part; and
B. produce, reproduce, and Share Adapted Material.
2. Exceptions and Limitations. For the avoidance of doubt, where Exceptions and
Limitations apply to Your use, this Public License does not apply, and You do not need to
comply with its terms and conditions.
3. Term. The term of this Public License is specified in Section 6(a).
4. Media and formats; technical modifications allowed. The Licensor authorizes You to
exercise the Licensed Rights in all media and formats whether now known or hereafter
created, and to make technical modifications necessary to do so. The Licensor waives
and/or agrees not to assert any right or authority to forbid You from making technical
modifications necessary to exercise the Licensed Rights, including technical
modifications necessary to circumvent Effective Technological Measures. For purposes
of this Public License, simply making modifications authorized by this Section 2(a)(4)
never produces Adapted Material.
5. Downstream recipients.
A. Offer from the Licensor – Licensed Material. Every recipient of the Licensed
Material automatically receives an offer from the Licensor to exercise the
Licensed Rights under the terms and conditions of this Public License.
B. No downstream restrictions. You may not offer or impose any additional or
different terms or conditions on, or apply any Effective Technological Measures
to, the Licensed Material if doing so restricts exercise of the Licensed Rights by
any recipient of the Licensed Material.
6. No endorsement. Nothing in this Public License constitutes or may be construed as
permission to assert or imply that You are, or that Your use of the Licensed Material is,
connected with, or sponsored, endorsed, or granted official status by, the Licensor or
others designated to receive attribution as provided in Section 3(a)(1)(A)(i).
b. Other rights.
1. Moral rights, such as the right of integrity, are not licensed under this Public License, nor
are publicity, privacy, and/or other similar personality rights; however, to the extent
possible, the Licensor waives and/or agrees not to assert any such rights held by the
Licensor to the limited extent necessary to allow You to exercise the Licensed Rights, but
not otherwise.
2. Patent and trademark rights are not licensed under this Public License.
3. To the extent possible, the Licensor waives any right to collect royalties from You for the
exercise of the Licensed Rights, whether directly or through a collecting society under
any voluntary or waivable statutory or compulsory licensing scheme. In all other cases
the Licensor expressly reserves any right to collect such royalties.
Section 3 – License Conditions.
Your exercise of the Licensed Rights is expressly made subject to the following conditions.
a. Attribution.
1. If You Share the Licensed Material (including in modified form), You must:
A. retain the following if it is supplied by the Licensor with the Licensed Material:
i. identification of the creator(s) of the Licensed Material and any others
designated to receive attribution, in any reasonable manner requested
by the Licensor (including by pseudonym if designated);
ii. a copyright notice;
iii. a notice that refers to this Public License;
iv. a notice that refers to the disclaimer of warranties;
v. a URI or hyperlink to the Licensed Material to the extent reasonably
practicable;
B. indicate if You modified the Licensed Material and retain an indication of any
previous modifications; and
C. indicate the Licensed Material is licensed under this Public License, and include
the text of, or the URI or hyperlink to, this Public License.
2. You may satisfy the conditions in Section 3(a)(1) in any reasonable manner based on the
medium, means, and context in which You Share the Licensed Material. For example, it
may be reasonable to satisfy the conditions by providing a URI or hyperlink to a resource
that includes the required information.
3. If requested by the Licensor, You must remove any of the information required by
Section 3(a)(1)(A) to the extent reasonably practicable.
4. If You Share Adapted Material You produce, the Adapter's License You apply must not
prevent recipients of the Adapted Material from complying with this Public License.
Section 4 – Sui Generis Database Rights.
Where the Licensed Rights include Sui Generis Database Rights that apply to Your use of the
Licensed Material:
a. for the avoidance of doubt, Section 2(a)(1) grants You the right to extract, reuse, reproduce, and
Share all or a substantial portion of the contents of the database;
b. if You include all or a substantial portion of the database contents in a database in which You
have Sui Generis Database Rights, then the database in which You have Sui Generis Database
Rights (but not its individual contents) is Adapted Material; and
c. You must comply with the conditions in Section 3(a) if You Share all or a substantial portion of
the contents of the database.
For the avoidance of doubt, this Section 4 supplements and does not replace Your obligations under this
Public License where the Licensed Rights include other Copyright and Similar Rights.
Section 5 – Disclaimer of Warranties and Limitation of Liability.
a. Unless otherwise separately undertaken by the Licensor, to the extent possible, the Licensor
offers the Licensed Material as-is and as-available, and makes no representations or
warranties of any kind concerning the Licensed Material, whether express, implied, statutory,
or other. This includes, without limitation, warranties of title, merchantability, fitness for a
particular purpose, non-infringement, absence of latent or other defects, accuracy, or the
presence or absence of errors, whether or not known or discoverable. Where disclaimers of
warranties are not allowed in full or in part, this disclaimer may not apply to You.
b. To the extent possible, in no event will the Licensor be liable to You on any legal theory
(including, without limitation, negligence) or otherwise for any direct, special, indirect,
incidental, consequential, punitive, exemplary, or other losses, costs, expenses, or damages
arising out of this Public License or use of the Licensed Material, even if the Licensor has been
advised of the possibility of such losses, costs, expenses, or damages. Where a limitation of
liability is not allowed in full or in part, this limitation may not apply to You.
c. The disclaimer of warranties and limitation of liability provided above shall be interpreted in a
manner that, to the extent possible, most closely approximates an absolute disclaimer and
waiver of all liability.
Section 6 – Term and Termination.
a. This Public License applies for the term of the Copyright and Similar Rights licensed here.
However, if You fail to comply with this Public License, then Your rights under this Public License
terminate automatically.
b. Where Your right to use the Licensed Material has terminated under Section 6(a), it
reinstates:
1. automatically as of the date the violation is cured, provided it is cured within 30 days of
Your discovery of the violation; or
2. upon express reinstatement by the Licensor.
For the avoidance of doubt, this Section 6(b) does not affect any right the Licensor may have to
seek remedies for Your violations of this Public License.
c. For the avoidance of doubt, the Licensor may also offer the Licensed Material under separate
terms or conditions or stop distributing the Licensed Material at any time; however, doing so
will not terminate this Public License.
d. Sections 1, 5, 6, 7, and 8 survive termination of this Public License.
Section 7 – Other Terms and Conditions.
a. The Licensor shall not be bound by any additional or different terms or conditions
communicated by You unless expressly agreed.
b. Any arrangements, understandings, or agreements regarding the Licensed Material not stated
herein are separate from and independent of the terms and conditions of this Public License.
Section 8 – Interpretation.
a. For the avoidance of doubt, this Public License does not, and shall not be interpreted to, reduce,
limit, restrict, or impose conditions on any use of the Licensed Material that could lawfully be
made without permission under this Public License.
b. To the extent possible, if any provision of this Public License is deemed unenforceable, it shall be
automatically reformed to the minimum extent necessary to make it enforceable. If the
provision cannot be reformed, it shall be severed from this Public License without affecting the
enforceability of the remaining terms and conditions.
c. No term or condition of this Public License will be waived and no failure to comply consented to
unless expressly agreed to by the Licensor.
d. Nothing in this Public License constitutes or may be interpreted as a limitation upon, or waiver
of, any privileges and immunities that apply to the Licensor or You, including from the legal
processes of any jurisdiction or authority.
Creative Commons is not a party to its public licenses. Notwithstanding, Creative Commons may
elect to apply one of its public licenses to material it publishes and in those instances will be
considered the “Licensor.” The text of the Creative Commons public licenses is dedicated to the
public domain under the CC0 Public Domain Dedication. Except for the limited purpose of
indicating that material is shared under a Creative Commons public license or as otherwise
permitted by the Creative Commons policies published at creativecommons.org/policies,
Creative Commons does not authorize the use of the trademark “Creative Commons” or any
other trademark or logo of Creative Commons without its prior written consent including,
without limitation, in connection with any unauthorized modifications to any of its public
licenses or any other arrangements, understandings, or agreements concerning use of licensed
material. For the avoidance of doubt, this paragraph does not form part of the public licenses.
Creative Commons may be contacted at creativecommons.org.

View File

@ -0,0 +1,15 @@
Corpus Name: ELRA-W0301
Package: ELRA-W0301.en-pl in Moses format
Website: http://opus.nlpl.eu/ELRA-W0301-v1.php
Release: v1
Release date: Fri May 15 23:31:56 EEST 2020
License: CC-BY-4.0
Copyright: CC-BY-4.0
This corpus is part of OPUS - the open collection of parallel corpora
OPUS Website: http://opus.nlpl.eu
Please cite the following article if you use any part of the corpus in your own work: J. Tiedemann, 2012, Parallel Data, Tools and Interfaces in OPUS. In Proceedings of the 8th International Conference on Language Resources and Evaluation (LREC 2012)
ELRC_403_Letter\ of\ rights\ for\ persons\ arrested
public data set from https://www.elrc-share.eu

1
lab/txt/en.txt Normal file

File diff suppressed because one or more lines are too long

156
lab/txt/hunaligna.txt Normal file
View File

@ -0,0 +1,156 @@
# Sentence pairs
0
# Source
Apple Apple Apple Store Maci Padi Phone Watch Vision Air Pods T V & Home Entertainment Accessories Support 0+ Buy Mac or i Pad for college with education savings Get a gift card up to $150* Only at the Apple Store Shop Buy Mac or i Pad for college with education savings Get Air Pods with Mac* Apple Pencil with i Pad* Only at the Apple Store Shop i Phone Our most powerful cameras yet
# Target
Apple ( Polska) Apple Apple Sklep Maci Padi Phone Watch Air Pods T V i Dom Rozrywka Akcesoria Wsparcie 0+ i Phone 15 Nowy aparat
1
# Source
Ultrafast chips
# Target
Nowa konstrukcja
2
# Source
And U S B- C
# Target
Zachwytologia w praktyce
3
# Source
Learn more Shop i Phone College Students Mac and i Pad
# Target
Dowiedz się więcej Kup i Phone 15 Pro Tytan
4
# Source
Go further
# Target
Taki mocny
5
# Source
Learn more Mac Book Air Lean
# Target
Taki lekki
6
# Source
Mean
# Target
Taki Pro
7
# Source
M3 machine
# Target
Dowiedz się więcej Kup i Pad Pro Ultrasmukłość
8
# Source
Learn more Buy i Pad Air Two sizes
# Target
Megamoc
9
# Source
Faster chip
# Target
Dowiedz się więcej Kup Studenci Mac i i Pad
10
# Source
Does it all
# Target
Możesz więcej
11
# Source
Learn more Buy Mac Book Pro Mind-blowing
# Target
Dowiedz się więcej Mac Book Air Smukły sprinter
12
# Source
Head-turning
# Target
Z grubą mocą M3
13
# Source
Learn more Buy i Pad Pro Unbelievably thin
# Target
Dowiedz się więcej Kup Mac Book Pro Miażdży mocą
14
# Source
Incredibly powerful
# Target
Kusi formą
15
# Source
Learn more Buy i Phone 15 Pro Titanium
# Target
Dowiedz się więcej Kup Apple Watch Series 9 Sprytniej
16
# Source
So strong
# Target
Jaśniej
17
# Source
So light
# Target
Mocniej
18
# Source
So Pro
# Target
Dowiedz się więcej Kup i Pad Air Dwa rozmiary
19
# Source
Learn more Buy Air Pods Pro Adaptive Audio
# Target
Szybszy czip
20
# Source
Now playing
# Target
Wszystko hula
21
# Source
Learn more Buy Apple Card Get up to 3% Daily Cash back with every purchase
# Target
Dowiedz się więcej Kup Air Pods Pro Dźwięk adaptacyjny
22
# Source
Learn more Apply now Apply now Apple Trade In Get $170-$630 in credit when you trade in i Phone 11 or higher.1 Get your estimate Apple T V+ F A M Gallery Play now N B A 2 K24 Arcade Edition Watch now Get Your Shoulders in Great Shape Listen now Todays Country Play now Hello Kitty Island Adventure Watch now H I I T with Anja Listen now R& B Now Apple Footer * Available for Qualified Purchasers only
# Target
Teraz brzmi
23
# Source
Qualified Purchasers receive an Apple Gift Card when they purchase an Eligible Product at a Qualifying Location through September 30, 2024
# Target
Dowiedz się więcej Kup Apple T V+ Zápatí Apple Prezentowane treści są dostępne w apce Apple T V
24
# Source
Gift card values may vary by Eligible Product
# Target
Zestaw oferowanych funkcji może ulec zmianie
25
# Source
Only one Apple Gift Card per Eligible Product per Qualified Purchaser
# Target
Niektóre funkcje, aplikacje i usługi mogą nie być dostępne we wszystkich regionach i językachch
26
# Source
Offer subject to availability
# Target
Kupuj i poznawaj Kupuj i poznawaj Sklep Mac i Pad i Phone Watch Air Pods T V i Dom Air Tag Akcesoria Karty upominkowe Portfel Apple Portfel Apple Apple Pay Konto Konto Obsługa Twojego Apple I D Konto w Apple Store i Cloud.com Rozrywka Rozrywka Apple One Apple T V+ Apple Music Apple Arcade App Store Apple Store Apple Store Aplikacja Apple Store Odnowione i certyfikowane Stan zamówienia Pomoc w zakupach Dla biznesu Dla biznesu Apple i biznes Kupuj do firmy Dla edukacji Dla edukacji Apple i edukacja Zakupy na uczelnię Wartości Apple Wartości Apple Dostępność Prywatność O firmie Apple O firmie Apple Newsroom Apple Leadership Praca Gwarancja Inwestorzy Etyka i zgodność z prawem Kontakt z Apple Inne sposoby zakupu: znajdź sprzedawcę w pobliżu
27
# Source
While supplies last
# Target
Lub zadzwoń pod numer 800 702 322
28
# Source
Qualified Purchasers shall receive a discount equal to the value of the Apple Gift Card off the price of the Eligible Product, but will be charged for all items in their cart, including the Apple Gift Card
# Target
Polska Copyright © 2024 Apple Inc
29
# Source
Important notice regarding the checkout receipt and monthly statement for Apple Card Monthly Installments ( A C M I) purchases with this promotion: Qualified Purchasers selecting A C M I (a 0% A P R payment option available only in the U
# Target
Wszelkie prawa zastrzeżone
30
# Source
S.) as payment type at checkout shall receive a discount equal to the value of the Apple Gift Card off the price of the Eligible Product
# Target
Polityka prywatności Wykorzystanie plików cookie Warunki korzystania Sprzedaż i zwroty Informacje prawne Mapa witryny

2
lab/txt/output.xliff Normal file

File diff suppressed because one or more lines are too long

1
lab/txt/pl.txt Normal file
View File

@ -0,0 +1 @@
Apple ( Polska) Apple Apple Sklep Maci Padi Phone Watch Air Pods T V i Dom Rozrywka Akcesoria Wsparcie 0+ i Phone 15 Nowy aparat. Nowa konstrukcja. Zachwytologia w praktyce. Dowiedz się więcej Kup i Phone 15 Pro Tytan. Taki mocny. Taki lekki. Taki Pro. Dowiedz się więcej Kup i Pad Pro Ultrasmukłość. Megamoc. Dowiedz się więcej Kup Studenci Mac i i Pad. Możesz więcej. Dowiedz się więcej Mac Book Air Smukły sprinter. Z grubą mocą M3. Dowiedz się więcej Kup Mac Book Pro Miażdży mocą. Kusi formą. Dowiedz się więcej Kup Apple Watch Series 9 Sprytniej. Jaśniej. Mocniej. Dowiedz się więcej Kup i Pad Air Dwa rozmiary. Szybszy czip. Wszystko hula. Dowiedz się więcej Kup Air Pods Pro Dźwięk adaptacyjny. Teraz brzmi. Dowiedz się więcej Kup Apple T V+ Zápatí Apple Prezentowane treści są dostępne w apce Apple T V. Zestaw oferowanych funkcji może ulec zmianie. Niektóre funkcje, aplikacje i usługi mogą nie być dostępne we wszystkich regionach i językachch. Kupuj i poznawaj Kupuj i poznawaj Sklep Mac i Pad i Phone Watch Air Pods T V i Dom Air Tag Akcesoria Karty upominkowe Portfel Apple Portfel Apple Apple Pay Konto Konto Obsługa Twojego Apple I D Konto w Apple Store i Cloud.com Rozrywka Rozrywka Apple One Apple T V+ Apple Music Apple Arcade App Store Apple Store Apple Store Aplikacja Apple Store Odnowione i certyfikowane Stan zamówienia Pomoc w zakupach Dla biznesu Dla biznesu Apple i biznes Kupuj do firmy Dla edukacji Dla edukacji Apple i edukacja Zakupy na uczelnię Wartości Apple Wartości Apple Dostępność Prywatność O firmie Apple O firmie Apple Newsroom Apple Leadership Praca Gwarancja Inwestorzy Etyka i zgodność z prawem Kontakt z Apple Inne sposoby zakupu: znajdź sprzedawcę w pobliżu. Lub zadzwoń pod numer 800 702 322. Polska Copyright © 2024 Apple Inc. Wszelkie prawa zastrzeżone. Polityka prywatności Wykorzystanie plików cookie Warunki korzystania Sprzedaż i zwroty Informacje prawne Mapa witryny