diff --git a/lab/lab_01.ipynb b/lab/lab_01.ipynb index 0ffe833..1e612e2 100644 --- a/lab/lab_01.ipynb +++ b/lab/lab_01.ipynb @@ -52,7 +52,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 77, "id": "narrow-romantic", "metadata": {}, "outputs": [], @@ -71,7 +71,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 78, "id": "indonesian-electron", "metadata": {}, "outputs": [], @@ -82,7 +82,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 79, "id": "compact-trinidad", "metadata": {}, "outputs": [ @@ -92,7 +92,7 @@ "['Press the ENTER button']" ] }, - "execution_count": 3, + "execution_count": 79, "metadata": {}, "output_type": "execute_result" } @@ -119,7 +119,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 80, "id": "exposed-daniel", "metadata": {}, "outputs": [], @@ -139,7 +139,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 81, "id": "serial-velvet", "metadata": {}, "outputs": [ @@ -149,7 +149,7 @@ "['Press the ENTER button', 'Press the ENTER key']" ] }, - "execution_count": 5, + "execution_count": 81, "metadata": {}, "output_type": "execute_result" } @@ -176,7 +176,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 82, "id": "every-gibson", "metadata": {}, "outputs": [ @@ -186,7 +186,7 @@ "[]" ] }, - "execution_count": 6, + "execution_count": 82, "metadata": {}, "output_type": "execute_result" } @@ -213,13 +213,15 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 83, "id": "protected-rings", "metadata": {}, "outputs": [], "source": [ "def tm_lookup(sentence):\n", - " return ''" + " lowerSentence = sentence.lower()\n", + "\n", + " return [entry[1] for entry in translation_memory if entry[0].lower() == lowerSentence]" ] }, { @@ -232,17 +234,17 @@ }, { "cell_type": "code", - "execution_count": 18, - "id": "severe-alloy", + "execution_count": 84, + "id": "60a6c976", "metadata": {}, 
"outputs": [ { "data": { "text/plain": [ - "''" + "[]" ] }, - "execution_count": 18, + "execution_count": 84, "metadata": {}, "output_type": "execute_result" } @@ -261,13 +263,21 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 85, "id": "structural-diesel", "metadata": {}, "outputs": [], "source": [ + "import string\n", + "\n", + "def prepare_sentence(sentence):\n", + " translator = str.maketrans('', '', string.punctuation)\n", + "\n", + " return sentence.lower().translate(translator)\n", + "\n", "def tm_lookup(sentence):\n", - " return ''" + " lowerSentence = prepare_sentence(sentence)\n", + " return [entry[1] for entry in translation_memory if prepare_sentence(entry[0]) == lowerSentence]" ] }, { @@ -280,17 +290,17 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 86, "id": "brief-senegal", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "''" + "[]" ] }, - "execution_count": 12, + "execution_count": 86, "metadata": {}, "output_type": "execute_result" } @@ -317,13 +327,43 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 87, "id": "mathematical-customs", "metadata": {}, "outputs": [], "source": [ + "import string\n", + "\n", + "def prepare_sentence(sentence):\n", + " translator = str.maketrans('', '', string.punctuation)\n", + "\n", + " return sentence.lower().translate(translator)\n", + "\n", + "def sentence_similar(sentence1, sentence2):\n", + " words1 = sentence1.split()\n", + " words2 = sentence2.split()\n", + " \n", + " min_length = min(len(words1), len(words2))\n", + " \n", + " matched_count = 0\n", + " for i in range(min_length):\n", + " if prepare_sentence(words1[i]) == prepare_sentence(words2[i]):\n", + " matched_count += 1\n", + " \n", + " return {\n", + " \"count\": matched_count,\n", + " \"length\": min_length\n", + " }\n", + "\n", "def tm_lookup(sentence):\n", - " return ''" + " collection = []\n", + "\n", + " for entry in translation_memory:\n", + " similarity = 
sentence_similar(sentence, entry[0])\n", + " if similarity[\"length\"] - similarity[\"count\"] <= 1:\n", + " collection.append(entry[1])\n", + "\n", + " return collection\n" ] }, { @@ -344,7 +384,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 88, "id": "humanitarian-wrong", "metadata": {}, "outputs": [], @@ -362,7 +402,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 89, "id": "located-perception", "metadata": {}, "outputs": [], @@ -374,7 +414,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 90, "id": "advised-casting", "metadata": {}, "outputs": [ @@ -384,7 +424,7 @@ "[('przycisk', 'button'), ('drukarka', 'printer')]" ] }, - "execution_count": 17, + "execution_count": 90, "metadata": {}, "output_type": "execute_result" } @@ -406,7 +446,7 @@ "id": "defensive-fifteen", "metadata": {}, "source": [ - "Odpowiedź:" + "Odpowiedź: Jest to n przeszukiwań po liście m-elementowej, co daje złożoność O(n*m)" ] }, { @@ -419,13 +459,40 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 91, "id": "original-tunisia", "metadata": {}, "outputs": [], "source": [ + "def prepare_sentence(sentence):\n", + " return sentence.lower()\n", + "\n", + "def glossary_lookup(sentence):\n", + " sentence_words = sentence.split()\n", + " lowered_words = list(map(prepare_sentence, sentence_words))\n", + "\n", + " return [entry for entry in glossary if prepare_sentence(entry[0]) in lowered_words]" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "id": "df948bb3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('komputer', 'computer')]" + ] + }, + "execution_count": 92, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "glossary_lookup(\"Komputer\")" ] }, { @@ -438,13 +505,44 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 93, "id": "adolescent-semiconductor", "metadata": {}, "outputs": [], 
"source": [ + "def prepare_dictionary(sentences):\n", + " dict = {}\n", + "\n", + " for entry in sentences:\n", + " dict[entry[0].lower()] = entry\n", + "\n", + " return dict\n", + "\n", + "glossary_dict = prepare_dictionary(glossary)\n", + "\n", "def glossary_lookup(sentence):\n", - " return ''" + " return glossary_dict[sentence.lower()]" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "id": "98e9ff56", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('komputer', 'computer')" + ] + }, + "execution_count": 94, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "glossary_lookup(\"Komputer\")" ] } ], @@ -467,7 +565,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.7.9" }, "subtitle": "1. Podstawowe techniki wspomagania tłumaczenia", "title": "Komputerowe wspomaganie tłumaczenia", diff --git a/lab/lab_02.ipynb b/lab/lab_02.ipynb index 10c2003..d4490ae 100644 --- a/lab/lab_02.ipynb +++ b/lab/lab_02.ipynb @@ -57,7 +57,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 92, "id": "confident-prison", "metadata": {}, "outputs": [], @@ -80,15 +80,51 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 93, "id": "continental-submission", "metadata": {}, "outputs": [], "source": [ - "def ice_lookup(sentence, prev_sentence, next_sentence):\n", + "def prepare_dictionary(sentences):\n", + " dict = {}\n", + "\n", + " for entry in sentences:\n", + " dict[entry[0].lower()] = entry\n", + "\n", + " return dict\n", + "\n", + "memory_dict = prepare_dictionary(translation_memory)\n", + "\n", + "def ice_lookup(input, prev_sentence, next_sentence): \n", + " sentence = input.lower()\n", + "\n", + " if prev_sentence.lower() in memory_dict and next_sentence.lower() in memory_dict and sentence in memory_dict:\n", + " return memory_dict[sentence]\n", + "\n", " return []" ] }, + { + "cell_type": "code", + 
"execution_count": 94, + "id": "bdc1df76", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "('Wciśnij przycisk Enter', 'Press the ENTER button')" + ] + }, + "execution_count": 94, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ice_lookup(\"Wciśnij przycisk Enter\", \"Sprawdź ustawienia sieciowe\", \"Drukarka jest wyłączona\")" + ] + }, { "cell_type": "markdown", "id": "figured-server", @@ -119,7 +155,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 95, "id": "fourth-pillow", "metadata": {}, "outputs": [], @@ -141,7 +177,11 @@ "id": "graduate-theorem", "metadata": {}, "source": [ - "Odpowiedź:" + "Odpowiedź: Nie, nie jest poprawna. Cechy:\n", + "- nieujemna (abs >= 0)\n", + "- identyfikacja nie jest spełniona -> mogą mieć taką samą długość, a być inne\n", + "- symetryczna - wynik z wartości bezwzględnej\n", + "- nierówność trójkąta nie jest spełniona" ] }, { @@ -154,7 +194,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 96, "id": "continued-christopher", "metadata": {}, "outputs": [], @@ -179,7 +219,7 @@ "id": "metallic-leave", "metadata": {}, "source": [ - "Odpowiedź:" + "Odpowiedź: z punktu widzenia cech, wszystkie cechy są spełnione, jednak funkcja sama w sobie jest bezużyteczna poprzez to, że wartości są stałe. 
" ] }, { @@ -206,7 +246,10 @@ "id": "bibliographic-stopping", "metadata": {}, "source": [ - "Odpowiedź:" + "Odpowiedź: Tak, ponieważ spełnia cechy: \n", + "- nieujemności - zawsze dodatni lub zero, gdy a i b są sobie równe\n", + "- symetria - dystans od a do b jest taki sam jak od b do a\n", + "- nierówność trójkąta - bezpośredni dystans od ciągu A do C jest zawsze mniejszy lub równy sumie dystansów od A do B i od B do C (czyli drodze przez B)" ] }, { @@ -223,7 +266,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 97, "id": "secondary-wrist", "metadata": {}, "outputs": [ @@ -233,7 +276,7 @@ "2" ] }, - "execution_count": 5, + "execution_count": 97, "metadata": {}, "output_type": "execute_result" } @@ -254,7 +297,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 98, "id": "associate-tuner", "metadata": {}, "outputs": [], @@ -273,7 +316,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 99, "id": "focal-pathology", "metadata": {}, "outputs": [ @@ -283,7 +326,7 @@ "0.9166666666666666" ] }, - "execution_count": 7, + "execution_count": 99, "metadata": {}, "output_type": "execute_result" } @@ -294,7 +337,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 100, "id": "roman-ceiling", "metadata": {}, "outputs": [ @@ -304,7 +347,7 @@ "0.9428571428571428" ] }, - "execution_count": 8, + "execution_count": 100, "metadata": {}, "output_type": "execute_result" } @@ -315,7 +358,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 101, "id": "invisible-cambodia", "metadata": {}, "outputs": [ @@ -325,7 +368,7 @@ "0.631578947368421" ] }, - "execution_count": 9, + "execution_count": 101, "metadata": {}, "output_type": "execute_result" } @@ -344,13 +387,61 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 102, "id": "genetic-cradle", "metadata": {}, "outputs": [], "source": [ "def fuzzy_lookup(sentence, threshold):\n", - " return []" + " col = []\n", + "\n", + " for 
entry in translation_memory:\n", + " if (levenshtein_similarity(entry[0], sentence)) >= threshold:\n", + " col.append(entry)\n", + " \n", + " return col" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "id": "57fb39b9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 103, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fuzzy_lookup('Spróbuj wyłączyć i włączyć komputer', 0.7)" + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "id": "94e1b3be", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('Wciśnij przycisk Enter', 'Press the ENTER button')]" + ] + }, + "execution_count": 104, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "fuzzy_lookup('Wciśnij przycisk escape', 0.7)" ] } ], @@ -373,7 +464,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.7.9" }, "subtitle": "2. 
Zaawansowane użycie pamięci tłumaczeń", "title": "Komputerowe wspomaganie tłumaczenia", diff --git a/lab/lab_03.ipynb b/lab/lab_03.ipynb index 5707f0d..4b3b331 100644 --- a/lab/lab_03.ipynb +++ b/lab/lab_03.ipynb @@ -86,7 +86,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 31, "id": "loving-prince", "metadata": {}, "outputs": [], @@ -110,12 +110,12 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 32, "id": "bound-auction", "metadata": {}, "outputs": [], "source": [ - "dictionary = ['program', 'application', 'applet' 'compile']" + "dictionary = ['program', 'application', 'applet', 'compile']" ] }, { @@ -128,13 +128,47 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 33, "id": "cognitive-cedar", "metadata": {}, "outputs": [], "source": [ - "def terminology_lookup():\n", - " return []" + "import re\n", + "\n", + "def terminology_lookup(txt, labels):\n", + " results = []\n", + "\n", + " for label in labels:\n", + " results.append((\n", + " label,\n", + " [(m.start(), m.end() - 1) for m in re.finditer(label, txt)]\n", + " ))\n", + "\n", + " return results" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "7cc3ad1f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[('program', [(14, 20), (291, 297), (468, 474), (516, 522), (533, 539)]),\n", + " ('application', [(80, 90), (164, 174), (322, 332)]),\n", + " ('applet', [(302, 307)]),\n", + " ('compile', [(56, 62)])]" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "terminology_lookup(text, dictionary)" ] }, { @@ -161,7 +195,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 35, "id": "tribal-attention", "metadata": {}, "outputs": [ @@ -205,7 +239,7 @@ "IDE\n", ",\n", "see\n", - "Running\n", + "run\n", "Tutorial\n", "Examples\n", "in\n", @@ -218,7 +252,7 @@ "work\n", "for\n", "all\n", - "swing\n", + "Swing\n", 
"program\n", "—\n", "applet\n", @@ -232,7 +266,7 @@ "be\n", "the\n", "step\n", - "-PRON-\n", + "you\n", "need\n", "to\n", "follow\n", @@ -248,7 +282,7 @@ "platform\n", ",\n", "if\n", - "-PRON-\n", + "you\n", "have\n", "not\n", "already\n", @@ -260,7 +294,7 @@ "program\n", "that\n", "use\n", - "Swing\n", + "swing\n", "component\n", ".\n", "compile\n", @@ -302,7 +336,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 36, "id": "surgical-demonstration", "metadata": {}, "outputs": [], @@ -337,7 +371,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 37, "id": "superb-butterfly", "metadata": {}, "outputs": [], @@ -356,7 +390,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 38, "id": "acting-tolerance", "metadata": {}, "outputs": [], @@ -374,7 +408,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 39, "id": "eight-redhead", "metadata": {}, "outputs": [], @@ -393,7 +427,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 40, "id": "monetary-mambo", "metadata": {}, "outputs": [], @@ -422,7 +456,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.7.9" }, "subtitle": "3. Terminologia", "title": "Komputerowe wspomaganie tłumaczenia",