From 22d22f8f7f7472f677f12835e46ce1ee9bf9872e Mon Sep 17 00:00:00 2001 From: Adam Stelmaszyk Date: Sat, 13 Apr 2024 13:33:54 +0200 Subject: [PATCH] first task --- lab/lab_01.ipynb | 182 +++++++++++++++++++++++++++++++++++++---------- lab/lab_02.ipynb | 14 +++- 2 files changed, 159 insertions(+), 37 deletions(-) diff --git a/lab/lab_01.ipynb b/lab/lab_01.ipynb index 0ffe833..7ed3630 100644 --- a/lab/lab_01.ipynb +++ b/lab/lab_01.ipynb @@ -52,7 +52,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "id": "narrow-romantic", "metadata": {}, "outputs": [], @@ -71,7 +71,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "id": "indonesian-electron", "metadata": {}, "outputs": [], @@ -82,7 +82,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 4, "id": "compact-trinidad", "metadata": {}, "outputs": [ @@ -92,7 +92,7 @@ "['Press the ENTER button']" ] }, - "execution_count": 3, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -119,7 +119,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 5, "id": "exposed-daniel", "metadata": {}, "outputs": [], @@ -139,7 +139,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 6, "id": "serial-velvet", "metadata": {}, "outputs": [ @@ -149,7 +149,7 @@ "['Press the ENTER button', 'Press the ENTER key']" ] }, - "execution_count": 5, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -176,7 +176,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 9, "id": "every-gibson", "metadata": {}, "outputs": [ @@ -186,7 +186,7 @@ "[]" ] }, - "execution_count": 6, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -213,13 +213,26 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 11, "id": "protected-rings", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['Press the ENTER button', 'Press the ENTER key']" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "def tm_lookup(sentence):\n", - " return ''" + " return [entry[1] for entry in translation_memory if entry[0].lower() == sentence.lower()]\n", + "\n", + "tm_lookup('Wciśnij przycisk ENTER')" ] }, { @@ -232,17 +245,17 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 8, "id": "severe-alloy", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "''" + "[]" ] }, - "execution_count": 18, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -261,13 +274,29 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 16, "id": "structural-diesel", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "['Press the ENTER button', 'Press the ENTER key']" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ + "import string\n", + "\n", "def tm_lookup(sentence):\n", - " return ''" + " sentence = sentence.translate(str.maketrans('', '', string.punctuation))\n", + " return [entry[1] for entry in translation_memory if entry[0].lower() == sentence.lower()]\n", + "\n", + "tm_lookup('Wciśnij przycisk [ENTER]')" ] }, { @@ -280,17 +309,17 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 17, "id": "brief-senegal", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "''" + "[]" ] }, - "execution_count": 12, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -317,13 +346,66 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 26, "id": "mathematical-customs", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Suggestion:\n" + ] + }, + { + "data": { + "text/plain": [ + "['System restart required']" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ + "def differenceThanNotBiggerThanOneElement(firstSentence, secondSentence):\n", + " firstSentenceList = firstSentence.lower().split()\n", + " secondSentenceList = secondSentence.lower().split()\n", + "\n", + " diffNumber = 0\n", + "\n", + " for i in range(len(firstSentenceList)):\n", + " if(firstSentenceList[i] != secondSentenceList[i]):\n", + " diffNumber=diffNumber+1\n", + " if(diffNumber > 2):\n", + " return False\n", + "\n", + " return True\n", + "\n", "def tm_lookup(sentence):\n", - " return ''" + " sentence = sentence.translate(str.maketrans('', '', string.punctuation))\n", + "\n", + " exactMatchList = [entry[1] for entry in translation_memory if entry[0].lower() == sentence.lower()]\n", + "\n", + " if(len(exactMatchList) == 0):\n", + " diffMatchList = [entry[1] for entry in translation_memory if differenceThanNotBiggerThanOneElement(entry[0], sentence)]\n", + "\n", + " if(len(diffMatchList) > 0):\n", + " print('Suggestion:')\n", + " return diffMatchList\n", + "\n", + " else:\n", + " return exactMatchList\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + " \n", + "\n", + "tm_lookup('Wymagane ponowne uruchomienie maszyny')" ] }, { @@ -344,7 +426,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 27, "id": "humanitarian-wrong", "metadata": {}, "outputs": [], @@ -362,7 +444,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 28, "id": "located-perception", "metadata": {}, "outputs": [], @@ -374,7 +456,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 29, "id": "advised-casting", "metadata": {}, "outputs": [ @@ -384,7 +466,7 @@ "[('przycisk', 'button'), ('drukarka', 'printer')]" ] }, - "execution_count": 17, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -406,7 +488,7 @@ "id": "defensive-fifteen", "metadata": {}, "source": [ - "Odpowiedź:" + "Odpowiedź: O(n * m)" ] }, { @@ -419,13 +501,27 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 34, "id": "original-tunisia", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[]" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "def glossary_lookup(sentence):\n", - " return ''" + " sentence_words = sentence.lower().split()\n", + " return [entry for entry in glossary if entry[0] in sentence_words]\n", + "\n", + "glossary_lookup('Każda Drukarka posiada Przycisk wznowienia drukowania')" ] }, { @@ -438,13 +534,27 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 35, "id": "adolescent-semiconductor", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[('przycisk', 'button'), ('drukarka', 'printer')]" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "def glossary_lookup(sentence):\n", - " return ''" + " sentence_words = set(sentence.lower().split())\n", + " return [entry for entry in glossary if entry[0] in sentence_words]\n", + "\n", + "glossary_lookup('Każda Drukarka posiada Przycisk wznowienia drukowania')" ] } ], @@ -467,7 +577,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.11.7" }, "subtitle": "1. Podstawowe techniki wspomagania tłumaczenia", "title": "Komputerowe wspomaganie tłumaczenia", diff --git a/lab/lab_02.ipynb b/lab/lab_02.ipynb index 10c2003..3a4d3cf 100644 --- a/lab/lab_02.ipynb +++ b/lab/lab_02.ipynb @@ -86,7 +86,19 @@ "outputs": [], "source": [ "def ice_lookup(sentence, prev_sentence, next_sentence):\n", - " return []" + " sentence = sentence.lower()\n", + " prev_sentence = prev_sentence.lower()\n", + " next_sentence = next_sentence.lower()\n", + " for index in range(len(translation_memory)):\n", + " if index == 0:\n", + " if len(translation_memory) > 1:\n", + " if sentence == translation_memory[index] and next_sentence == translation_memory[index+1]:\n", + " return True\n", + " else if sentence == translation_memory[index]:\n", + " return True\n", + " \n", + " \n", + " # return [entry[1] for entry in translation_memory if entry[0] == sentence]" ] }, {