From 622860d71e921f83cf701e8996d98c5c89975188 Mon Sep 17 00:00:00 2001 From: Adam Stelmaszyk Date: Sat, 27 Apr 2024 19:50:45 +0300 Subject: [PATCH] finshed 4 exercices --- lab/lab_06-07.ipynb | 257 +++++++++++++++----------------------------- 1 file changed, 86 insertions(+), 171 deletions(-) diff --git a/lab/lab_06-07.ipynb b/lab/lab_06-07.ipynb index c218573..7e715de 100644 --- a/lab/lab_06-07.ipynb +++ b/lab/lab_06-07.ipynb @@ -164,7 +164,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 77, "id": "beautiful-mathematics", "metadata": {}, "outputs": [ @@ -177,38 +177,48 @@ " 'month': 4,\n", " 'year': 2024},\n", " {'position': (61, 71),\n", - " 'date_format': 'RRRR/MM/DD',\n", - " 'day': 2024,\n", + " 'date_format': 'DD/MM/RRRR',\n", + " 'day': 12,\n", " 'month': 4,\n", - " 'year': 20},\n", + " 'year': 2024},\n", " {'position': (41, 51),\n", - " 'date_format': 'RRRR-MM-DD',\n", - " 'day': 2024,\n", + " 'date_format': 'DD-MM-RRRR',\n", + " 'day': 2,\n", + " 'month': 12,\n", + " 'year': 2023},\n", + " {'position': (13, 23),\n", + " 'date_format': 'DD.MM.RRRR',\n", + " 'day': 12,\n", " 'month': 4,\n", - " 'year': 20},\n", + " 'year': 2024},\n", + " {'position': (41, 51),\n", + " 'date_format': 'DD.MM.RRRR',\n", + " 'day': 2,\n", + " 'month': 12,\n", + " 'year': 2023},\n", + " {'position': (61, 71),\n", + " 'date_format': 'DD.MM.RRRR',\n", + " 'day': 12,\n", + " 'month': 4,\n", + " 'year': 2024},\n", " {'position': (13, 21),\n", - " 'date_format': 'MM/DD/RR',\n", + " 'date_format': 'DD/MM/RR',\n", " 'day': 12,\n", " 'month': 4,\n", " 'year': 20},\n", - " {'position': (63, 71),\n", - " 'date_format': 'MM/DD/RR',\n", - " 'day': 24,\n", + " {'position': (61, 69),\n", + " 'date_format': 'DD/MM/RR',\n", + " 'day': 12,\n", " 'month': 4,\n", " 'year': 20},\n", - " {'position': (74, 82),\n", - " 'date_format': 'MM/DD/RR',\n", - " 'day': 4,\n", - " 'month': 12,\n", - " 'year': 24},\n", " {'position': (88, 96),\n", - " 'date_format': 'MM/DD/RR',\n", - " 'day': 4,\n", - " 'month': 20,\n", + " 'date_format': 'DD/MM/RR',\n", + " 'day': 20,\n", + " 'month': 4,\n", " 'year': 24}]" ] }, - "execution_count": 35, + "execution_count": 77, "metadata": {}, "output_type": "execute_result" } @@ -218,17 +228,17 @@ " patterns = [\n", " r'(\\d{2})/(\\d{2})/(\\d{4})', # DD/MM/RRRR\n", " r'(\\d{2})-(\\d{2})-(\\d{4})', # DD-MM-RRRR\n", - " r'(\\d{4})/(\\d{2})/(\\d{2})', # RRRR/MM/DD\n", - " r'(\\d{4})-(\\d{2})-(\\d{2})', # RRRR-MM-DD\n", - " r'(\\d{2})/(\\d{2})/(\\d{2})' # MM/DD/RR\n", + " r'(\\d{2}).(\\d{2}).(\\d{4})', # DD.MM.RRRR\n", + " r'(\\d{2}) (\\d{2}) (\\d{4})', # DD MM RRRR\n", + " r'(\\d{2})/(\\d{2})/(\\d{2})' # DD/MM/RR\n", " ]\n", "\n", " date_formats = [\n", " \"DD/MM/RRRR\",\n", " \"DD-MM-RRRR\",\n", - " \"RRRR/MM/DD\",\n", - " \"RRRR-MM-DD\",\n", - " \"MM/DD/RR\"\n", + " \"DD.MM.RRRR\",\n", + " \"DD MM RRRR\",\n", + " \"DD/MM/RR\"\n", " ]\n", "\n", " results = []\n", @@ -245,7 +255,7 @@ "\n", " return results\n", "\n", - "text = \"Data dsadasdj12/04/2024 oraz dnasjdjasndj2024-04-20, jeszcze 2024/04/20 i 04/12/24 oraz 04/20/24.\"\n", + "text = \"Data dsadasdj12/04/2024 oraz dnasjdjasndj02-12-2023, jeszcze 12/04/2024 i 04.12.24 oraz 20/04/24.\"\n", "\n", "find_dates(text)\n" ] @@ -272,115 +282,14 @@ ] }, { - "cell_type": "code", - "execution_count": 5, - "id": "finished-essex", + "cell_type": "markdown", + "id": "dc46baa6", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[{'position': (9, 19), 'date_format': 'DD/MM/RRRR', 'day': 20, 'month': 4, 'year': 2024}, {'position': (41, 51), 'date_format': 'DD/MM/RRRR', 'day': 21, 'month': 4, 'year': 2024}, {'position': (9, 17), 'date_format': 'MM/DD/RR', 'day': 20, 'month': 4, 'year': 20}, {'position': (41, 49), 'date_format': 'MM/DD/RR', 'day': 21, 'month': 4, 'year': 20}]\n", - "[{'position': (22, 32), 'date_format': 'DD/MM/RRRR', 'day': 20, 'month': 4, 'year': 2024}, {'position': (54, 64), 'date_format': 'DD/MM/RRRR', 'day': 21, 'month': 4, 'year': 2024}, {'position': (22, 30), 'date_format': 'MM/DD/RR', 'day': 20, 'month': 4, 'year': 20}, {'position': (54, 62), 'date_format': 'MM/DD/RR', 'day': 21, 'month': 4, 'year': 20}]\n", - "Translated text (Europe format): Aujourd'hui, c'est le 20/04/2024 et demain ce sera le 21/04/2024.\n" - ] - } - ], - "source": [ - "# def correct_dates(source_segment, target_segment, date_format):\n", - "# return ''\n", - "\n", - "from queue import Full\n", - "import re\n", - "\n", - "def find_dates(text):\n", - " patterns = [\n", - " r'(\\d{2})/(\\d{2})/(\\d{4})', # DD/MM/RRRR\n", - " r'(\\d{2})-(\\d{2})-(\\d{4})', # DD-MM-RRRR\n", - " r'(\\d{4})/(\\d{2})/(\\d{2})', # RRRR/MM/DD\n", - " r'(\\d{4})-(\\d{2})-(\\d{2})', # RRRR-MM-DD\n", - " r'(\\d{2})/(\\d{2})/(\\d{2})' # MM/DD/RR\n", - " ]\n", - "\n", - " date_formats = [\n", - " \"DD/MM/RRRR\",\n", - " \"DD-MM-RRRR\",\n", - " \"RRRR/MM/DD\",\n", - " \"RRRR-MM-DD\",\n", - " \"MM/DD/RR\"\n", - " ]\n", - "\n", - " results = []\n", - " for pattern, date_format in zip(patterns, date_formats):\n", - " for match in re.finditer(pattern, text):\n", - " day, month, year = match.groups()\n", - " results.append({\n", - " \"position\": match.span(),\n", - " \"date_format\": date_format,\n", - " \"day\": int(day),\n", - " \"month\": int(month),\n", - " \"year\": int(year)\n", - " })\n", - "\n", - " return results\n", - "\n", - "def translate_dates(source_text, target_text, target_format):\n", - " source_dates = find_dates(source_text)\n", - " print(source_dates)\n", - " target_dates = find_dates(target_text)\n", - " print(target_dates)\n", - "\n", - " if len(source_dates) != len(target_dates):\n", - " print(\"Uwaga: Rózna liczba dat\")\n", - " return\n", - "\n", - " for source_date, target_date in zip(source_dates, target_dates):\n", - " # if source_date[\"day\"] != target_date[\"day\"]:\n", - " if f\"{source_date['day']}/{source_date['month']}/{source_date['year']}\" != f\"{target_date['day']}/{target_date['month']}/{target_date['year']}\":\n", - " print(\"Uwaga: Daty są rózne\")\n", - " return\n", - " \n", - " \n", - "\n", - " # translated_text = target_text\n", - " # for target_date in target_dates:\n", - " replacement = ''\n", - " pattern = ''\n", - " if target_format == \"Europe\":\n", - " # new_date = f\"{target_date['day']}/{target_date['month']}/{target_date['year']}\"\n", - " replacement = r'\\1/\\2/\\3'\n", - " pattern = r'(\\b\\d{2})\\.(\\d{2})\\.(\\d{4}\\b)'\n", - " elif target_format == \"US\":\n", - " # new_date = f\"{target_date['month']}/{target_date['day']}/{target_date['year']}\"\n", - " replacement = r'\\1/\\2/\\3'\n", - " pattern = r'(\\b\\d{2})\\.(\\d{2})\\.(\\d{4}\\b)'\n", - " elif target_format == \"digit-dot\":\n", - " replacement = r'\\1.\\2.\\3'\n", - " pattern = r'(\\b\\d{2})\\.(\\d{2})\\.(\\d{4}\\b)'\n", - " else:\n", - " print(\"Uwaga: Niewspierany format.\")\n", - " return\n", - "\n", - " # translated_text = translated_text[:target_date[\"position\"][0]] + new_date + translated_text[target_date[\"position\"][1]:]\n", - " # print(translated_text)\n", - " # translated_text = translated_text[:target_date[\"position\"][0]] + new_date + translated_text[target_date[\"position\"][1]:]\n", - "\n", - " return re.sub(pattern, replacement, target_text)\n", - "\n", - "# Example usage:\n", - "source_text = \"Today is 20/04/2024 and tomorrow will be 21/04/2024.\"\n", - "target_text = \"Aujourd'hui, c'est le 20/04/2024 et demain ce sera le 21/04/2024.\"\n", - "translated_text = translate_dates(source_text, target_text, \"Europe\")\n", - "if translated_text != None:\n", - " print(\"Translated text (Europe format):\", translated_text)\n", - "\n", - "\n" - ] + "source": [] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 73, "id": "4ee148d5", "metadata": {}, "outputs": [ @@ -388,10 +297,10 @@ "name": "stdout", "output_type": "stream", "text": [ - "[{'position': (9, 19), 'date_format': 'DD/MM/RRRR', 'day': 20, 'month': 4, 'year': 2024}, {'position': (41, 51), 'date_format': 'DD/MM/RRRR', 'day': 21, 'month': 4, 'year': 2024}, {'position': (9, 17), 'date_format': 'MM/DD/RR', 'day': 20, 'month': 4, 'year': 20}, {'position': (41, 49), 'date_format': 'MM/DD/RR', 'day': 21, 'month': 4, 'year': 20}]\n", - "[{'position': (54, 64), 'date_format': 'DD/MM/RRRR', 'day': 21, 'month': 4, 'year': 2024}, {'position': (22, 32), 'date_format': 'RRRR/MM/DD', 'day': 2024, 'month': 4, 'year': 20}, {'position': (24, 32), 'date_format': 'MM/DD/RR', 'day': 24, 'month': 4, 'year': 20}, {'position': (54, 62), 'date_format': 'MM/DD/RR', 'day': 21, 'month': 4, 'year': 20}]\n", - "Uwaga: Daty są różne\n", - "Translated text (Europe format): Aujourd'hui, c'est le 2024/04/20 et demain ce sera le 21/04/2024.\n" + "[{'position': (54, 64), 'date_format': '\\\\1/\\\\2/\\\\3', 'day': 21, 'month': 4, 'year': 2024}, {'position': (22, 32), 'date_format': '\\\\1.\\\\2.\\\\3', 'day': 21, 'month': 4, 'year': 2021}, {'position': (54, 64), 'date_format': '\\\\1.\\\\2.\\\\3', 'day': 21, 'month': 4, 'year': 2024}, {'position': (22, 32), 'date_format': '\\\\1 \\\\2 \\\\3', 'day': 21, 'month': 4, 'year': 2021}, {'position': (54, 62), 'date_format': '\\\\1/\\\\2/\\\\3', 'day': 21, 'month': 4, 'year': 20}]\n", + "\n", + "\n", + "Translated text (Europe format): Aujourd'hui, c'est le 21.04.2021 et demain ce sera le 21.04.2024.\n" ] } ], @@ -399,23 +308,25 @@ "from queue import Full\n", "import re\n", "\n", - "def find_dates(text):\n", - " patterns = [\n", + "date_formats = [\n", + " r'\\1/\\2/\\3',\n", + " r'\\1-\\2-\\3',\n", + " r'\\1.\\2.\\3',\n", + " r'\\1 \\2 \\3',\n", + " r'\\1/\\2/\\3',\n", + " ]\n", + "\n", + "patterns = [\n", " r'(\\d{2})/(\\d{2})/(\\d{4})', # DD/MM/RRRR\n", " r'(\\d{2})-(\\d{2})-(\\d{4})', # DD-MM-RRRR\n", - " r'(\\d{4})/(\\d{2})/(\\d{2})', # RRRR/MM/DD\n", - " r'(\\d{4})-(\\d{2})-(\\d{2})', # RRRR-MM-DD\n", - " r'(\\d{2})/(\\d{2})/(\\d{2})' # MM/DD/RR\n", + " r'(\\d{2}).(\\d{2}).(\\d{4})', # DD.MM.RRRR\n", + " r'(\\d{2}) (\\d{2}) (\\d{4})', # DD MM RRRR\n", + " r'(\\d{2})/(\\d{2})/(\\d{2})' # DD/MM/RR\n", " ]\n", "\n", - " date_formats = [\n", - " \"DD/MM/RRRR\",\n", - " \"DD-MM-RRRR\",\n", - " \"RRRR/MM/DD\",\n", - " \"RRRR-MM-DD\",\n", - " \"MM/DD/RR\"\n", - " ]\n", "\n", + "def find_dates(text):\n", + " \n", " results = []\n", " for pattern, date_format in zip(patterns, date_formats):\n", " for match in re.finditer(pattern, text):\n", @@ -430,42 +341,46 @@ "\n", " return results\n", "\n", - "def format_date(day, month, year):\n", - " formatted_day = f\"{day:02d}\" # Add leading zero if day has single digit\n", - " formatted_month = f\"{month:02d}\" # Add leading zero if month has single digit\n", - " return formatted_day, formatted_month, str(year)\n", "\n", "def translate_dates(source_text, target_text, target_format):\n", " source_dates = find_dates(source_text)\n", - " print(source_dates)\n", " target_dates = find_dates(target_text)\n", " print(target_dates)\n", + " print('\\n')\n", "\n", " if len(source_dates) != len(target_dates):\n", - " print(\"Uwaga: Różna liczba dat\")\n", - " return target_text\n", + " print(\"Uwaga: Rózna liczba dat\")\n", + " return\n", "\n", " for source_date, target_date in zip(source_dates, target_dates):\n", - " source_date_str = f\"{source_date['day']}/{source_date['month']}/{source_date['year']}\"\n", - " target_date_str = f\"{target_date['day']}/{target_date['month']}/{target_date['year']}\"\n", - " if source_date_str != target_date_str:\n", - " print(\"Uwaga: Daty są różne\")\n", - " return target_text\n", + " if f\"{source_date['day']}/{source_date['month']}/{source_date['year']}\" != f\"{target_date['day']}/{target_date['month']}/{target_date['year']}\":\n", + " print(\"Uwaga: Daty są rózne\")\n", + " return\n", + " \n", + " replacement = ''\n", + " if target_format == \"Europe\":\n", + " replacement = r'\\1/\\2/\\3'\n", + " elif target_format == \"US\":\n", + " replacement = r'\\1/\\2/\\3'\n", + " elif target_format == \"digit-dot\":\n", + " replacement = r'\\1.\\2.\\3'\n", + " else:\n", + " print(\"Uwaga: Niewspierany format.\")\n", + " return\n", "\n", - " # Translate dates to the desired format\n", - " translated_text = target_text\n", - " for target_date in target_dates:\n", - " formatted_day, formatted_month, formatted_year = format_date(target_date['day'], target_date['month'], target_date['year'])\n", - " replacement = f\"{formatted_day}/{formatted_month}/{formatted_year}\"\n", - " translated_text = translated_text[:target_date[\"position\"][0]] + replacement + translated_text[target_date[\"position\"][1]:]\n", - " print(target_date)\n", + " \n", "\n", - " return translated_text\n", + " final_text = target_text\n", + "\n", + " for pattern in patterns:\n", + " final_text = re.sub(pattern, replacement, final_text)\n", + "\n", + " return final_text\n", "\n", "# Example usage:\n", - "source_text = \"Today is 20/04/2024 and tomorrow will be 21/04/2024.\"\n", - "target_text = \"Aujourd'hui, c'est le 2024/04/20 et demain ce sera le 21/04/2024.\"\n", - "translated_text = translate_dates(source_text, target_text, \"Europe\")\n", + "source_text = \"Today is 21 04 2021 and tomorrow will be 21/04/2024.\"\n", + "target_text = \"Aujourd'hui, c'est le 21 04 2021 et demain ce sera le 21/04/2024.\"\n", + "translated_text = translate_dates(source_text, target_text, \"digit-dot\")\n", "print(\"Translated text (Europe format):\", translated_text)\n", "\n", "\n",