finished 4 exercises

This commit is contained in:
Adam Stelmaszyk 2024-04-27 19:50:45 +03:00
parent add3f3c9c2
commit 622860d71e
1 changed files with 86 additions and 171 deletions

View File

@ -164,7 +164,7 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 77,
"id": "beautiful-mathematics",
"metadata": {},
"outputs": [
@ -177,38 +177,48 @@
" 'month': 4,\n",
" 'year': 2024},\n",
" {'position': (61, 71),\n",
" 'date_format': 'RRRR/MM/DD',\n",
" 'day': 2024,\n",
" 'date_format': 'DD/MM/RRRR',\n",
" 'day': 12,\n",
" 'month': 4,\n",
" 'year': 20},\n",
" 'year': 2024},\n",
" {'position': (41, 51),\n",
" 'date_format': 'RRRR-MM-DD',\n",
" 'day': 2024,\n",
" 'date_format': 'DD-MM-RRRR',\n",
" 'day': 2,\n",
" 'month': 12,\n",
" 'year': 2023},\n",
" {'position': (13, 23),\n",
" 'date_format': 'DD.MM.RRRR',\n",
" 'day': 12,\n",
" 'month': 4,\n",
" 'year': 20},\n",
" 'year': 2024},\n",
" {'position': (41, 51),\n",
" 'date_format': 'DD.MM.RRRR',\n",
" 'day': 2,\n",
" 'month': 12,\n",
" 'year': 2023},\n",
" {'position': (61, 71),\n",
" 'date_format': 'DD.MM.RRRR',\n",
" 'day': 12,\n",
" 'month': 4,\n",
" 'year': 2024},\n",
" {'position': (13, 21),\n",
" 'date_format': 'MM/DD/RR',\n",
" 'date_format': 'DD/MM/RR',\n",
" 'day': 12,\n",
" 'month': 4,\n",
" 'year': 20},\n",
" {'position': (63, 71),\n",
" 'date_format': 'MM/DD/RR',\n",
" 'day': 24,\n",
" {'position': (61, 69),\n",
" 'date_format': 'DD/MM/RR',\n",
" 'day': 12,\n",
" 'month': 4,\n",
" 'year': 20},\n",
" {'position': (74, 82),\n",
" 'date_format': 'MM/DD/RR',\n",
" 'day': 4,\n",
" 'month': 12,\n",
" 'year': 24},\n",
" {'position': (88, 96),\n",
" 'date_format': 'MM/DD/RR',\n",
" 'day': 4,\n",
" 'month': 20,\n",
" 'date_format': 'DD/MM/RR',\n",
" 'day': 20,\n",
" 'month': 4,\n",
" 'year': 24}]"
]
},
"execution_count": 35,
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
@ -218,17 +228,17 @@
" patterns = [\n",
" r'(\\d{2})/(\\d{2})/(\\d{4})', # DD/MM/RRRR\n",
" r'(\\d{2})-(\\d{2})-(\\d{4})', # DD-MM-RRRR\n",
" r'(\\d{4})/(\\d{2})/(\\d{2})', # RRRR/MM/DD\n",
" r'(\\d{4})-(\\d{2})-(\\d{2})', # RRRR-MM-DD\n",
" r'(\\d{2})/(\\d{2})/(\\d{2})' # MM/DD/RR\n",
" r'(\\d{2}).(\\d{2}).(\\d{4})', # DD.MM.RRRR\n",
" r'(\\d{2}) (\\d{2}) (\\d{4})', # DD MM RRRR\n",
" r'(\\d{2})/(\\d{2})/(\\d{2})' # DD/MM/RR\n",
" ]\n",
"\n",
" date_formats = [\n",
" \"DD/MM/RRRR\",\n",
" \"DD-MM-RRRR\",\n",
" \"RRRR/MM/DD\",\n",
" \"RRRR-MM-DD\",\n",
" \"MM/DD/RR\"\n",
" \"DD.MM.RRRR\",\n",
" \"DD MM RRRR\",\n",
" \"DD/MM/RR\"\n",
" ]\n",
"\n",
" results = []\n",
@ -245,7 +255,7 @@
"\n",
" return results\n",
"\n",
"text = \"Data dsadasdj12/04/2024 oraz dnasjdjasndj2024-04-20, jeszcze 2024/04/20 i 04/12/24 oraz 04/20/24.\"\n",
"text = \"Data dsadasdj12/04/2024 oraz dnasjdjasndj02-12-2023, jeszcze 12/04/2024 i 04.12.24 oraz 20/04/24.\"\n",
"\n",
"find_dates(text)\n"
]
@ -272,115 +282,14 @@
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "finished-essex",
"cell_type": "markdown",
"id": "dc46baa6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[{'position': (9, 19), 'date_format': 'DD/MM/RRRR', 'day': 20, 'month': 4, 'year': 2024}, {'position': (41, 51), 'date_format': 'DD/MM/RRRR', 'day': 21, 'month': 4, 'year': 2024}, {'position': (9, 17), 'date_format': 'MM/DD/RR', 'day': 20, 'month': 4, 'year': 20}, {'position': (41, 49), 'date_format': 'MM/DD/RR', 'day': 21, 'month': 4, 'year': 20}]\n",
"[{'position': (22, 32), 'date_format': 'DD/MM/RRRR', 'day': 20, 'month': 4, 'year': 2024}, {'position': (54, 64), 'date_format': 'DD/MM/RRRR', 'day': 21, 'month': 4, 'year': 2024}, {'position': (22, 30), 'date_format': 'MM/DD/RR', 'day': 20, 'month': 4, 'year': 20}, {'position': (54, 62), 'date_format': 'MM/DD/RR', 'day': 21, 'month': 4, 'year': 20}]\n",
"Translated text (Europe format): Aujourd'hui, c'est le 20/04/2024 et demain ce sera le 21/04/2024.\n"
]
}
],
"source": [
"# def correct_dates(source_segment, target_segment, date_format):\n",
"# return ''\n",
"\n",
"from queue import Full\n",
"import re\n",
"\n",
"def find_dates(text):\n",
" patterns = [\n",
" r'(\\d{2})/(\\d{2})/(\\d{4})', # DD/MM/RRRR\n",
" r'(\\d{2})-(\\d{2})-(\\d{4})', # DD-MM-RRRR\n",
" r'(\\d{4})/(\\d{2})/(\\d{2})', # RRRR/MM/DD\n",
" r'(\\d{4})-(\\d{2})-(\\d{2})', # RRRR-MM-DD\n",
" r'(\\d{2})/(\\d{2})/(\\d{2})' # MM/DD/RR\n",
" ]\n",
"\n",
" date_formats = [\n",
" \"DD/MM/RRRR\",\n",
" \"DD-MM-RRRR\",\n",
" \"RRRR/MM/DD\",\n",
" \"RRRR-MM-DD\",\n",
" \"MM/DD/RR\"\n",
" ]\n",
"\n",
" results = []\n",
" for pattern, date_format in zip(patterns, date_formats):\n",
" for match in re.finditer(pattern, text):\n",
" day, month, year = match.groups()\n",
" results.append({\n",
" \"position\": match.span(),\n",
" \"date_format\": date_format,\n",
" \"day\": int(day),\n",
" \"month\": int(month),\n",
" \"year\": int(year)\n",
" })\n",
"\n",
" return results\n",
"\n",
"def translate_dates(source_text, target_text, target_format):\n",
" source_dates = find_dates(source_text)\n",
" print(source_dates)\n",
" target_dates = find_dates(target_text)\n",
" print(target_dates)\n",
"\n",
" if len(source_dates) != len(target_dates):\n",
" print(\"Uwaga: Rózna liczba dat\")\n",
" return\n",
"\n",
" for source_date, target_date in zip(source_dates, target_dates):\n",
" # if source_date[\"day\"] != target_date[\"day\"]:\n",
" if f\"{source_date['day']}/{source_date['month']}/{source_date['year']}\" != f\"{target_date['day']}/{target_date['month']}/{target_date['year']}\":\n",
" print(\"Uwaga: Daty są rózne\")\n",
" return\n",
" \n",
" \n",
"\n",
" # translated_text = target_text\n",
" # for target_date in target_dates:\n",
" replacement = ''\n",
" pattern = ''\n",
" if target_format == \"Europe\":\n",
" # new_date = f\"{target_date['day']}/{target_date['month']}/{target_date['year']}\"\n",
" replacement = r'\\1/\\2/\\3'\n",
" pattern = r'(\\b\\d{2})\\.(\\d{2})\\.(\\d{4}\\b)'\n",
" elif target_format == \"US\":\n",
" # new_date = f\"{target_date['month']}/{target_date['day']}/{target_date['year']}\"\n",
" replacement = r'\\1/\\2/\\3'\n",
" pattern = r'(\\b\\d{2})\\.(\\d{2})\\.(\\d{4}\\b)'\n",
" elif target_format == \"digit-dot\":\n",
" replacement = r'\\1.\\2.\\3'\n",
" pattern = r'(\\b\\d{2})\\.(\\d{2})\\.(\\d{4}\\b)'\n",
" else:\n",
" print(\"Uwaga: Niewspierany format.\")\n",
" return\n",
"\n",
" # translated_text = translated_text[:target_date[\"position\"][0]] + new_date + translated_text[target_date[\"position\"][1]:]\n",
" # print(translated_text)\n",
" # translated_text = translated_text[:target_date[\"position\"][0]] + new_date + translated_text[target_date[\"position\"][1]:]\n",
"\n",
" return re.sub(pattern, replacement, target_text)\n",
"\n",
"# Example usage:\n",
"source_text = \"Today is 20/04/2024 and tomorrow will be 21/04/2024.\"\n",
"target_text = \"Aujourd'hui, c'est le 20/04/2024 et demain ce sera le 21/04/2024.\"\n",
"translated_text = translate_dates(source_text, target_text, \"Europe\")\n",
"if translated_text != None:\n",
" print(\"Translated text (Europe format):\", translated_text)\n",
"\n",
"\n"
]
"source": []
},
{
"cell_type": "code",
"execution_count": 16,
"execution_count": 73,
"id": "4ee148d5",
"metadata": {},
"outputs": [
@ -388,10 +297,10 @@
"name": "stdout",
"output_type": "stream",
"text": [
"[{'position': (9, 19), 'date_format': 'DD/MM/RRRR', 'day': 20, 'month': 4, 'year': 2024}, {'position': (41, 51), 'date_format': 'DD/MM/RRRR', 'day': 21, 'month': 4, 'year': 2024}, {'position': (9, 17), 'date_format': 'MM/DD/RR', 'day': 20, 'month': 4, 'year': 20}, {'position': (41, 49), 'date_format': 'MM/DD/RR', 'day': 21, 'month': 4, 'year': 20}]\n",
"[{'position': (54, 64), 'date_format': 'DD/MM/RRRR', 'day': 21, 'month': 4, 'year': 2024}, {'position': (22, 32), 'date_format': 'RRRR/MM/DD', 'day': 2024, 'month': 4, 'year': 20}, {'position': (24, 32), 'date_format': 'MM/DD/RR', 'day': 24, 'month': 4, 'year': 20}, {'position': (54, 62), 'date_format': 'MM/DD/RR', 'day': 21, 'month': 4, 'year': 20}]\n",
"Uwaga: Daty są różne\n",
"Translated text (Europe format): Aujourd'hui, c'est le 2024/04/20 et demain ce sera le 21/04/2024.\n"
"[{'position': (54, 64), 'date_format': '\\\\1/\\\\2/\\\\3', 'day': 21, 'month': 4, 'year': 2024}, {'position': (22, 32), 'date_format': '\\\\1.\\\\2.\\\\3', 'day': 21, 'month': 4, 'year': 2021}, {'position': (54, 64), 'date_format': '\\\\1.\\\\2.\\\\3', 'day': 21, 'month': 4, 'year': 2024}, {'position': (22, 32), 'date_format': '\\\\1 \\\\2 \\\\3', 'day': 21, 'month': 4, 'year': 2021}, {'position': (54, 62), 'date_format': '\\\\1/\\\\2/\\\\3', 'day': 21, 'month': 4, 'year': 20}]\n",
"\n",
"\n",
"Translated text (Europe format): Aujourd'hui, c'est le 21.04.2021 et demain ce sera le 21.04.2024.\n"
]
}
],
@ -399,23 +308,25 @@
"from queue import Full\n",
"import re\n",
"\n",
"def find_dates(text):\n",
" patterns = [\n",
"date_formats = [\n",
" r'\\1/\\2/\\3',\n",
" r'\\1-\\2-\\3',\n",
" r'\\1.\\2.\\3',\n",
" r'\\1 \\2 \\3',\n",
" r'\\1/\\2/\\3',\n",
" ]\n",
"\n",
"patterns = [\n",
" r'(\\d{2})/(\\d{2})/(\\d{4})', # DD/MM/RRRR\n",
" r'(\\d{2})-(\\d{2})-(\\d{4})', # DD-MM-RRRR\n",
" r'(\\d{4})/(\\d{2})/(\\d{2})', # RRRR/MM/DD\n",
" r'(\\d{4})-(\\d{2})-(\\d{2})', # RRRR-MM-DD\n",
" r'(\\d{2})/(\\d{2})/(\\d{2})' # MM/DD/RR\n",
" r'(\\d{2}).(\\d{2}).(\\d{4})', # DD.MM.RRRR\n",
" r'(\\d{2}) (\\d{2}) (\\d{4})', # DD MM RRRR\n",
" r'(\\d{2})/(\\d{2})/(\\d{2})' # DD/MM/RR\n",
" ]\n",
"\n",
" date_formats = [\n",
" \"DD/MM/RRRR\",\n",
" \"DD-MM-RRRR\",\n",
" \"RRRR/MM/DD\",\n",
" \"RRRR-MM-DD\",\n",
" \"MM/DD/RR\"\n",
" ]\n",
"\n",
"def find_dates(text):\n",
" \n",
" results = []\n",
" for pattern, date_format in zip(patterns, date_formats):\n",
" for match in re.finditer(pattern, text):\n",
@ -430,42 +341,46 @@
"\n",
" return results\n",
"\n",
"def format_date(day, month, year):\n",
" formatted_day = f\"{day:02d}\" # Add leading zero if day has single digit\n",
" formatted_month = f\"{month:02d}\" # Add leading zero if month has single digit\n",
" return formatted_day, formatted_month, str(year)\n",
"\n",
"def translate_dates(source_text, target_text, target_format):\n",
" source_dates = find_dates(source_text)\n",
" print(source_dates)\n",
" target_dates = find_dates(target_text)\n",
" print(target_dates)\n",
" print('\\n')\n",
"\n",
" if len(source_dates) != len(target_dates):\n",
" print(\"Uwaga: Różna liczba dat\")\n",
" return target_text\n",
" print(\"Uwaga: Rózna liczba dat\")\n",
" return\n",
"\n",
" for source_date, target_date in zip(source_dates, target_dates):\n",
" source_date_str = f\"{source_date['day']}/{source_date['month']}/{source_date['year']}\"\n",
" target_date_str = f\"{target_date['day']}/{target_date['month']}/{target_date['year']}\"\n",
" if source_date_str != target_date_str:\n",
" print(\"Uwaga: Daty są różne\")\n",
" return target_text\n",
" if f\"{source_date['day']}/{source_date['month']}/{source_date['year']}\" != f\"{target_date['day']}/{target_date['month']}/{target_date['year']}\":\n",
" print(\"Uwaga: Daty są rózne\")\n",
" return\n",
" \n",
" replacement = ''\n",
" if target_format == \"Europe\":\n",
" replacement = r'\\1/\\2/\\3'\n",
" elif target_format == \"US\":\n",
" replacement = r'\\1/\\2/\\3'\n",
" elif target_format == \"digit-dot\":\n",
" replacement = r'\\1.\\2.\\3'\n",
" else:\n",
" print(\"Uwaga: Niewspierany format.\")\n",
" return\n",
"\n",
" # Translate dates to the desired format\n",
" translated_text = target_text\n",
" for target_date in target_dates:\n",
" formatted_day, formatted_month, formatted_year = format_date(target_date['day'], target_date['month'], target_date['year'])\n",
" replacement = f\"{formatted_day}/{formatted_month}/{formatted_year}\"\n",
" translated_text = translated_text[:target_date[\"position\"][0]] + replacement + translated_text[target_date[\"position\"][1]:]\n",
" print(target_date)\n",
" \n",
"\n",
" return translated_text\n",
" final_text = target_text\n",
"\n",
" for pattern in patterns:\n",
" final_text = re.sub(pattern, replacement, final_text)\n",
"\n",
" return final_text\n",
"\n",
"# Example usage:\n",
"source_text = \"Today is 20/04/2024 and tomorrow will be 21/04/2024.\"\n",
"target_text = \"Aujourd'hui, c'est le 2024/04/20 et demain ce sera le 21/04/2024.\"\n",
"translated_text = translate_dates(source_text, target_text, \"Europe\")\n",
"source_text = \"Today is 21 04 2021 and tomorrow will be 21/04/2024.\"\n",
"target_text = \"Aujourd'hui, c'est le 21 04 2021 et demain ce sera le 21/04/2024.\"\n",
"translated_text = translate_dates(source_text, target_text, \"digit-dot\")\n",
"print(\"Translated text (Europe format):\", translated_text)\n",
"\n",
"\n",