finished 6-7

This commit is contained in:
Adam Stelmaszyk 2024-04-28 07:18:57 +03:00
parent 622860d71e
commit ff131152f6

View File

@ -55,7 +55,7 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 91,
"id": "documented-hacker",
"metadata": {},
"outputs": [
@ -70,7 +70,7 @@
" {'</root>': (36, 43)}]"
]
},
"execution_count": 6,
"execution_count": 91,
"metadata": {},
"output_type": "execute_result"
}
@ -100,7 +100,7 @@
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": 92,
"id": "unauthorized-study",
"metadata": {},
"outputs": [
@ -164,7 +164,7 @@
},
{
"cell_type": "code",
"execution_count": 77,
"execution_count": 93,
"id": "beautiful-mathematics",
"metadata": {},
"outputs": [
@ -218,7 +218,7 @@
" 'year': 24}]"
]
},
"execution_count": 77,
"execution_count": 93,
"metadata": {},
"output_type": "execute_result"
}
@ -289,7 +289,7 @@
},
{
"cell_type": "code",
"execution_count": 73,
"execution_count": 94,
"id": "4ee148d5",
"metadata": {},
"outputs": [
@ -429,13 +429,63 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 153,
"id": "romance-judge",
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"text/plain": [
"'<greeting>Witaj </greeting><name>Ania! </name></name></name></name></name>John </name>'"
]
},
"execution_count": 153,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import re\n",
"\n",
"def split_text(text):\n",
" parts = re.split(r'(<\\w+>|<\\/\\w+>| )', text)\n",
" \n",
" split_list = []\n",
" \n",
" for part in parts:\n",
" if part.strip(): \n",
" split_list.append((part, part.startswith(\"<\") and part.endswith(\">\")))\n",
" \n",
" return split_list\n",
"\n",
"def transfer_tags(source_segment, target_segment):\n",
"    \"\"\"Carry the tags of source_segment over to the words of target_segment.\n",
"\n",
"    The source is tokenized with split_text(). The target words are then\n",
"    walked in order: before each target word, every tag found at the\n",
"    current source position is emitted, after which the target word takes\n",
"    the place of the next source token. Each target word is followed by a\n",
"    single space. Source tokens that are left over once the target words\n",
"    run out (tags and text alike) are appended unchanged.\n",
"\n",
"    Args:\n",
"        source_segment: text containing tags such as <name>...</name>.\n",
"        target_segment: whitespace-separated text without tags.\n",
"\n",
"    Returns:\n",
"        The target text with the source tags interleaved.\n",
"    \"\"\"\n",
"    source_tokens = split_text(source_segment)\n",
"    token_count = len(source_tokens)\n",
"    source_index = 0\n",
"    pieces = []\n",
"\n",
"    for word in target_segment.split():\n",
"        # Emit the tags that precede the next source word. The bounds check\n",
"        # keeps this from raising IndexError when the target has more words\n",
"        # than the source has text tokens.\n",
"        while source_index < token_count and source_tokens[source_index][1]:\n",
"            pieces.append(source_tokens[source_index][0])\n",
"            source_index += 1\n",
"\n",
"        pieces.append(word + ' ')\n",
"        # Step over the source token this target word replaces.\n",
"        source_index += 1\n",
"\n",
"    # Flush whatever the source still holds past the last target word.\n",
"    pieces.extend(token for token, _ in source_tokens[source_index:])\n",
"\n",
"    return ''.join(pieces)\n",
"\n",
"\n",
"# Demo: the source carries unbalanced closing tags on purpose; the cell's\n",
"# displayed value is the tagged target string.\n",
"source_segment = \"<greeting>Hello</greeting> <name>Ania!</name></name></name></name></name>John</name>\"\n",
"target_segment = \"Witaj Ania! John\"\n",
"\n",
"transfer_tags(source_segment, target_segment)\n"
]
}
],