finished 6-7

This commit is contained in:
Adam Stelmaszyk 2024-04-28 07:18:57 +03:00
parent 622860d71e
commit ff131152f6
1 changed files with 59 additions and 9 deletions

View File

@ -55,7 +55,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 6, "execution_count": 91,
"id": "documented-hacker", "id": "documented-hacker",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -70,7 +70,7 @@
" {'</root>': (36, 43)}]" " {'</root>': (36, 43)}]"
] ]
}, },
"execution_count": 6, "execution_count": 91,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -100,7 +100,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 30, "execution_count": 92,
"id": "unauthorized-study", "id": "unauthorized-study",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -164,7 +164,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 77, "execution_count": 93,
"id": "beautiful-mathematics", "id": "beautiful-mathematics",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -218,7 +218,7 @@
" 'year': 24}]" " 'year': 24}]"
] ]
}, },
"execution_count": 77, "execution_count": 93,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -289,7 +289,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 73, "execution_count": 94,
"id": "4ee148d5", "id": "4ee148d5",
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
@ -429,13 +429,63 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 5, "execution_count": 153,
"id": "romance-judge", "id": "romance-judge",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [
{
"data": {
"text/plain": [
"'<greeting>Witaj </greeting><name>Ania! </name></name></name></name></name>John </name>'"
]
},
"execution_count": 153,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [ "source": [
"import re\n",
"\n",
"def split_text(text):\n",
" parts = re.split(r'(<\\w+>|<\\/\\w+>| )', text)\n",
" \n",
" split_list = []\n",
" \n",
" for part in parts:\n",
" if part.strip(): \n",
" split_list.append((part, part.startswith(\"<\") and part.endswith(\">\")))\n",
" \n",
" return split_list\n",
"\n",
"def transfer_tags(source_segment, target_segment):\n", "def transfer_tags(source_segment, target_segment):\n",
" return ''" " source_interation_index = 0\n",
"\n",
" split_source_segment = split_text(source_segment)\n",
"\n",
" target_text_with_tags = ''\n",
"\n",
" for text in target_segment.split():\n",
" source_element = split_source_segment[source_interation_index]\n",
" \n",
" while(source_element[1]):\n",
" target_text_with_tags += source_element[0]\n",
" source_interation_index = source_interation_index + 1\n",
" source_element = split_source_segment[source_interation_index]\n",
"\n",
" target_text_with_tags += text + ' '\n",
" source_interation_index = source_interation_index + 1\n",
"\n",
" for index in range(source_interation_index, len(split_source_segment)):\n",
" target_text_with_tags += split_source_segment[index][0]\n",
"\n",
" return target_text_with_tags\n",
"\n",
"\n",
"source_segment = \"<greeting>Hello</greeting> <name>Ania!</name></name></name></name></name>John</name>\"\n",
"target_segment = \"Witaj Ania! John\"\n",
"\n",
"transfer_tags(source_segment,target_segment)\n"
] ]
} }
], ],