forked from bfijalkowski/KWT-2024
finished 6-7
This commit is contained in:
parent
622860d71e
commit
ff131152f6
@ -55,7 +55,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"execution_count": 91,
|
||||
"id": "documented-hacker",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -70,7 +70,7 @@
|
||||
" {'</root>': (36, 43)}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"execution_count": 91,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -100,7 +100,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 30,
|
||||
"execution_count": 92,
|
||||
"id": "unauthorized-study",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -164,7 +164,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 77,
|
||||
"execution_count": 93,
|
||||
"id": "beautiful-mathematics",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -218,7 +218,7 @@
|
||||
" 'year': 24}]"
|
||||
]
|
||||
},
|
||||
"execution_count": 77,
|
||||
"execution_count": 93,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
@ -289,7 +289,7 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 73,
|
||||
"execution_count": 94,
|
||||
"id": "4ee148d5",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
@ -429,13 +429,63 @@
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"execution_count": 153,
|
||||
"id": "romance-judge",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"'<greeting>Witaj </greeting><name>Ania! </name></name></name></name></name>John </name>'"
|
||||
]
|
||||
},
|
||||
"execution_count": 153,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import re\n",
|
||||
"\n",
|
||||
"def split_text(text):\n",
|
||||
" parts = re.split(r'(<\\w+>|<\\/\\w+>| )', text)\n",
|
||||
" \n",
|
||||
" split_list = []\n",
|
||||
" \n",
|
||||
" for part in parts:\n",
|
||||
" if part.strip(): \n",
|
||||
" split_list.append((part, part.startswith(\"<\") and part.endswith(\">\")))\n",
|
||||
" \n",
|
||||
" return split_list\n",
|
||||
"\n",
|
||||
"def transfer_tags(source_segment, target_segment):\n",
|
||||
" return ''"
|
||||
" source_interation_index = 0\n",
|
||||
"\n",
|
||||
" split_source_segment = split_text(source_segment)\n",
|
||||
"\n",
|
||||
" target_text_with_tags = ''\n",
|
||||
"\n",
|
||||
" for text in target_segment.split():\n",
|
||||
" source_element = split_source_segment[source_interation_index]\n",
|
||||
" \n",
|
||||
" while(source_element[1]):\n",
|
||||
" target_text_with_tags += source_element[0]\n",
|
||||
" source_interation_index = source_interation_index + 1\n",
|
||||
" source_element = split_source_segment[source_interation_index]\n",
|
||||
"\n",
|
||||
" target_text_with_tags += text + ' '\n",
|
||||
" source_interation_index = source_interation_index + 1\n",
|
||||
"\n",
|
||||
" for index in range(source_interation_index, len(split_source_segment)):\n",
|
||||
" target_text_with_tags += split_source_segment[index][0]\n",
|
||||
"\n",
|
||||
" return target_text_with_tags\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"source_segment = \"<greeting>Hello</greeting> <name>Ania!</name></name></name></name></name>John</name>\"\n",
|
||||
"target_segment = \"Witaj Ania! John\"\n",
|
||||
"\n",
|
||||
"transfer_tags(source_segment,target_segment)\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
|
Loading…
Reference in New Issue
Block a user