Add preprocessing for replacing Polish characters

This commit is contained in:
Karol Idaszak 2022-06-01 10:49:28 +02:00
parent 3f02c41b1f
commit d38fbee2ec

View File

@ -2,13 +2,13 @@ import re
from acts import hello_act, name_request_act from acts import hello_act, name_request_act
hello = ['dzien] dobry', 'dobry wiecz[oó]r', 'witam', 'witaj', 'siema', 'elo', 'czes][ćc]'] hello = ['dzien dobry', 'dobry wieczor', 'witam', 'witaj', 'siema', 'elo', 'czesc']
request_name = ['imi[eę]', 'nazywasz'] request_name = ['imie', 'nazywasz']
question = ['\?$'] question = ['\?$']
acts = {hello_act: [hello], acts = {hello_act: [hello],
name_request_act: [request_name, question]} name_request_act: [request_name, question]}
def analizator_jezyka_naturalnego(text): def analizator_jezyka_naturalnego(text):
text = text_preprocess(text) text = text_preprocess(text)
frame = act_check(text) frame = act_check(text)
@ -16,6 +16,18 @@ def analizator_jezyka_naturalnego(text):
# Single-pass translation table: Polish diacritics -> ASCII base letters,
# and newline/tab -> space. Only lowercase keys are needed because the
# text is lowercased before the table is applied.
_POLISH_TO_ASCII = str.maketrans({
    "ą": "a",
    "ć": "c",
    "ę": "e",
    "ł": "l",
    "ń": "n",
    "ó": "o",
    "ś": "s",
    "ź": "z",
    "ż": "z",
    "\n": " ",
    "\t": " ",
})


def text_preprocess(text):
    """Normalize user text before it is matched against act patterns.

    The text is lowercased, Polish diacritics are replaced by their ASCII
    base letters, newlines and tabs become spaces, and runs of spaces are
    collapsed to a single space. Leading and trailing spaces are kept.

    NOTE(review): the original final step was rendered as
    ``text.replace(" ", " ")``, which is a no-op as displayed; it is
    presumably a double-space collapse whose literal was squeezed by the
    diff viewer -- confirm against the repository. A single replace pass
    would also leave runs of three or more spaces only partly collapsed,
    so the collapse is repeated until no double space remains.

    Args:
        text: Raw input string.

    Returns:
        The normalized string.
    """
    text = text.lower().translate(_POLISH_TO_ASCII)
    # Repeat until stable so that any run length ends up as one space.
    while "  " in text:
        text = text.replace("  ", " ")
    return text
def act_check(text): def act_check(text):