From d38fbee2ec34bebd116feca72d26e8643a500b1d Mon Sep 17 00:00:00 2001 From: Karol Idaszak Date: Wed, 1 Jun 2022 10:49:28 +0200 Subject: [PATCH] Add preprocessing for replacing Polish characters --- system_mockup/analizator_jezyka_naturalnego.py | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/system_mockup/analizator_jezyka_naturalnego.py b/system_mockup/analizator_jezyka_naturalnego.py index 7bc960f..e991c10 100644 --- a/system_mockup/analizator_jezyka_naturalnego.py +++ b/system_mockup/analizator_jezyka_naturalnego.py @@ -2,13 +2,13 @@ import re from acts import hello_act, name_request_act -hello = ['dzie[ńn] dobry', 'dobry wiecz[oó]r', 'witam', 'witaj', 'siema', 'elo', 'cze[śs][ćc]'] -request_name = ['imi[eę]', 'nazywasz'] +hello = ['dzien dobry', 'dobry wieczor', 'witam', 'witaj', 'siema', 'elo', 'czesc'] +request_name = ['imie', 'nazywasz'] question = ['\?$'] acts = {hello_act: [hello], name_request_act: [request_name, question]} - + def analizator_jezyka_naturalnego(text): text = text_preprocess(text) frame = act_check(text) @@ -16,6 +16,18 @@ def analizator_jezyka_naturalnego(text): def text_preprocess(text): text = text.lower() + text = text.replace("ą", "a") + text = text.replace("ć", "c") + text = text.replace("ę", "e") + text = text.replace("ł", "l") + text = text.replace("ń", "n") + text = text.replace("ó", "o") + text = text.replace("ś", "s") + text = text.replace("ź", "z") + text = text.replace("ż", "z") + text = text.replace("\n", " ") + text = text.replace("\t", " ") + text = text.replace(" ", " ") return text def act_check(text):