# Provenance: patch "Add preprocessing for replacing polish characters"
# (commit 4088d67, system_mockup/analizator_jezyka_naturalnego.py).

# Single-pass character table: lowercase Polish diacritics are folded to their
# ASCII base letters, and newline/tab are turned into plain spaces.
_TEXT_PREPROCESS_TABLE = str.maketrans(
    {
        "ą": "a",
        "ć": "c",
        "ę": "e",
        "ł": "l",
        "ń": "n",
        "ó": "o",
        "ś": "s",
        "ź": "z",
        "ż": "z",
        "\n": " ",
        "\t": " ",
    }
)


def text_preprocess(text):
    """Normalize *text* for further analysis.

    Steps, in order:

    1. Lowercase the whole string (so uppercase Polish letters are handled
       by the lowercase-only mapping below).
    2. Replace Polish diacritics with ASCII letters and newlines/tabs with
       spaces, in one pass.
    3. Collapse every run of consecutive spaces into a single space.

    Leading and trailing spaces are not stripped.

    Args:
        text: The input string.

    Returns:
        The normalized string.
    """
    text = text.lower().translate(_TEXT_PREPROCESS_TABLE)
    # A single replace() pass would leave runs of three or more spaces only
    # partially collapsed, so repeat until no double space remains.
    while "  " in text:
        text = text.replace("  ", " ")
    return text