Add preprocessing that replaces Polish characters

This commit is contained in:
Karol Idaszak 2022-06-01 10:59:15 +02:00
parent 7e2e35032d
commit 4088d67207

View File

@ -46,4 +46,16 @@ def analizator_jezyka_naturalnego(text):
# Single-pass translation table: Polish diacritic letters are folded to their
# ASCII base letters, and newline/tab are turned into plain spaces.
_TEXT_PREPROCESS_TABLE = str.maketrans(
    {
        "ą": "a",
        "ć": "c",
        "ę": "e",
        "ł": "l",
        "ń": "n",
        "ó": "o",
        "ś": "s",
        "ź": "z",
        "ż": "z",
        "\n": " ",
        "\t": " ",
    }
)


def text_preprocess(text: str) -> str:
    """Normalize *text* for further processing.

    The text is lowercased, the Polish letters ą, ć, ę, ł, ń, ó, ś, ź, ż are
    replaced with their ASCII base letters, newlines and tabs are replaced
    with spaces, and every run of consecutive spaces is collapsed into a
    single space. Leading and trailing spaces are not stripped.

    Args:
        text: The input string.

    Returns:
        The normalized string.
    """
    # Lowercase first so that uppercase Polish letters (e.g. "Ą", "Ł") are
    # covered by the lowercase-only entries of the translation table.
    text = text.lower().translate(_TEXT_PREPROCESS_TABLE)

    # A single replace pass only halves a run of spaces, so repeat until no
    # doubled space is left (e.g. three newlines in a row become one space).
    while "  " in text:
        text = text.replace("  ", " ")
    return text