Add preprocessing for replacing Polish characters
This commit is contained in:
parent
7e2e35032d
commit
4088d67207
@ -46,4 +46,16 @@ def analizator_jezyka_naturalnego(text):
|
|||||||
|
|
||||||
# Translation table: lowercase Polish diacritics -> ASCII base letters, and
# newline/tab -> space. Built once at import time so each call is a single
# C-level pass instead of a chain of str.replace() calls.
_POLISH_TO_ASCII = str.maketrans("ąćęłńóśźż\n\t", "acelnoszz  ")


def text_preprocess(text):
    """Normalize *text* for further analysis.

    The text is lowercased, Polish diacritics are replaced with their ASCII
    base letters (e.g. "ł" -> "l", "ż" -> "z"), newlines and tabs are turned
    into spaces, and every run of consecutive spaces is collapsed to a single
    space. Leading and trailing spaces are kept (collapsed to at most one).

    Args:
        text: The input string.

    Returns:
        The normalized string.
    """
    # Lowercase first so uppercase diacritics ("Ł", "Ż", ...) are covered by
    # the lowercase-only translation table.
    text = text.lower().translate(_POLISH_TO_ASCII)

    # A single replace("  ", " ") only halves long runs of spaces; repeat
    # until no double space remains.
    while "  " in text:
        text = text.replace("  ", " ")
    return text
|
||||||
|
Loading…
Reference in New Issue
Block a user