Systemy_dialogowe/system_mockup/analizator_jezyka_naturalnego.py

import jsgf
from os import listdir
from os.path import isfile, join, dirname, abspath

# Directory holding the grammar files, resolved relative to this module so the
# lookup does not depend on the current working directory.
mypath = dirname(abspath(__file__)) + "/../semantic_parser/gramatics/"

# Regular files only (sub-directories are skipped). os.listdir() returns names
# in arbitrary order, so sort them: nlu() stops at the first grammar that
# matches, and that choice should not vary between machines or runs.
onlyfiles = sorted(f for f in listdir(mypath) if isfile(join(mypath, f)))

# Parsed grammars, in the same order as onlyfiles.
grammars = []

for grammarFile in onlyfiles:
    grammar = jsgf.parse_grammar_file(join(mypath, grammarFile))
    grammars.append(grammar)

def get_dialog_act(rule):
    """Build a dialogue-act frame from a matched grammar rule.

    The act label is the name of the grammar that owns ``rule``; the slots
    are the (tag, matched text) pairs that get_slots collects from the
    rule's expansion tree.

    Returns:
        dict with keys ``'act'`` (grammar name) and ``'slots'`` (list of
        tuples).
    """
    collected = []
    get_slots(rule.expansion, collected)
    frame = {'act': rule.grammar.name, 'slots': collected}
    return frame

def get_slots(expansion, slots):
    """Collect (tag, matched text) pairs from an expansion tree into ``slots``.

    The tree is walked depth-first, children in order. A tagged expansion
    contributes one pair and is not descended into. An untagged expansion
    with no children that is a named rule reference is followed into the
    expansion of the rule it refers to.

    Args:
        expansion: node to inspect; must expose ``tag``, ``current_match``
            and ``children``.
        slots: list that is extended in place.

    Returns:
        None; results are appended to ``slots``.
    """
    is_tagged = expansion.tag != ''
    if is_tagged:
        # A tag marks a slot: record it and do not look below it.
        slots.append((expansion.tag, expansion.current_match))
    else:
        sub_expansions = expansion.children
        if sub_expansions:
            for sub in sub_expansions:
                get_slots(sub, slots)
        elif isinstance(expansion, jsgf.NamedRuleRef):
            # A childless reference: continue in the referenced rule.
            get_slots(expansion.referenced_rule.expansion, slots)

def nlu(utterance):
    """Turn an utterance into a dialogue-act frame using the loaded grammars.

    Grammars are tried in the order they appear in ``grammars``; the first
    rule of the first grammar that reports any matching rule is converted
    with get_dialog_act.

    Returns:
        The frame for the matched rule, or ``{'act': 'null', 'slots': []}``
        when no grammar matches.
    """
    for candidate in grammars:
        hits = candidate.find_matching_rules(utterance)
        if hits:
            # First matching grammar wins; later grammars are not consulted.
            return get_dialog_act(hits[0])

    return {'act': 'null', 'slots': []}
        
def analizator_jezyka_naturalnego(text):
    """Natural-language analyser entry point.

    Normalises ``text`` with text_preprocess and returns the frame that nlu
    produces for the normalised text.
    """
    return nlu(text_preprocess(text))

# Single-pass character map: Polish diacritics to their ASCII base letters and
# line-break/tab characters to a plain space.
_PREPROCESS_TABLE = str.maketrans({
    "ą": "a",
    "ć": "c",
    "ę": "e",
    "ł": "l",
    "ń": "n",
    "ó": "o",
    "ś": "s",
    "ź": "z",
    "ż": "z",
    "\n": " ",
    "\t": " ",
    "\r": " ",
})


def text_preprocess(text):
    """Normalise an utterance before it is passed to nlu.

    The text is lower-cased, Polish diacritics are replaced by their ASCII
    base letters, newlines, carriage returns and tabs become spaces, and
    every run of spaces is collapsed to a single space. Leading and trailing
    spaces are collapsed as well but not stripped.

    Args:
        text: raw utterance (str).

    Returns:
        The normalised string.
    """
    # Lower-case first so upper-case diacritics (e.g. "Ż") are mapped too.
    text = text.lower().translate(_PREPROCESS_TABLE)
    # A single replace("  ", " ") only halves each run of spaces (four spaces
    # would still leave two), so repeat until no double space is left.
    while "  " in text:
        text = text.replace("  ", " ")
    return text
added parser to system mockup 2022-05-25 12:21:06 +02:00			`import jsgf`
			`from os import listdir`
Fix dialogue state reset 2022-06-08 08:20:49 +02:00			`from os.path import isfile, join, dirname, abspath`
bugfix 2022-04-19 21:25:16 +02:00
Fix dialogue state reset 2022-06-08 08:20:49 +02:00			`mypath = dirname(abspath(__file__)) + "/../semantic_parser/gramatics/"`
added parser to system mockup 2022-05-25 12:21:06 +02:00			`onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]`
Move acts to consts 2022-04-19 23:22:20 +02:00
added parser to system mockup 2022-05-25 12:21:06 +02:00			`grammars = []`
bugfix 2022-04-19 21:25:16 +02:00
added parser to system mockup 2022-05-25 12:21:06 +02:00			`for grammarFile in onlyfiles:`
			`grammar = jsgf.parse_grammar_file(mypath + grammarFile)`
			`grammars.append(grammar)`

			`def get_dialog_act(rule):`
			`slots = []`
			`get_slots(rule.expansion, slots)`
			`return {'act': rule.grammar.name, 'slots': slots}`

			`def get_slots(expansion, slots):`
			`if expansion.tag != '':`
			`slots.append((expansion.tag, expansion.current_match))`
			`return`

			`for child in expansion.children:`
			`get_slots(child, slots)`

			`if not expansion.children and isinstance(expansion, jsgf.NamedRuleRef):`
			`get_slots(expansion.referenced_rule.expansion, slots)`

			`def nlu(utterance):`
			`matched = None`
			`for grammar in grammars:`
			`matched = grammar.find_matching_rules(utterance)`
			`if matched:`
			`break`

			`if matched:`
			`return get_dialog_act(matched[0])`
			`else:`
			`return {'act': 'null', 'slots': []}`
bugfix 2022-04-19 21:25:16 +02:00
			`def analizator_jezyka_naturalnego(text):`
			`text = text_preprocess(text)`
added parser to system mockup 2022-05-25 12:21:06 +02:00			`frame = nlu(text)`
bugfix 2022-04-19 21:25:16 +02:00			`return frame`

			`def text_preprocess(text):`
			`text = text.lower()`
Add preprocessing for replacing polish characters 2022-06-01 10:59:15 +02:00			`text = text.replace("ą", "a")`
			`text = text.replace("ć", "c")`
			`text = text.replace("ę", "e")`
			`text = text.replace("ł", "l")`
			`text = text.replace("ń", "n")`
			`text = text.replace("ó", "o")`
			`text = text.replace("ś", "s")`
			`text = text.replace("ź", "z")`
			`text = text.replace("ż", "z")`
			`text = text.replace("\n", " ")`
			`text = text.replace("\t", " ")`
			`text = text.replace(" ", " ")`
bugfix 2022-04-19 21:25:16 +02:00			`return text`