System_Dialogowy_Janet/Code/Modules/ML_NLU_module.py

import jsgf
from tabulate import tabulate
from flair.data import Sentence, Token
from flair.datasets import SentenceDataset
from flair.models import SequenceTagger

class ML_NLU:
    """Natural-language-understanding component backed by two flair taggers.

    One tagger labels every token with a slot tag (read back from the
    ``'slot'`` tag of each token), the other labels every token with an
    intent (read back from the ``"frame"`` annotation layer).
    """

    def __init__(self, acts, arguments):
        """Store the configuration and load both models.

        Args:
            acts: stored on the instance; not used by the methods of this class.
            arguments: stored on the instance; not used by the methods of
                this class.
        """
        self.acts = acts
        self.arguments = arguments
        self.slot_model, self.frame_model = self.setup()

    def nolabel2o(self, line, i):
        """Return ``line[i]``, with the value ``'NoLabel'`` mapped to ``'O'``."""
        return 'O' if line[i] == 'NoLabel' else line[i]

    def conllu2flair(self, sentences, label=None):
        """Convert sentences given as lists of token dicts into a flair dataset.

        Args:
            sentences: iterable of sentences; each sentence is an iterable of
                mappings that have at least a ``'form'`` key (the token text).
            label: optional key; when given, ``token[label]`` is attached to
                every flair token as a tag of that name.

        Returns:
            A ``SentenceDataset`` holding one flair ``Sentence`` per input
            sentence.
        """
        fsentences = []
        for sentence in sentences:
            fsentence = Sentence()
            for token in sentence:
                ftoken = Token(token['form'])
                if label:
                    ftoken.add_tag(label, token[label])
                fsentence.add_token(ftoken)
            fsentences.append(fsentence)
        return SentenceDataset(fsentences)

    @staticmethod
    def _best_intent(intent_scores):
        """Return the intent with the highest accumulated score.

        Args:
            intent_scores: mapping of intent name to summed score.

        Raises:
            ValueError: if ``intent_scores`` is empty.
        """
        if not intent_scores:
            raise ValueError("no intent predictions available")
        # Rank by score; a bare max() over the dict would compare the intent
        # names alphabetically and ignore the scores entirely.
        return max(intent_scores, key=intent_scores.get)

    def predict(self, sentence):
        """Tag a tokenized sentence with slots and pick its intent.

        Args:
            sentence: list of word strings.

        Returns:
            A pair ``(tagged, intent)`` where ``tagged`` is a list of
            ``(word, slot_tag)`` tuples and ``intent`` is the intent whose
            scores, summed over all tokens, are the highest.

        Raises:
            ValueError: if no intent label was predicted for any token.
        """
        csentence = [{'form': word} for word in sentence]
        fsentence = self.conllu2flair([csentence])[0]
        self.slot_model.predict(fsentence)
        self.frame_model.predict(fsentence)

        # Every token carries its own intent prediction; add the scores up per
        # intent so the whole sentence votes on the final answer.
        intent_scores = {}
        for token in fsentence:
            for intent in token.annotation_layers["frame"]:
                intent_scores[intent.value] = (
                    intent_scores.get(intent.value, 0) + intent.score
                )

        tagged = [
            (word, ftoken.get_tag('slot').value)
            for word, ftoken in zip(sentence, fsentence)
        ]
        return tagged, self._best_intent(intent_scores)

    def setup(self):
        """Load the slot and frame taggers from their fixed relative paths.

        Returns:
            A pair ``(slot_model, frame_model)``.
        """
        slot_model = SequenceTagger.load('slot-model/final-model.pt')
        frame_model = SequenceTagger.load('frame-model/final-model.pt')
        return slot_model, frame_model

    def test_nlu(self, utterance):
        """Run the full pipeline on a raw utterance.

        Args:
            utterance: text to analyse; it is split on whitespace.

        Returns:
            ``{'act': intent, 'slots': [(slot_name, text), ...]}``, or the
            string ``'Critical Error'`` when the utterance is empty or
            contains only whitespace.
        """
        tokens = utterance.split() if utterance else []
        if not tokens:
            # A whitespace-only utterance yields no tokens; report it the same
            # way as an empty one instead of running the models on nothing.
            return 'Critical Error'
        slots, act = self.predict(tokens)
        # NOTE(review): 'O' tokens are dropped before grouping, so an "I-" tag
        # that follows a gap still extends the previous slot of the same name.
        slots = [x for x in slots if x[1] != 'O']
        arguments = self.convert_slot_to_argument(slots)
        return {'act': act, 'slots': arguments}

    def convert_slot_to_argument(self, slots):
        """Group ``B-``/``I-`` tagged words into ``(slot_name, text)`` tuples.

        A ``B-<name>`` tag opens a new slot. An ``I-<name>`` tag appends its
        word (space separated) to the open slot when the names match exactly;
        any other tag is ignored.

        Args:
            slots: iterable of ``(word, tag)`` pairs.

        Returns:
            List of ``(slot_name, text)`` tuples in order of appearance.
        """
        arguments = []
        candidate = None
        for slot in slots:
            word, tag = slot[0], slot[1]
            if tag.startswith("B-"):
                if candidate is not None:
                    arguments.append(candidate)
                # Strip only the leading marker; str.replace would also remove
                # any "B-" occurring inside the slot name itself.
                candidate = [tag[2:], word]
            elif (
                tag.startswith("I-")
                and candidate is not None
                # Exact name match: a suffix test would let "I-subtype"
                # continue an open "type" slot.
                and tag[2:] == candidate[0]
            ):
                candidate[1] += " " + word
        if candidate is not None:
            arguments.append(candidate)
        return [(x[0], x[1]) for x in arguments]
Rozdzielenie na klasy 2021-05-27 15:11:28 +02:00			`import jsgf`
			`from tabulate import tabulate`
			`from flair.data import Sentence, Token`
			`from flair.datasets import SentenceDataset`
			`from flair.models import SequenceTagger`

			`class ML_NLU:`
			`def __init__(self, acts, arguments):`
			`self.acts = acts`
			`self.arguments = arguments`
			`self.slot_model, self.frame_model = self.setup()`

			`def nolabel2o(self, line, i):`
			`return 'O' if line[i] == 'NoLabel' else line[i]`

			`def conllu2flair(self, sentences, label=None):`
			`fsentences = []`
			`for sentence in sentences:`
			`fsentence = Sentence()`
			`for token in sentence:`
			`ftoken = Token(token['form'])`
			`if label:`
			`ftoken.add_tag(label, token[label])`
			`fsentence.add_token(ftoken)`
			`fsentences.append(fsentence)`
			`return SentenceDataset(fsentences)`


			`def predict(self, sentence):`
			`csentence = [{'form': word} for word in sentence]`
			`fsentence = self.conllu2flair([csentence])[0]`
			`self.slot_model.predict(fsentence)`
			`self.frame_model.predict(fsentence)`
			`possible_intents = {}`
			`for token in fsentence:`
			`for intent in token.annotation_layers["frame"]:`
			`if(intent.value in possible_intents):`
			`possible_intents[intent.value] += intent.score`
			`else:`
			`possible_intents[intent.value] = intent.score`
			`return [(token, ftoken.get_tag('slot').value) for token, ftoken in zip(sentence, fsentence)], max(possible_intents)`

			`def setup(self):`
			`slot_model = SequenceTagger.load('slot-model/final-model.pt')`
			`frame_model = SequenceTagger.load('frame-model/final-model.pt')`
			`return slot_model, frame_model`

			`def test_nlu(self, utterance):`
			`if utterance:`
			`slots, act = self.predict(utterance.split())`
			`slots = [x for x in slots if x[1] != 'O']`
			`arguments = self.convert_slot_to_argument(slots)`
			`return {'act': act, 'slots': arguments}`
			`else:`
			`return 'Critical Error'`

			`def convert_slot_to_argument(self, slots):`
			`arguments = []`
			`candidate = None`
			`for slot in slots:`
			`if slot[1].startswith("B-"):`
			`if(candidate != None):`
			`arguments.append(candidate)`
			`candidate = [slot[1].replace("B-", ""), slot[0]]`
			`if slot[1].startswith("I-") and candidate != None and slot[1].endswith(candidate[0]):`
			`candidate[1] += " " + slot[0]`
			`if(candidate != None):`
			`arguments.append(candidate)`
			`return [(x[0], x[1]) for x in arguments]`