System_Dialogowy_Janet/Code/Modules/ML_NLU_module.py

import jsgf
from tabulate import tabulate
from flair.data import Sentence, Token
from flair.datasets import SentenceDataset
from flair.models import SequenceTagger

class ML_NLU:
    """Natural-language-understanding component backed by two Flair taggers.

    One tagger labels each token with a slot tag (``B-``/``I-``/``O`` scheme),
    the other labels each token with a frame (intent) tag.  Both models are
    loaded once, when the object is constructed.
    """

    def __init__(self):
        self.slot_model, self.frame_model = self.setup()

    def nolabel2o(self, line, i):
        """Return ``line[i]``, mapping the placeholder ``'NoLabel'`` to ``'O'``."""
        return 'O' if line[i] == 'NoLabel' else line[i]

    def conllu2flair(self, sentences, label=None):
        """Convert CoNLL-U style sentences into a Flair ``SentenceDataset``.

        Args:
            sentences: iterable of sentences, each an iterable of token
                mappings that contain at least the key ``'form'``.
            label: optional key; when given, ``token[label]`` is attached to
                every Flair token as a tag of that type.

        Returns:
            A ``SentenceDataset`` holding one Flair ``Sentence`` per input
            sentence.
        """
        fsentences = []
        for sentence in sentences:
            fsentence = Sentence()
            for token in sentence:
                ftoken = Token(token['form'])
                if label:
                    ftoken.add_tag(label, token[label])
                fsentence.add_token(ftoken)
            fsentences.append(fsentence)
        return SentenceDataset(fsentences)

    @staticmethod
    def _select_intent(intent_scores):
        """Return the intent name with the highest accumulated score.

        Args:
            intent_scores: mapping of intent name to summed score.

        Raises:
            ValueError: if ``intent_scores`` is empty.
        """
        if not intent_scores:
            raise ValueError("no frame labels were predicted for the sentence")
        # The key function is essential: a bare max() over the dict would
        # compare the intent *names* alphabetically and ignore the scores.
        return max(intent_scores, key=intent_scores.get)

    def predict(self, sentence):
        """Tag a tokenised sentence with slots and pick its intent.

        Args:
            sentence: list of word strings.

        Returns:
            A pair ``(slots, intent)`` where ``slots`` is a list of
            ``(word, slot_tag)`` tuples in sentence order and ``intent`` is
            the frame label whose per-token scores sum to the largest value.

        Raises:
            ValueError: if no frame label was produced for any token.
        """
        csentence = [{'form': word} for word in sentence]
        fsentence = self.conllu2flair([csentence])[0]
        self.slot_model.predict(fsentence)
        self.frame_model.predict(fsentence)

        # Sum the frame scores over all tokens so that every token votes
        # for the sentence-level intent.
        intent_scores = {}
        for token in fsentence:
            for intent in token.annotation_layers["frame"]:
                intent_scores[intent.value] = (
                    intent_scores.get(intent.value, 0) + intent.score
                )

        slots = [
            (word, ftoken.get_tag('slot').value)
            for word, ftoken in zip(sentence, fsentence)
        ]
        return slots, self._select_intent(intent_scores)

    def setup(self):
        """Load and return the ``(slot_model, frame_model)`` taggers.

        The model files are read from paths relative to the current working
        directory.
        """
        slot_model = SequenceTagger.load('slot-model/final-model.pt')
        frame_model = SequenceTagger.load('frame-model/final-model.pt')
        return slot_model, frame_model

    def test_nlu(self, utterance):
        """Run the full NLU pipeline on a raw utterance string.

        Args:
            utterance: user text; it is split on whitespace into tokens.

        Returns:
            A list of ``[intent, domain, slot, value]`` entries (see
            ``convert_act_to_list``), or the string ``'Critical Error'`` when
            the utterance is empty, ``None`` or contains only whitespace.
        """
        tokens = utterance.split() if utterance else []
        if not tokens:
            # A whitespace-only utterance yields no tokens; treat it like an
            # empty one instead of sending an empty sentence to the models.
            return 'Critical Error'
        slots, act = self.predict(tokens)
        slots = [x for x in slots if x[1] != 'O']
        arguments = self.convert_slot_to_argument(slots)
        return self.convert_act_to_list(act, arguments)

    def convert_slot_to_argument(self, slots):
        """Merge ``B-``/``I-`` tagged words into ``(slot_name, value)`` pairs.

        Args:
            slots: sequence of ``(word, tag)`` pairs.

        Returns:
            A list of ``(slot_name, value)`` tuples.  A ``B-name`` tag opens a
            new argument; following ``I-name`` tags with exactly the same
            name append their word, separated by a single space.  Tags that
            continue no open argument are ignored.
        """
        arguments = []
        candidate = None
        for slot in slots:
            word, tag = slot[0], slot[1]
            if tag.startswith("B-"):
                if candidate is not None:
                    arguments.append(candidate)
                # Strip only the leading prefix, not every "B-" in the name.
                candidate = [tag[2:], word]
            elif (
                tag.startswith("I-")
                and candidate is not None
                # Exact name match: "I-showtime" must not continue "time".
                and tag[2:] == candidate[0]
            ):
                candidate[1] += " " + word
        if candidate is not None:
            arguments.append(candidate)
        return [(x[0], x[1]) for x in arguments]

    def convert_act_to_list(self, act, slots):
        """Expand an intent and its arguments into flat result rows.

        Args:
            act: intent label; the part before the first ``'/'`` is used as
                the domain.
            slots: sequence of ``(slot_name, value)`` pairs.

        Returns:
            One ``[intent, domain, slot, value]`` list per entry in ``slots``;
            an empty list when ``slots`` is empty.
        """
        if not slots:
            return []
        domain = act.split('/')[0]
        return [[act, domain, slot[0], slot[1]] for slot in slots]
Rozdzielenie na klasy 2021-05-27 15:11:28 +02:00			`import jsgf`
			`from tabulate import tabulate`
			`from flair.data import Sentence, Token`
			`from flair.datasets import SentenceDataset`
			`from flair.models import SequenceTagger`

			`class ML_NLU:`
DST (start) 2021-05-30 13:31:34 +02:00			`def __init__(self):`
Rozdzielenie na klasy 2021-05-27 15:11:28 +02:00			`self.slot_model, self.frame_model = self.setup()`

			`def nolabel2o(self, line, i):`
			`return 'O' if line[i] == 'NoLabel' else line[i]`

			`def conllu2flair(self, sentences, label=None):`
			`fsentences = []`
			`for sentence in sentences:`
			`fsentence = Sentence()`
			`for token in sentence:`
			`ftoken = Token(token['form'])`
			`if label:`
			`ftoken.add_tag(label, token[label])`
			`fsentence.add_token(ftoken)`
			`fsentences.append(fsentence)`
			`return SentenceDataset(fsentences)`


			`def predict(self, sentence):`
			`csentence = [{'form': word} for word in sentence]`
			`fsentence = self.conllu2flair([csentence])[0]`
			`self.slot_model.predict(fsentence)`
			`self.frame_model.predict(fsentence)`
			`possible_intents = {}`
			`for token in fsentence:`
			`for intent in token.annotation_layers["frame"]:`
			`if(intent.value in possible_intents):`
			`possible_intents[intent.value] += intent.score`
			`else:`
			`possible_intents[intent.value] = intent.score`
			`return [(token, ftoken.get_tag('slot').value) for token, ftoken in zip(sentence, fsentence)], max(possible_intents)`

			`def setup(self):`
			`slot_model = SequenceTagger.load('slot-model/final-model.pt')`
			`frame_model = SequenceTagger.load('frame-model/final-model.pt')`
			`return slot_model, frame_model`

			`def test_nlu(self, utterance):`
			`if utterance:`
			`slots, act = self.predict(utterance.split())`
			`slots = [x for x in slots if x[1] != 'O']`
			`arguments = self.convert_slot_to_argument(slots)`
Wstępnie działający DST 2021-05-30 14:45:42 +02:00			`return self.convert_act_to_list(act, arguments)`
Rozdzielenie na klasy 2021-05-27 15:11:28 +02:00			`else:`
			`return 'Critical Error'`

			`def convert_slot_to_argument(self, slots):`
			`arguments = []`
			`candidate = None`
			`for slot in slots:`
			`if slot[1].startswith("B-"):`
			`if(candidate != None):`
			`arguments.append(candidate)`
			`candidate = [slot[1].replace("B-", ""), slot[0]]`
			`if slot[1].startswith("I-") and candidate != None and slot[1].endswith(candidate[0]):`
			`candidate[1] += " " + slot[0]`
			`if(candidate != None):`
			`arguments.append(candidate)`
			`return [(x[0], x[1]) for x in arguments]`
Wstępnie działający DST 2021-05-30 14:45:42 +02:00
			`def convert_act_to_list(self, act, slots):`
			`result = []`
			`for i in range(len(slots)):`
			`intent = act`
			`domain = act.split('/')[0]`
			`slot = slots[i][0]`
			`value = slots[i][1]`
			`result.append([intent, domain, slot, value])`
			`return result`