From 166707dd02060d1f98724d987c6d57a5ad838d86 Mon Sep 17 00:00:00 2001 From: s464951 Date: Mon, 10 Jun 2024 22:27:30 +0200 Subject: [PATCH] data fix --- chatbot/data/intents.json | 3 ++ chatbot/data/response.json | 5 ++- chatbot/data/test_dialog.conllu | 8 ++-- chatbot/data/train_dialog.conllu | 66 +------------------------------ chatbot/models/nlu_train2.py | 10 ++--- chatbot/modules/nlu.py | 68 +++++++++++++++++++------------- 6 files changed, 58 insertions(+), 102 deletions(-) diff --git a/chatbot/data/intents.json b/chatbot/data/intents.json index cb53462..7b12f1b 100644 --- a/chatbot/data/intents.json +++ b/chatbot/data/intents.json @@ -4,5 +4,8 @@ "(?i)\\b(kim|czym)\\s+(jesteÅ›|jestes|jest)\\b", "(?i)\\b(cześć|czesc|witaj|witam|hej|siema|helo|hello)\\b", "(?i)\\b(proszÄ™|powiedz|opowiedz|opisz)\\s+(mi|nam)\\s+(o|wiÄ™cej|coÅ›)\\s+(o|na temat)\\s*(twoim|ciebie|twoje)\\s*(imieniu|imieniem|imiÄ™|nazwisko)\\b" + ], + "ask_price": [ + "(?i)\\b(jaka jest cena produktu)\\b" ] } diff --git a/chatbot/data/response.json b/chatbot/data/response.json index 226cb51..c3fe424 100644 --- a/chatbot/data/response.json +++ b/chatbot/data/response.json @@ -20,5 +20,8 @@ "WyglÄ…da na to, że krążymy wokół tego samego tematu. Czy możemy przejść do czegoÅ› innego?", "Znowu to samo pytanie, może zmienimy temat?", "Ponownie pytasz o to samo, czy jest coÅ› innego, o co chciaÅ‚byÅ› zapytać?" - ] + ], + "give_price": [ + "Cena to 20zl" + ] } diff --git a/chatbot/data/test_dialog.conllu b/chatbot/data/test_dialog.conllu index a1a8918..721fbf9 100644 --- a/chatbot/data/test_dialog.conllu +++ b/chatbot/data/test_dialog.conllu @@ -49,8 +49,8 @@ 6 dowiedzieć, request NoLabel 7 jakie request NoLabel 8 sÄ… request NoLabel -9 opcje request B-delivery-method -10 dostawy? request I-delivery-method +9 opcje request B-delivery_method +10 dostawy? request I-delivery_method # text: OczywiÅ›cie! Oferujemy dostawÄ™ kurierem za 10 zÅ‚ oraz odbiór osobisty w naszym sklepie. # intent: inform @@ -93,5 +93,5 @@ 1 ProszÄ™ request NoLabel 2 podać request NoLabel 3 ceny request B-price -4 weza request B-product -5 ogrodowego. request I-product +4 weza request B-item +5 ogrodowego. request I-item diff --git a/chatbot/data/train_dialog.conllu b/chatbot/data/train_dialog.conllu index a1761af..fa02b5e 100644 --- a/chatbot/data/train_dialog.conllu +++ b/chatbot/data/train_dialog.conllu @@ -1134,67 +1134,5 @@ 1 ProszÄ™ request NoLabel 2 podać request NoLabel 3 cene request B-price -4 fotela request B-product -5 ogrodowego. request I-product - -# text: Czesc chcialabym kupic szampon do wlosow -# intent: request -# slots: -1 Czesc request NoLabel -2 chcialabym request NoLabel -3 kupic request NoLabel -4 szampon request B-item -5 do request I-item -6 wlosow request I-item - -# text: Czesc jaka jest cena le¿aka? -# intent: request -# slots: -1 Czesc request NoLabel -2 jaka request NoLabel -3 jest request NoLabel -4 cena request B-price -5 le¿aka? request B-item - -# text: Ile kosztuje krzeslo? -# intent: request -# slots: -1 Ile request NoLabel -2 kosztuje request B-price -3 krzeslo? request B-item - -# text: Jaka cena mleka? -# intent: request -# slots: -1 Jaka request NoLabel -2 cena request B-price -3 mleka? request B-item - -# text: Ile kosztuje ten produkt? -# intent: request -# slots: -1 Ile request NoLabel -2 kosztuje request B-price -3 ten request NoLabel -4 produkt? request B-item - -# text: Chce kupic doniczke -# intent: request -# slots: -1 Chce request NoLabel -2 kupic request NoLabel -3 doniczke request B-item - -# text: Jaka cena dostawy? -# intent: request -# slots: -1 Jaka request NoLabel -2 cena request B-price -3 dostawy? request B-delivery_method - -# text: Ul. Poznanska 2323 -# intent: inform -# slots: -1 Ul. inform B-address -2 Poznanska inform I-address -3 2323 inform I-address +4 fotela request B-item +5 ogrodowego. request I-item diff --git a/chatbot/models/nlu_train2.py b/chatbot/models/nlu_train2.py index bb6a2fa..a7782dc 100644 --- a/chatbot/models/nlu_train2.py +++ b/chatbot/models/nlu_train2.py @@ -112,10 +112,10 @@ class Model: trainset = list(parse_incr(f, fields=['id', 'form', 'frame', 'slot'], field_parsers=field_parsers)) with open(self.test_dataset, encoding='utf-8') as f: testset = list(parse_incr(f, fields=['id', 'form', 'frame', 'slot'], field_parsers=field_parsers)) - + print('TRAINSET:', trainset) corpus = Corpus(train=conllu2flair(trainset, label_type), test=conllu2flair(testset, label_type)) label_dictionary = corpus.make_label_dictionary(label_type=label_type) - + print('LABEL:' ,label_dictionary) embedding_types = [ WordEmbeddings('pl'), FlairEmbeddings('pl-forward'), @@ -133,6 +133,6 @@ class Model: -model = Model(train_dataset='../data/test_dialog.conllu', test_dataset='../data/test_dialog.conllu') -model.train_model('frame') -model.train_model('slot', field_parsers={'slot': nolabel2o}) +#model = Model(train_dataset='../data/test_dialog.conllu', test_dataset='../data/test_dialog.conllu') +# model2 = Model(train_dataset='../data/test_dialog.conllu', test_dataset='../data/test_dialog.conllu') +# model2.train_model('slot', field_parsers={'slot': nolabel2o}) diff --git a/chatbot/modules/nlu.py b/chatbot/modules/nlu.py index f75ae25..849a84b 100644 --- a/chatbot/modules/nlu.py +++ b/chatbot/modules/nlu.py @@ -1,38 +1,50 @@ -import os -import jsgf +from flair.models import SequenceTagger +import sys +sys.path.append("..") +from models.nlu_train2 import predict_frame, predict_slot +import logging + +logging.getLogger('flair').setLevel(logging.CRITICAL) class NLU: def __init__(self): - self.grammars = [ - jsgf.parse_grammar_file(f"grammars/{file_name}") - for file_name in os.listdir("grammars") - ] + self.frame_model = SequenceTagger.load('../models/frame-model/final-model.pt') + self.slot_model = SequenceTagger.load('../models/slot-model/final-model.pt') - def get_dialog_act(self, rule): + def get_intent(self, text: str): + return predict_frame(self.frame_model, text.split(), 'frame') + + def get_slot(self, text: str): + pred = predict_slot(self.slot_model, text.split(), 'slot') slots = [] - self.get_slots(rule.expansion, slots) - return {"act": rule.grammar.name, "slots": slots} + current_slot = None + current_slot_value = [] - def get_slots(self, expansion, slots): - if expansion.tag != "": - slots.append((expansion.tag, expansion.current_match)) - return + for frame in pred: + slot = frame["slot"] + if slot.startswith("B-"): + if current_slot: + slots.append({'name': current_slot, 'value': " ".join(current_slot_value)}) + current_slot = slot[2:] + current_slot_value = [frame["form"]] + elif slot.startswith("I-"): + current_slot_value.append(frame["form"]) - for child in expansion.children: - self.get_slots(child, slots) + if current_slot: + slots.append({'name': current_slot, 'value': " ".join(current_slot_value)}) - if not expansion.children and isinstance(expansion, jsgf.NamedRuleRef): - self.get_slots(expansion.referenced_rule.expansion, slots) + return slots - def match(self, utterance): - list_of_illegal_character = [",", ".", "'", "?", "!", ":", "-", "/"] - for illegal_character in list_of_illegal_character[:-2]: - utterance = utterance.replace(f"{illegal_character}", "") - for illegal_character in list_of_illegal_character[-2:]: - utterance = utterance.replace(f"{illegal_character}", " ") + def analyze(self, text: str): + intent = self.get_intent(text) + slots = self.get_slot(text) + print({'intent': intent, + 'slots': slots}) + return { + 'intent': intent, + 'slots': slots + } - for grammar in self.grammars: - matched = grammar.find_matching_rules(utterance.lower()) - if matched: - return self.get_dialog_act(matched[0]) - return {"act": "null", "slots": []} +nlu = NLU() + +nlu.analyze("Chce kupic lakier do pazanokci") \ No newline at end of file