data fix

2024-06-10 22:27:30 +02:00 · 2024-06-10 22:27:30 +02:00 · 166707dd02
commit 166707dd02
parent 2139011821
6 changed files with 58 additions and 102 deletions
--- a/chatbot/data/intents.json
+++ b/chatbot/data/intents.json
@ -4,5 +4,8 @@
    "(?i)\\b(kim|czym)\\s+(jesteś|jestes|jest)\\b",
    "(?i)\\b(cześć|czesc|witaj|witam|hej|siema|helo|hello)\\b",
    "(?i)\\b(proszę|powiedz|opowiedz|opisz)\\s+(mi|nam)\\s+(o|więcej|coś)\\s+(o|na temat)\\s*(twoim|ciebie|twoje)\\s*(imieniu|imieniem|imię|nazwisko)\\b"
  ],
  "ask_price": [
    "(?i)\\b(jaka jest cena produktu)\\b"
  ]
 }
--- a/chatbot/data/response.json
+++ b/chatbot/data/response.json
@ -20,5 +20,8 @@
    "Wygląda na to, że krążymy wokół tego samego tematu. Czy możemy przejść do czegoś innego?",
    "Znowu to samo pytanie, może zmienimy temat?",
    "Ponownie pytasz o to samo, czy jest coś innego, o co chciałbyś zapytać?"
    ],
  "give_price": [
    "Cena to 20zl"
  ]
 }
--- a/chatbot/data/test_dialog.conllu
+++ b/chatbot/data/test_dialog.conllu
@ -49,8 +49,8 @@
 6	dowiedzieć,	request	NoLabel
 7	jakie	request	NoLabel
 8	są	request	NoLabel
-9	opcje	request	B-delivery-method
+9	opcje	request	B-delivery_method
-10	dostawy?	request	I-delivery-method
+10	dostawy?	request	I-delivery_method
 # text: Oczywiście! Oferujemy dostawę kurierem za 10 zł oraz odbiór osobisty w naszym sklepie.
 # intent: inform
@ -93,5 +93,5 @@
 1	Proszę	request	NoLabel
 2	podać	request	NoLabel
 3	ceny	request	B-price
-4	weza	request	B-product
+4	weza	request	B-item
-5	ogrodowego.	request	I-product
+5	ogrodowego.	request	I-item
--- a/chatbot/data/train_dialog.conllu
+++ b/chatbot/data/train_dialog.conllu
@ -1134,67 +1134,5 @@
 1	Proszę	request	NoLabel
 2	podać	request	NoLabel
 3	cene	request	B-price
-4	fotela	request	B-product
+4	fotela	request	B-item
-5	ogrodowego.	request	I-product
+5	ogrodowego.	request	I-item
 # text: Czesc chcialabym kupic szampon do wlosow
 # intent: request
 # slots:
 1	Czesc	request	NoLabel
 2	chcialabym	request	NoLabel
 3	kupic	request	NoLabel
 4	szampon	request	B-item
 5	do	request	I-item
 6	wlosow	request	I-item
 # text: Czesc jaka jest cena leżaka?
 # intent: request
 # slots:
 1	Czesc	request	NoLabel
 2	jaka	request	NoLabel
 3	jest	request	NoLabel
 4	cena	request	B-price
 5	leżaka?	request	B-item
 # text: Ile kosztuje krzeslo?
 # intent: request
 # slots:
 1	Ile	request	NoLabel
 2	kosztuje	request	B-price
 3	krzeslo?	request	B-item
 # text: Jaka cena mleka?
 # intent: request
 # slots:
 1	Jaka	request	NoLabel
 2	cena	request	B-price
 3	mleka?	request	B-item
 # text: Ile kosztuje ten produkt?
 # intent: request
 # slots:
 1	Ile	request	NoLabel
 2	kosztuje	request	B-price
 3	ten	request	NoLabel
 4	produkt?	request	B-item
 # text: Chce kupic doniczke
 # intent: request
 # slots:
 1	Chce	request	NoLabel
 2	kupic	request	NoLabel
 3	doniczke	request	B-item
 # text: Jaka cena dostawy?
 # intent: request
 # slots:
 1	Jaka	request	NoLabel
 2	cena	request	B-price
 3	dostawy?	request	B-delivery_method
 # text: Ul. Poznanska 2323
 # intent: inform
 # slots:
 1	Ul.	inform	B-address
 2	Poznanska	inform	I-address
 3	2323	inform	I-address
--- a/chatbot/models/nlu_train2.py
+++ b/chatbot/models/nlu_train2.py
@ -112,10 +112,10 @@ class Model:
            trainset = list(parse_incr(f, fields=['id', 'form', 'frame', 'slot'], field_parsers=field_parsers))
        with open(self.test_dataset, encoding='utf-8') as f:
            testset = list(parse_incr(f, fields=['id', 'form', 'frame', 'slot'], field_parsers=field_parsers))
-
+        print('TRAINSET:', trainset)
        corpus = Corpus(train=conllu2flair(trainset, label_type), test=conllu2flair(testset, label_type))
        label_dictionary = corpus.make_label_dictionary(label_type=label_type)
-
+        print('LABEL:' ,label_dictionary)
        embedding_types = [
            WordEmbeddings('pl'),
            FlairEmbeddings('pl-forward'),
@ -133,6 +133,6 @@ class Model:
-model = Model(train_dataset='../data/test_dialog.conllu', test_dataset='../data/test_dialog.conllu')
+#model = Model(train_dataset='../data/test_dialog.conllu', test_dataset='../data/test_dialog.conllu')
-model.train_model('frame')
+# model2 = Model(train_dataset='../data/test_dialog.conllu', test_dataset='../data/test_dialog.conllu')
-model.train_model('slot', field_parsers={'slot': nolabel2o})
+# model2.train_model('slot', field_parsers={'slot': nolabel2o})
--- a/chatbot/modules/nlu.py
+++ b/chatbot/modules/nlu.py
@ -1,38 +1,50 @@
-import os
+from flair.models import SequenceTagger
-import jsgf
+import sys
 sys.path.append("..")
 from models.nlu_train2 import predict_frame, predict_slot
 import logging
 logging.getLogger('flair').setLevel(logging.CRITICAL)
 class NLU:
    def __init__(self):
-        self.grammars = [
+        self.frame_model = SequenceTagger.load('../models/frame-model/final-model.pt')
-            jsgf.parse_grammar_file(f"grammars/{file_name}")
+        self.slot_model = SequenceTagger.load('../models/slot-model/final-model.pt')
            for file_name in os.listdir("grammars")
        ]
-    def get_dialog_act(self, rule):
+    def get_intent(self, text: str):
        return predict_frame(self.frame_model, text.split(), 'frame')
    def get_slot(self, text: str):
        pred = predict_slot(self.slot_model, text.split(), 'slot')
        slots = []
-        self.get_slots(rule.expansion, slots)
+        current_slot = None
-        return {"act": rule.grammar.name, "slots": slots}
+        current_slot_value = []
-    def get_slots(self, expansion, slots):
+        for frame in pred:
-        if expansion.tag != "":
+            slot = frame["slot"]
-            slots.append((expansion.tag, expansion.current_match))
+            if slot.startswith("B-"):
-            return
+                if current_slot:
                    slots.append({'name': current_slot, 'value': " ".join(current_slot_value)})
                current_slot = slot[2:]
                current_slot_value = [frame["form"]]
            elif slot.startswith("I-"):
                current_slot_value.append(frame["form"])
-        for child in expansion.children:
+        if current_slot:
-            self.get_slots(child, slots)
+            slots.append({'name': current_slot, 'value': " ".join(current_slot_value)})
-        if not expansion.children and isinstance(expansion, jsgf.NamedRuleRef):
+        return slots
            self.get_slots(expansion.referenced_rule.expansion, slots)
-    def match(self, utterance):
+    def analyze(self, text: str):
-        list_of_illegal_character = [",", ".", "'", "?", "!", ":", "-", "/"]
+        intent = self.get_intent(text)
-        for illegal_character in list_of_illegal_character[:-2]:
+        slots = self.get_slot(text)
-            utterance = utterance.replace(f"{illegal_character}", "")
+        print({'intent': intent,
-        for illegal_character in list_of_illegal_character[-2:]:
+            'slots': slots})
-            utterance = utterance.replace(f"{illegal_character}", " ")
+        return {
            'intent': intent,
            'slots': slots
        }
-        for grammar in self.grammars:
+nlu = NLU()
-            matched = grammar.find_matching_rules(utterance.lower())
+
-            if matched:
+nlu.analyze("Chce kupic lakier do pazanokci")
                return self.get_dialog_act(matched[0])
        return {"act": "null", "slots": []}