data fix

2024-06-10 22:27:30 +02:00 · 2024-06-10 22:27:30 +02:00 · 166707dd02
commit 166707dd02
parent 2139011821
6 changed files with 58 additions and 102 deletions
--- a/chatbot/data/intents.json
+++ b/chatbot/data/intents.json
@@ -4,5 +4,8 @@
    "(?i)\\b(kim|czym)\\s+(jesteś|jestes|jest)\\b",
    "(?i)\\b(cześć|czesc|witaj|witam|hej|siema|helo|hello)\\b",
    "(?i)\\b(proszę|powiedz|opowiedz|opisz)\\s+(mi|nam)\\s+(o|więcej|coś)\\s+(o|na temat)\\s*(twoim|ciebie|twoje)\\s*(imieniu|imieniem|imię|nazwisko)\\b"
+  ],
+  "ask_price": [
+    "(?i)\\b(jaka jest cena produktu)\\b"
  ]
 }
--- a/chatbot/data/response.json
+++ b/chatbot/data/response.json
@@ -20,5 +20,8 @@
    "Wygląda na to, że krążymy wokół tego samego tematu. Czy możemy przejść do czegoś innego?",
    "Znowu to samo pytanie, może zmienimy temat?",
    "Ponownie pytasz o to samo, czy jest coś innego, o co chciałbyś zapytać?"
-    ]
+    ],
+  "give_price": [
+    "Cena to 20zl"
+  ]
 }
--- a/chatbot/data/test_dialog.conllu
+++ b/chatbot/data/test_dialog.conllu
@@ -49,8 +49,8 @@
 6	dowiedzieć,	request	NoLabel
 7	jakie	request	NoLabel
 8	są	request	NoLabel
-9	opcje	request	B-delivery-method
-10	dostawy?	request	I-delivery-method
+9	opcje	request	B-delivery_method
+10	dostawy?	request	I-delivery_method

 # text: Oczywiście! Oferujemy dostawę kurierem za 10 zł oraz odbiór osobisty w naszym sklepie.
 # intent: inform
@@ -93,5 +93,5 @@
 1	Proszę	request	NoLabel
 2	podać	request	NoLabel
 3	ceny	request	B-price
-4	weza	request	B-product
-5	ogrodowego.	request	I-product
+4	weza	request	B-item
+5	ogrodowego.	request	I-item
--- a/chatbot/data/train_dialog.conllu
+++ b/chatbot/data/train_dialog.conllu
@@ -1134,67 +1134,5 @@
 1	Proszę	request	NoLabel
 2	podać	request	NoLabel
 3	cene	request	B-price
-4	fotela	request	B-product
-5	ogrodowego.	request	I-product
-
-# text: Czesc chcialabym kupic szampon do wlosow
-# intent: request
-# slots:
-1	Czesc	request	NoLabel
-2	chcialabym	request	NoLabel
-3	kupic	request	NoLabel
-4	szampon	request	B-item
-5	do	request	I-item
-6	wlosow	request	I-item
-
-# text: Czesc jaka jest cena le¿aka?
-# intent: request
-# slots:
-1	Czesc	request	NoLabel
-2	jaka	request	NoLabel
-3	jest	request	NoLabel
-4	cena	request	B-price
-5	le¿aka?	request	B-item
-
-# text: Ile kosztuje krzeslo?
-# intent: request
-# slots:
-1	Ile	request	NoLabel
-2	kosztuje	request	B-price
-3	krzeslo?	request	B-item
-
-# text: Jaka cena mleka?
-# intent: request
-# slots:
-1	Jaka	request	NoLabel
-2	cena	request	B-price
-3	mleka?	request	B-item
-
-# text: Ile kosztuje ten produkt?
-# intent: request
-# slots:
-1	Ile	request	NoLabel
-2	kosztuje	request	B-price
-3	ten	request	NoLabel
-4	produkt?	request	B-item
-
-# text: Chce kupic doniczke
-# intent: request
-# slots:
-1	Chce	request	NoLabel
-2	kupic	request	NoLabel
-3	doniczke	request	B-item
-
-# text: Jaka cena dostawy?
-# intent: request
-# slots:
-1	Jaka	request	NoLabel
-2	cena	request	B-price
-3	dostawy?	request	B-delivery_method
-
-# text: Ul. Poznanska 2323
-# intent: inform
-# slots:
-1	Ul.	inform	B-address
-2	Poznanska	inform	I-address
-3	2323	inform	I-address
+4	fotela	request	B-item
+5	ogrodowego.	request	I-item
--- a/chatbot/models/nlu_train2.py
+++ b/chatbot/models/nlu_train2.py
@@ -112,10 +112,10 @@ class Model:
            trainset = list(parse_incr(f, fields=['id', 'form', 'frame', 'slot'], field_parsers=field_parsers))
        with open(self.test_dataset, encoding='utf-8') as f:
            testset = list(parse_incr(f, fields=['id', 'form', 'frame', 'slot'], field_parsers=field_parsers))
-
+        print('TRAINSET:', trainset)
        corpus = Corpus(train=conllu2flair(trainset, label_type), test=conllu2flair(testset, label_type))
        label_dictionary = corpus.make_label_dictionary(label_type=label_type)
-
+        print('LABEL:' ,label_dictionary)
        embedding_types = [
            WordEmbeddings('pl'),
            FlairEmbeddings('pl-forward'),
@@ -133,6 +133,6 @@ class Model:



-model = Model(train_dataset='../data/test_dialog.conllu', test_dataset='../data/test_dialog.conllu')
-model.train_model('frame')
-model.train_model('slot', field_parsers={'slot': nolabel2o})
+#model = Model(train_dataset='../data/test_dialog.conllu', test_dataset='../data/test_dialog.conllu')
+# model2 = Model(train_dataset='../data/test_dialog.conllu', test_dataset='../data/test_dialog.conllu')
+# model2.train_model('slot', field_parsers={'slot': nolabel2o})
--- a/chatbot/modules/nlu.py
+++ b/chatbot/modules/nlu.py
@@ -1,38 +1,50 @@
-import os
-import jsgf
+from flair.models import SequenceTagger
+import sys
+sys.path.append("..")
+from models.nlu_train2 import predict_frame, predict_slot
+import logging
+
+logging.getLogger('flair').setLevel(logging.CRITICAL)

 class NLU:
    def __init__(self):
-        self.grammars = [
-            jsgf.parse_grammar_file(f"grammars/{file_name}")
-            for file_name in os.listdir("grammars")
-        ]
+        self.frame_model = SequenceTagger.load('../models/frame-model/final-model.pt')
+        self.slot_model = SequenceTagger.load('../models/slot-model/final-model.pt')

-    def get_dialog_act(self, rule):
+    def get_intent(self, text: str):
+        return predict_frame(self.frame_model, text.split(), 'frame')
+
+    def get_slot(self, text: str):
+        pred = predict_slot(self.slot_model, text.split(), 'slot')
        slots = []
-        self.get_slots(rule.expansion, slots)
-        return {"act": rule.grammar.name, "slots": slots}
+        current_slot = None
+        current_slot_value = []

-    def get_slots(self, expansion, slots):
-        if expansion.tag != "":
-            slots.append((expansion.tag, expansion.current_match))
-            return
+        for frame in pred:
+            slot = frame["slot"]
+            if slot.startswith("B-"):
+                if current_slot:
+                    slots.append({'name': current_slot, 'value': " ".join(current_slot_value)})
+                current_slot = slot[2:]
+                current_slot_value = [frame["form"]]
+            elif slot.startswith("I-"):
+                current_slot_value.append(frame["form"])

-        for child in expansion.children:
-            self.get_slots(child, slots)
+        if current_slot:
+            slots.append({'name': current_slot, 'value': " ".join(current_slot_value)})

-        if not expansion.children and isinstance(expansion, jsgf.NamedRuleRef):
-            self.get_slots(expansion.referenced_rule.expansion, slots)
+        return slots

-    def match(self, utterance):
-        list_of_illegal_character = [",", ".", "'", "?", "!", ":", "-", "/"]
-        for illegal_character in list_of_illegal_character[:-2]:
-            utterance = utterance.replace(f"{illegal_character}", "")
-        for illegal_character in list_of_illegal_character[-2:]:
-            utterance = utterance.replace(f"{illegal_character}", " ")
+    def analyze(self, text: str):
+        intent = self.get_intent(text)
+        slots = self.get_slot(text)
+        print({'intent': intent,
+            'slots': slots})
+        return {
+            'intent': intent,
+            'slots': slots
+        }

-        for grammar in self.grammars:
-            matched = grammar.find_matching_rules(utterance.lower())
-            if matched:
-                return self.get_dialog_act(matched[0])
-        return {"act": "null", "slots": []}
+nlu = NLU()
+
+nlu.analyze("Chce kupic lakier do pazanokci")