data fix
This commit is contained in:
parent
2139011821
commit
166707dd02
@ -4,5 +4,8 @@
|
|||||||
"(?i)\\b(kim|czym)\\s+(jesteś|jestes|jest)\\b",
|
"(?i)\\b(kim|czym)\\s+(jesteś|jestes|jest)\\b",
|
||||||
"(?i)\\b(cześć|czesc|witaj|witam|hej|siema|helo|hello)\\b",
|
"(?i)\\b(cześć|czesc|witaj|witam|hej|siema|helo|hello)\\b",
|
||||||
"(?i)\\b(proszę|powiedz|opowiedz|opisz)\\s+(mi|nam)\\s+(o|więcej|coś)\\s+(o|na temat)\\s*(twoim|ciebie|twoje)\\s*(imieniu|imieniem|imię|nazwisko)\\b"
|
"(?i)\\b(proszę|powiedz|opowiedz|opisz)\\s+(mi|nam)\\s+(o|więcej|coś)\\s+(o|na temat)\\s*(twoim|ciebie|twoje)\\s*(imieniu|imieniem|imię|nazwisko)\\b"
|
||||||
|
],
|
||||||
|
"ask_price": [
|
||||||
|
"(?i)\\b(jaka jest cena produktu)\\b"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
@ -20,5 +20,8 @@
|
|||||||
"Wygląda na to, że krążymy wokół tego samego tematu. Czy możemy przejść do czegoś innego?",
|
"Wygląda na to, że krążymy wokół tego samego tematu. Czy możemy przejść do czegoś innego?",
|
||||||
"Znowu to samo pytanie, może zmienimy temat?",
|
"Znowu to samo pytanie, może zmienimy temat?",
|
||||||
"Ponownie pytasz o to samo, czy jest coś innego, o co chciałbyś zapytać?"
|
"Ponownie pytasz o to samo, czy jest coś innego, o co chciałbyś zapytać?"
|
||||||
]
|
],
|
||||||
|
"give_price": [
|
||||||
|
"Cena to 20zl"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
|
@ -49,8 +49,8 @@
|
|||||||
6 dowiedzieć, request NoLabel
|
6 dowiedzieć, request NoLabel
|
||||||
7 jakie request NoLabel
|
7 jakie request NoLabel
|
||||||
8 są request NoLabel
|
8 są request NoLabel
|
||||||
9 opcje request B-delivery-method
|
9 opcje request B-delivery_method
|
||||||
10 dostawy? request I-delivery-method
|
10 dostawy? request I-delivery_method
|
||||||
|
|
||||||
# text: Oczywiście! Oferujemy dostawę kurierem za 10 zł oraz odbiór osobisty w naszym sklepie.
|
# text: Oczywiście! Oferujemy dostawę kurierem za 10 zł oraz odbiór osobisty w naszym sklepie.
|
||||||
# intent: inform
|
# intent: inform
|
||||||
@ -93,5 +93,5 @@
|
|||||||
1 Proszę request NoLabel
|
1 Proszę request NoLabel
|
||||||
2 podać request NoLabel
|
2 podać request NoLabel
|
||||||
3 ceny request B-price
|
3 ceny request B-price
|
||||||
4 weza request B-product
|
4 weza request B-item
|
||||||
5 ogrodowego. request I-product
|
5 ogrodowego. request I-item
|
||||||
|
@ -1134,67 +1134,5 @@
|
|||||||
1 Proszę request NoLabel
|
1 Proszę request NoLabel
|
||||||
2 podać request NoLabel
|
2 podać request NoLabel
|
||||||
3 cene request B-price
|
3 cene request B-price
|
||||||
4 fotela request B-product
|
4 fotela request B-item
|
||||||
5 ogrodowego. request I-product
|
5 ogrodowego. request I-item
|
||||||
|
|
||||||
# text: Czesc chcialabym kupic szampon do wlosow
|
|
||||||
# intent: request
|
|
||||||
# slots:
|
|
||||||
1 Czesc request NoLabel
|
|
||||||
2 chcialabym request NoLabel
|
|
||||||
3 kupic request NoLabel
|
|
||||||
4 szampon request B-item
|
|
||||||
5 do request I-item
|
|
||||||
6 wlosow request I-item
|
|
||||||
|
|
||||||
# text: Czesc jaka jest cena le¿aka?
|
|
||||||
# intent: request
|
|
||||||
# slots:
|
|
||||||
1 Czesc request NoLabel
|
|
||||||
2 jaka request NoLabel
|
|
||||||
3 jest request NoLabel
|
|
||||||
4 cena request B-price
|
|
||||||
5 le¿aka? request B-item
|
|
||||||
|
|
||||||
# text: Ile kosztuje krzeslo?
|
|
||||||
# intent: request
|
|
||||||
# slots:
|
|
||||||
1 Ile request NoLabel
|
|
||||||
2 kosztuje request B-price
|
|
||||||
3 krzeslo? request B-item
|
|
||||||
|
|
||||||
# text: Jaka cena mleka?
|
|
||||||
# intent: request
|
|
||||||
# slots:
|
|
||||||
1 Jaka request NoLabel
|
|
||||||
2 cena request B-price
|
|
||||||
3 mleka? request B-item
|
|
||||||
|
|
||||||
# text: Ile kosztuje ten produkt?
|
|
||||||
# intent: request
|
|
||||||
# slots:
|
|
||||||
1 Ile request NoLabel
|
|
||||||
2 kosztuje request B-price
|
|
||||||
3 ten request NoLabel
|
|
||||||
4 produkt? request B-item
|
|
||||||
|
|
||||||
# text: Chce kupic doniczke
|
|
||||||
# intent: request
|
|
||||||
# slots:
|
|
||||||
1 Chce request NoLabel
|
|
||||||
2 kupic request NoLabel
|
|
||||||
3 doniczke request B-item
|
|
||||||
|
|
||||||
# text: Jaka cena dostawy?
|
|
||||||
# intent: request
|
|
||||||
# slots:
|
|
||||||
1 Jaka request NoLabel
|
|
||||||
2 cena request B-price
|
|
||||||
3 dostawy? request B-delivery_method
|
|
||||||
|
|
||||||
# text: Ul. Poznanska 2323
|
|
||||||
# intent: inform
|
|
||||||
# slots:
|
|
||||||
1 Ul. inform B-address
|
|
||||||
2 Poznanska inform I-address
|
|
||||||
3 2323 inform I-address
|
|
||||||
|
@ -112,10 +112,10 @@ class Model:
|
|||||||
trainset = list(parse_incr(f, fields=['id', 'form', 'frame', 'slot'], field_parsers=field_parsers))
|
trainset = list(parse_incr(f, fields=['id', 'form', 'frame', 'slot'], field_parsers=field_parsers))
|
||||||
with open(self.test_dataset, encoding='utf-8') as f:
|
with open(self.test_dataset, encoding='utf-8') as f:
|
||||||
testset = list(parse_incr(f, fields=['id', 'form', 'frame', 'slot'], field_parsers=field_parsers))
|
testset = list(parse_incr(f, fields=['id', 'form', 'frame', 'slot'], field_parsers=field_parsers))
|
||||||
|
print('TRAINSET:', trainset)
|
||||||
corpus = Corpus(train=conllu2flair(trainset, label_type), test=conllu2flair(testset, label_type))
|
corpus = Corpus(train=conllu2flair(trainset, label_type), test=conllu2flair(testset, label_type))
|
||||||
label_dictionary = corpus.make_label_dictionary(label_type=label_type)
|
label_dictionary = corpus.make_label_dictionary(label_type=label_type)
|
||||||
|
print('LABEL:' ,label_dictionary)
|
||||||
embedding_types = [
|
embedding_types = [
|
||||||
WordEmbeddings('pl'),
|
WordEmbeddings('pl'),
|
||||||
FlairEmbeddings('pl-forward'),
|
FlairEmbeddings('pl-forward'),
|
||||||
@ -133,6 +133,6 @@ class Model:
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
model = Model(train_dataset='../data/test_dialog.conllu', test_dataset='../data/test_dialog.conllu')
|
#model = Model(train_dataset='../data/test_dialog.conllu', test_dataset='../data/test_dialog.conllu')
|
||||||
model.train_model('frame')
|
# model2 = Model(train_dataset='../data/test_dialog.conllu', test_dataset='../data/test_dialog.conllu')
|
||||||
model.train_model('slot', field_parsers={'slot': nolabel2o})
|
# model2.train_model('slot', field_parsers={'slot': nolabel2o})
|
||||||
|
@ -1,38 +1,50 @@
|
|||||||
import os
|
from flair.models import SequenceTagger
|
||||||
import jsgf
|
import sys
|
||||||
|
sys.path.append("..")
|
||||||
|
from models.nlu_train2 import predict_frame, predict_slot
|
||||||
|
import logging
|
||||||
|
|
||||||
|
logging.getLogger('flair').setLevel(logging.CRITICAL)
|
||||||
|
|
||||||
class NLU:
|
class NLU:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.grammars = [
|
self.frame_model = SequenceTagger.load('../models/frame-model/final-model.pt')
|
||||||
jsgf.parse_grammar_file(f"grammars/{file_name}")
|
self.slot_model = SequenceTagger.load('../models/slot-model/final-model.pt')
|
||||||
for file_name in os.listdir("grammars")
|
|
||||||
]
|
|
||||||
|
|
||||||
def get_dialog_act(self, rule):
|
def get_intent(self, text: str):
|
||||||
|
return predict_frame(self.frame_model, text.split(), 'frame')
|
||||||
|
|
||||||
|
def get_slot(self, text: str):
|
||||||
|
pred = predict_slot(self.slot_model, text.split(), 'slot')
|
||||||
slots = []
|
slots = []
|
||||||
self.get_slots(rule.expansion, slots)
|
current_slot = None
|
||||||
return {"act": rule.grammar.name, "slots": slots}
|
current_slot_value = []
|
||||||
|
|
||||||
def get_slots(self, expansion, slots):
|
for frame in pred:
|
||||||
if expansion.tag != "":
|
slot = frame["slot"]
|
||||||
slots.append((expansion.tag, expansion.current_match))
|
if slot.startswith("B-"):
|
||||||
return
|
if current_slot:
|
||||||
|
slots.append({'name': current_slot, 'value': " ".join(current_slot_value)})
|
||||||
|
current_slot = slot[2:]
|
||||||
|
current_slot_value = [frame["form"]]
|
||||||
|
elif slot.startswith("I-"):
|
||||||
|
current_slot_value.append(frame["form"])
|
||||||
|
|
||||||
for child in expansion.children:
|
if current_slot:
|
||||||
self.get_slots(child, slots)
|
slots.append({'name': current_slot, 'value': " ".join(current_slot_value)})
|
||||||
|
|
||||||
if not expansion.children and isinstance(expansion, jsgf.NamedRuleRef):
|
return slots
|
||||||
self.get_slots(expansion.referenced_rule.expansion, slots)
|
|
||||||
|
|
||||||
def match(self, utterance):
|
def analyze(self, text: str):
|
||||||
list_of_illegal_character = [",", ".", "'", "?", "!", ":", "-", "/"]
|
intent = self.get_intent(text)
|
||||||
for illegal_character in list_of_illegal_character[:-2]:
|
slots = self.get_slot(text)
|
||||||
utterance = utterance.replace(f"{illegal_character}", "")
|
print({'intent': intent,
|
||||||
for illegal_character in list_of_illegal_character[-2:]:
|
'slots': slots})
|
||||||
utterance = utterance.replace(f"{illegal_character}", " ")
|
return {
|
||||||
|
'intent': intent,
|
||||||
|
'slots': slots
|
||||||
|
}
|
||||||
|
|
||||||
for grammar in self.grammars:
|
nlu = NLU()
|
||||||
matched = grammar.find_matching_rules(utterance.lower())
|
|
||||||
if matched:
|
nlu.analyze("Chce kupic lakier do pazanokci")
|
||||||
return self.get_dialog_act(matched[0])
|
|
||||||
return {"act": "null", "slots": []}
|
|
Loading…
Reference in New Issue
Block a user