Compare commits

...

4 Commits

9 changed files with 1012 additions and 4 deletions

15
conllu_generator.py Normal file
View File

@ -0,0 +1,15 @@
# Interactive helper that appends hand-annotated utterances to the NLU
# training corpus, one CoNLL-U style record per utterance.

OUTPUT_PATH = "data/train_dialog.conllu"


def format_record(text, act, labels):
    """Render one annotated utterance as a corpus record.

    Args:
        text: The raw utterance; it is tokenised on whitespace.
        act: Intent written into the header and repeated on every token row.
        labels: One slot label per whitespace-separated token of ``text``.

    Returns:
        The record as a single string: a leading blank line, the three
        ``#`` header lines, then one tab-separated row per token
        (1-based index, token, intent, slot label).

    Raises:
        ValueError: If the number of labels differs from the number of tokens.
    """
    tokens = text.split()
    if len(tokens) != len(labels):
        raise ValueError(
            f"expected {len(tokens)} labels, got {len(labels)}"
        )
    parts = [f"\n# text: {text}\n# intent: {act}\n# slots:\n"]
    parts.extend(
        f"{index}\t{token}\t{act}\t{label}\n"
        for index, (token, label) in enumerate(zip(tokens, labels), start=1)
    )
    return "".join(parts)


def main():
    """Prompt for utterances until interrupted, appending each to the corpus."""
    print(f"Script to automatically append data to {OUTPUT_PATH}")
    print("Start typing now. Press Ctrl+C to stop.")
    try:
        while True:
            text = input("Text: ")
            if not text.strip():
                # Nothing to annotate; an empty record would only add noise.
                continue
            act = input("Intent: ")
            labels = [
                input(f"{i}/{token} label: ")
                for i, token in enumerate(text.split())
            ]
            # Write only once the record is complete, so that interrupting
            # half-way through an utterance never leaves a truncated record.
            with open(OUTPUT_PATH, "a", encoding="utf-8") as f:
                f.write(format_record(text, act, labels))
            print("---")
    except (KeyboardInterrupt, EOFError):
        # Ctrl+C / end of input is the documented way to stop the script.
        print("\nStopped.")


if __name__ == "__main__":
    main()

View File

@ -6,7 +6,7 @@ user a jakie są dostępne pizze? request(menu)
system Mamy wiele rodzajów, czy ma Pan ochotę na coś konkretnego? reqmore()
system plecam pizzę ananasową. sos pomidorowy, mozarella, ananas i cebula offer(pizza=pineapple,ingredient=tomato_souce,ingredient=mozarella,ingredient=pineapple,ingredient=onion)
user świetnie, w takim razie poproszę dwie duże pizze diavola oraz margaritę request(quantity=2,pizza=diavola)&request(pizza=margharita)
system oczywiście, czy chciałby Pan coś do picia? affirm()&request(drink)
system oczywiście, czy chciałby Pan coś do picia? affirm()&request(drink)
system polecam sok z buraka offer(drink=beetroot_juice)
user tak, poproszę jeden affirm()&request(quantity=1)
system Oczywiście, płaci Pan kartą, blikiem, gotówką czy przelewem? Zamówienie na miejscu czy na wynos? request(payment_method)&request(collection)

Can't render this file because it has a wrong number of fields in line 9.

View File

@ -5,7 +5,7 @@ user chciałbym zamówić pizzę wegetariańską w średnim rozmiarze request(pi
system Oczywiście, czy chciałby Pan jakieś dodatki? affirm()&reqmore(additional_ingredients)
user a co znajduje się domyślnie na takiej pizzy? request(menu)
system ser, mozarella, sos pomidorowy, cebula oraz karczochy offer(ingredients=cheese,ingredients=mozarella,ingredients=tomato_souce,ingredients=onion,ingredients=artichokes)
user dobrze, w takim razie odpowiada affirm()
user dobrze, w takim razie odpowiada affirm()
user a rozmiar średni o ile jest mniejszy od dużego? request(size_difference=medium-large)
system rozmiar średni: 30 cm inform(medium_size=30)
system rozmiar duży: 40 cm inform(large_size=40)

Can't render this file because it has a wrong number of fields in line 8.

View File

@ -1,5 +1,5 @@
kto treść treść
user Witam welcomemsg()
kto treść akt
user Witam welcomemsg()
system Dzień dobry, witam w naszej restauracji Pizzeria. W czym mogę pomóc? welcomemsg()&request(order)
user Chcę jeść pickę inform(food=pizza)
system Jaki rodzaj pizzy sobie Pan życzy, oraz jaki rozmiar? request(type,size)

Can't render this file because it has a wrong number of fields in line 2.

772
data/train_dialog.conllu Normal file
View File

@ -0,0 +1,772 @@
# text: cześć
# intent: welcomemsg
# slots:
1 cześć welcomemsg NoLabel
# text: witam
# intent: welcomemsg
# slots:
1 witam welcomemsg NoLabel
# text: dzień dobry
# intent: welcomemsg
# slots:
1 dzień welcomemsg NoLabel
2 dobry welcomemsg NoLabel
# text: dobry wieczór
# intent: welcomemsg
# slots:
1 dobry welcomemsg NoLabel
2 wieczór welcomemsg NoLabel
# text: dziękuję
# intent: bye
# slots:
1 dziękuję bye NoLabel
# text: dzięki
# intent: bye
# slots:
1 dzięki bye NoLabel
# text: pozdrawiam
# intent: bye
# slots:
1 pozdrawiam bye NoLabel
# text: do widzenia
# intent: bye
# slots:
1 do bye NoLabel
2 widzenia bye NoLabel
# text: dziękuję serdecznie
# intent: bye
# slots:
1 dziękuję bye NoLabel
2 serdecznie bye NoLabel
# text: dziękuję bardzo
# intent: bye
# slots:
1 dziękuję bye NoLabel
2 bardzo bye NoLabel
# text: papa
# intent: bye
# slots:
1 papa bye NoLabel
# text: tak
# intent: affirm
# slots:
1 tak affirm NoLabel
# text: poproszę
# intent: affirm
# slots:
1 poproszę affirm NoLabel
# text: to poproszę
# intent: affirm
# slots:
1 to affirm NoLabel
2 poproszę affirm NoLabel
# text: okej
# intent: affirm
# slots:
1 okej affirm NoLabel
# text: okej to poproszę
# intent: affirm
# slots:
1 okej affirm NoLabel
2 to affirm NoLabel
3 poproszę affirm NoLabel
# text: poproszę pizzę hawajska w rozmiarze dużym
# intent: inform/order
# slots:
1 poproszę inform/order NoLabel
2 pizzę inform/order B-food
3 hawajska inform/order B-pizza
4 w inform/order NoLabel
5 rozmiarze inform/order NoLabel
6 dużym inform/order B-size
# text: chciałbym zamówić pizzę
# intent: request/menu
# slots:
1 chciałbym request/menu NoLabel
2 zamówić request/menu NoLabel
3 pizzę request/menu B-food
# text: chciałbym pizzę z kurczakiem
# intent: inform/order
# slots:
1 chciałbym inform/order NoLabel
2 pizzę inform/order B-food
3 z inform/order B-ingredient
4 kurczakiem inform/order I-ingredient
# text: proszę pizzę z szynką
# intent: inform/order
# slots:
1 proszę inform/order NoLabel
2 pizzę inform/order B-food
3 z inform/order NoLabel
4 szynką inform/order B-ingredient
# text: poproszę pizzę barcelona w rozmiarze L
# intent: inform/order
# slots:
1 poproszę inform/order NoLabel
2 pizzę inform/order B-food
3 barcelona inform/order B-pizza
4 w inform/order NoLabel
5 rozmiarze inform/order NoLabel
6 L inform/order B-size
# text: jedna pizza tuna, druga pizza hawajska
# intent: inform/order
# slots:
1 jedna inform/order B-quantity
2 pizza inform/order B-food
3 tuna, inform/order B-pizza
4 druga inform/order B-quantity
5 pizza inform/order B-food
6 hawajska inform/order B-pizza
# text: wezmę pizzę capri bez pieczarek
# intent: inform/order
# slots:
1 wezmę inform/order NoLabel
2 pizzę inform/order B-food
3 capri inform/order B-pizza
4 bez inform/order B-ingredient/neg
5 pieczarek inform/order I-ingredient/neg
# text: jakie pizze są w ofercie
# intent: request/menu
# slots:
1 jakie request/menu NoLabel
2 pizze request/menu B-food
3 są request/menu NoLabel
4 w request/menu NoLabel
5 ofercie request/menu NoLabel
# text: w jakich rozmiarach są pizze
# intent: request/size
# slots:
1 w request/size NoLabel
2 jakich request/size NoLabel
3 rozmiarach request/size NoLabel
4 są request/size NoLabel
5 pizze request/size NoLabel
# text: jakie pizze są w menu
# intent: request/menu
# slots:
1 jakie request/menu NoLabel
2 pizze request/menu B-food
3 są request/menu NoLabel
4 w request/menu NoLabel
5 menu request/menu NoLabel
# text: chciałbym zamówić pizze
# intent: request/menu
# slots:
1 chciałbym request/menu NoLabel
2 zamówić request/menu NoLabel
3 pizze request/menu B-food
# text: co macie w ofercie
# intent: request/menu
# slots:
1 co request/menu NoLabel
2 macie request/menu NoLabel
3 w request/menu NoLabel
4 ofercie request/menu NoLabel
# text: poproszę pizzę margarita
# intent: inform/order
# slots:
1 poproszę inform/order NoLabel
2 pizzę inform/order B-food
3 margarita inform/order B-pizza
# text: poproszę pizze hawajską bez ananasa
# intent: inform/order
# slots:
1 poproszę inform/order NoLabel
2 pizze inform/order B-food
3 hawajską inform/order B-pizza
4 bez inform/order B-ingredient/neg
5 ananasa inform/order I-ingredient/neg
# text: poproszę 5 pizz
# intent: inform/order
# slots:
1 poproszę inform/order NoLabel
2 5 inform/order B-quantity
3 pizz inform/order B-food
# text: porposzę capri w rozmiarze m
# intent: inform/order
# slots:
1 porposzę inform/order NoLabel
2 capri inform/order NoLabel
3 w inform/order NoLabel
4 rozmiarze inform/order NoLabel
5 m inform/order NoLabel
# text: nie, to wszystko
# intent: inform/order-complete
# slots:
1 nie, inform/order-complete NoLabel
2 to inform/order-complete NoLabel
3 wszystko inform/order-complete NoLabel
# text: kiedy dojdzie dostawa?
# intent: request/time
# slots:
1 kiedy request/time NoLabel
2 dojdzie request/time NoLabel
3 dostawa? request/time NoLabel
# text: kiedy mogę się spodziewać dostawy?
# intent: request/time
# slots:
1 kiedy request/time NoLabel
2 mogę request/time NoLabel
3 się request/time NoLabel
4 spodziewać request/time NoLabel
5 dostawy? request/time NoLabel
# text: kiedy dojedzie kurier?
# intent: request/time
# slots:
1 kiedy request/time NoLabel
2 dojedzie request/time NoLabel
3 kurier? request/time NoLabel
# text: to wszystko
# intent: inform/order-complete
# slots:
1 to inform/order-complete NoLabel
2 wszystko inform/order-complete NoLabel
# text: adres dostawy to Koszalińska 10/15
# intent: inform/address
# slots:
1 adres inform/address NoLabel
2 dostawy inform/address NoLabel
3 to inform/address NoLabel
4 Koszalińska inform/address B-address
5 10/15 inform/address I-address
# text: proszę dzwonić na numer 320 561 123
# intent: inform/phone
# slots:
1 proszę inform/phone NoLabel
2 dzwonić inform/phone NoLabel
3 na inform/phone NoLabel
4 numer inform/phone B-phone
5 320 inform/phone I-phone
6 561 inform/phone I-phone
7 123 inform/phone I-phone
# text: adres Kołątaja 15
# intent: inform/address
# slots:
1 adres inform/address NoLabel
2 Kołątaja inform/address B-address
3 15 inform/address I-address
# text: Poznań, ul. Piastów 90/13
# intent: inform/address
# slots:
1 Poznań, inform/address B-address
2 ul. inform/address I-address
3 Piastów inform/address I-address
4 90/13 inform/address I-address
# text: ul. Piotra Skargi 13
# intent: inform/address
# slots:
1 ul. inform/address B-address
2 Piotra inform/address I-address
3 Skargi inform/address I-address
4 13 inform/address I-address
# text: aleje jerozolimskie 10
# intent: inform/address
# slots:
1 aleje inform/address B-address
2 jerozolimskie inform/address I-address
3 10 inform/address I-address
# text: nie
# intent: negate
# slots:
1 nie negate NoLabel
# text: zaprzeczam
# intent: negate
# slots:
1 zaprzeczam negate NoLabel
# text: nie dziękuję
# intent: negate
# slots:
1 nie negate NoLabel
2 dziękuję negate NoLabel
# text: co jest w ofercie
# intent: request/menu
# slots:
1 co request/menu NoLabel
2 jest request/menu NoLabel
3 w request/menu NoLabel
4 ofercie request/menu NoLabel
# text: chciałbym 3 pizze, hawajskie duże
# intent: inform/order
# slots:
1 chciałbym inform/order NoLabel
2 3 inform/order B-quantity
3 pizze, inform/order B-food
4 hawajskie inform/order B-pizza
5 duże inform/order B-size
# text: chciałbym barcelona xl
# intent: inform/order
# slots:
1 chciałbym inform/order NoLabel
2 barcelona inform/order B-pizza
3 xl inform/order B-size
# text: kruszyn krajeński, Polna 34
# intent: inform/address
# slots:
1 kruszyn inform/address B-address
2 krajeński, inform/address I-address
3 Polna inform/address I-address
4 34 inform/address I-address
# text: odbiorę na miejscu
# intent: inform/delivery
# slots:
1 odbiorę inform/delivery NoLabel
2 na inform/delivery NoLabel
3 miejscu inform/delivery B-delivery
# text: poproszę z dowozem
# intent: inform/delivery
# slots:
1 poproszę inform/delivery NoLabel
2 z inform/delivery NoLabel
3 dowozem inform/delivery B-delivery
# text: ile całość będzie kosztować
# intent: request/price
# slots:
1 ile request/price NoLabel
2 całość request/price NoLabel
3 będzie request/price NoLabel
4 kosztować request/price NoLabel
# text: jaki koszt
# intent: request/price
# slots:
1 jaki request/price NoLabel
2 koszt request/price NoLabel
# text: ile zapłacę
# intent: request/price
# slots:
1 ile request/price NoLabel
2 zapłacę request/price NoLabel
# text: prosiłbym o dowóz na 17:30
# intent: inform/time
# slots:
1 prosiłbym inform/time NoLabel
2 o inform/time NoLabel
3 dowóz inform/time NoLabel
4 na inform/time NoLabel
5 17:30 inform/time B-time
# text: czy można dowieźć na 20?
# intent: inform/time
# slots:
1 czy inform/time NoLabel
2 można inform/time NoLabel
3 dowieźć inform/time NoLabel
4 na inform/time NoLabel
5 20? inform/time B-time
# text: czy mozżna bez cebuli
# intent: inform/order
# slots:
1 czy inform/order NoLabel
2 mozżna inform/order NoLabel
3 bez inform/order B-ingredient/neg
4 cebuli inform/order I-ingredient/neg
# text: bez pieczarek
# intent: inform/order
# slots:
1 bez inform/order B-ingredient/neg
2 pieczarek inform/order I-ingredient/neg
# text: jaka cena?
# intent: request/price
# slots:
1 jaka request/price NoLabel
2 cena? request/price NoLabel
# text: cena
# intent: request/price
# slots:
1 cena request/price NoLabel
# text: jaki koszt
# intent: request/price
# slots:
1 jaki request/price NoLabel
2 koszt request/price NoLabel
# text: cena dowozu
# intent: request/delivery-price
# slots:
1 cena request/delivery-price NoLabel
2 dowozu request/delivery-price NoLabel
# text: sos czosnkowy
# intent: inform/order
# slots:
1 sos inform/order B-sauce
2 czosnkowy inform/order I-sauce
# text: poproszę o realizację zamówienia
# intent: inform/order-complete
# slots:
1 poproszę inform/order-complete NoLabel
2 o inform/order-complete NoLabel
3 realizację inform/order-complete NoLabel
4 zamówienia inform/order-complete NoLabel
# text: zamawiam
# intent: inform/order-complete
# slots:
1 zamawiam inform/order-complete NoLabel
# text: i to wszystko
# intent: inform/order-complete
# slots:
1 i inform/order-complete NoLabel
2 to inform/order-complete NoLabel
3 wszystko inform/order-complete NoLabel
# text: jaki rozmiar
# intent: request/size
# slots:
1 jaki request/size NoLabel
2 rozmiar request/size NoLabel
# text: złożyć zamówienie
# intent: request/menu
# slots:
1 złożyć request/menu NoLabel
2 zamówienie request/menu NoLabel
# text: poproszę 4 pizze, tuna, hawajska, capri, barcelona, wszystkie w rozmiarze xl
# intent: inform/order
# slots:
1 poproszę inform/order NoLabel
2 4 inform/order B-quantity
3 pizze, inform/order B-food
4 tuna, inform/order B-pizza
5 hawajska, inform/order B-pizza
6 capri, inform/order B-pizza
7 barcelona, inform/order B-pizza
8 wszystkie inform/order NoLabel
9 w inform/order NoLabel
10 rozmiarze inform/order NoLabel
11 xl inform/order B-size
# text: mój numer to 123 324 534
# intent: inform/phone
# slots:
1 mój inform/phone NoLabel
2 numer inform/phone NoLabel
3 to inform/phone NoLabel
4 123 inform/phone B-phone
5 324 inform/phone I-phone
6 534 inform/phone I-phone
# text: 653 321 787
# intent: inform/phone
# slots:
1 653 inform/phone B-phone
2 321 inform/phone I-phone
3 787 inform/phone I-phone
# text: wezmę pizzę z szynką, tuńczykiem i cebulą, w rozmiarze L
# intent: inform/order
# slots:
1 wezmę inform/order NoLabel
2 pizzę inform/order B-food
3 z inform/order NoLabel
4 szynką, inform/order B-ingredient
5 tuńczykiem inform/order B-ingredient
6 i inform/order NoLabel
7 cebulą, inform/order B-ingredient
8 w inform/order NoLabel
9 rozmiarze inform/order NoLabel
10 L inform/order B-size
# text: szynka
# intent: inform/order
# slots:
1 szynka inform/order B-ingredient
# text: pieczarki
# intent: inform/order
# slots:
1 pieczarki inform/order B-ingredient
# text: cebula
# intent: inform/order
# slots:
1 cebula inform/order B-ingredient
# text: papryka
# intent: inform/order
# slots:
1 papryka inform/order B-ingredient
# text: pomidor
# intent: inform/order
# slots:
1 pomidor inform/order B-ingredient
# text: ser
# intent: inform/order
# slots:
1 ser inform/order B-ingredient
# text: ananas kurczak tunczyk cebula
# intent: inform/order
# slots:
1 ananas inform/order B-ingredient
2 kurczak inform/order B-ingredient
3 tunczyk inform/order B-ingredient
4 cebula inform/order B-ingredient
# text: barcelona hawajska tuna capri margarita
# intent: inform/order
# slots:
1 barcelona inform/order B-pizza
2 hawajska inform/order B-pizza
3 tuna inform/order B-pizza
4 capri inform/order B-pizza
5 margarita inform/order B-pizza
# text: poproszę hawajską i tuna XL
# intent: inform/order
# slots:
1 poproszę inform/order NoLabel
2 hawajską inform/order B-pizza
3 i inform/order NoLabel
4 tuna inform/order B-pizza
5 XL inform/order B-size
# text: wezmę 3 pizze tuna, średnią, dużą i bardzo dużą
# intent: inform/order
# slots:
1 wezmę inform/order NoLabel
2 3 inform/order B-quantity
3 pizze inform/order B-food
4 tuna, inform/order B-pizza
5 średnią, inform/order B-size
6 dużą inform/order B-size
7 i inform/order NoLabel
8 bardzo inform/order B-size
9 dużą inform/order I-size
# text: rozmiar m
# intent: inform/order
# slots:
1 rozmiar inform/order B-size
2 m inform/order I-size
# text: rozmar l
# intent: inform/order
# slots:
1 rozmar inform/order B-size
2 l inform/order I-size
# text: rozmiar xl
# intent: inform/order
# slots:
1 rozmiar inform/order B-size
2 xl inform/order I-size
# text: na godzine 9
# intent: inform/time
# slots:
1 na inform/time NoLabel
2 godzine inform/time B-time
3 9 inform/time I-time
# text: godz 16
# intent: inform/time
# slots:
1 godz inform/time B-time
2 16 inform/time I-time
# text: na 19 poproszę
# intent: inform/time
# slots:
1 na inform/time NoLabel
2 19 inform/time B-time
3 poproszę inform/time NoLabel
# text: godzine dziesiątą
# intent: inform/time
# slots:
1 godzine inform/time B-time
2 dziesiątą inform/time I-time
# text: na szóstą
# intent: inform/time
# slots:
1 na inform/time NoLabel
2 szóstą inform/time B-time
# text: tak zgadza się
# intent: affirm
# slots:
1 tak affirm NoLabel
2 zgadza affirm NoLabel
3 się affirm NoLabel
# text: potwierdzam
# intent: affirm
# slots:
1 potwierdzam affirm NoLabel
# text: nie zgadza się
# intent: negate
# slots:
1 nie negate NoLabel
2 zgadza negate NoLabel
3 się negate NoLabel
# text: numer to 333444555
# intent: inform/phone
# slots:
1 numer inform/phone NoLabel
2 to inform/phone NoLabel
3 333444555 inform/phone B-phone
# text: numer to 335 23 25 29
# intent: inform/phone
# slots:
1 numer inform/phone NoLabel
2 to inform/phone NoLabel
3 335 inform/phone B-phone
4 23 inform/phone I-phone
5 25 inform/phone I-phone
6 29 inform/phone I-phone
# text: dziękuję to wszystko
# intent: inform/order-complete
# slots:
1 dziękuję inform/order-complete NoLabel
2 to inform/order-complete NoLabel
3 wszystko inform/order-complete NoLabel
# text: potwierdzam zamówienie
# intent: inform/order-complete
# slots:
1 potwierdzam inform/order-complete NoLabel
2 zamówienie inform/order-complete NoLabel
# text: bez kurczaka
# intent: inform/order
# slots:
1 bez inform/order B-ingredient/neg
2 kurczaka inform/order I-ingredient/neg
# text: bez cebuli
# intent: inform/order
# slots:
1 bez inform/order B-ingredient/neg
2 cebuli inform/order I-ingredient/neg
# text: bez szynki i pieczarek
# intent: inform/order
# slots:
1 bez inform/order B-ingredient/neg
2 szynki inform/order I-ingredient/neg
3 i inform/order NoLabel
4 pieczarek inform/order B-ingredient/neg
# text: proszę pizzę z ananasem, kurczakiem i serem
# intent: inform/order
# slots:
1 proszę inform/order NoLabel
2 pizzę inform/order B-food
3 z inform/order NoLabel
4 ananasem, inform/order B-ingredient
5 kurczakiem inform/order B-ingredient
6 i inform/order NoLabel
7 serem inform/order B-ingredient
# text: ile płacę
# intent: request/price
# slots:
1 ile request/price NoLabel
2 płacę request/price NoLabel
# text: ile zapłacę
# intent: request/price
# slots:
1 ile request/price NoLabel
2 zapłacę request/price NoLabel
# text: sos tysiąca wysp
# intent: inform/order
# slots:
1 sos inform/order B-sauce
2 tysiąca inform/order I-sauce
3 wysp inform/order I-sauce
# text: sos koperkowy
# intent: inform/order
# slots:
1 sos inform/order B-sauce
2 koperkowy inform/order I-sauce
# text: tak dziękuję
# intent: affirm
# slots:
1 tak affirm NoLabel
2 dziękuję affirm NoLabel
# text: nnie, dziękuję
# intent: negate
# slots:
1 nnie, negate NoLabel
2 dziękuję negate NoLabel
# text: nie dzięki
# intent: negate
# slots:
1 nie negate NoLabel
2 dzięki negate NoLabel

45
evaluate.py Normal file
View File

@ -0,0 +1,45 @@
import re
import os
import pandas as pd
import numpy as np
from nlu_utils import predict_multiple
from flair.models import SequenceTagger
def __parse_acts(acts):
    """Return the set of act names found in an annotation string.

    The string is split on ``&`` and, for every part, each shortest
    ``(...)`` / ``[...]`` group is removed, e.g.
    ``"affirm()&request(quantity=1)"`` -> ``{"affirm", "request"}``.

    Args:
        acts: Annotation string with acts joined by ``&``.

    Returns:
        A set of the act strings with their bracketed arguments stripped.
    """
    # Raw string: the original non-raw literal relied on invalid escape
    # sequences (``\(``, ``\[``), which newer Python versions warn about.
    remove_slot_regex = r"[\(\[].*?[\)\]]"
    return {re.sub(remove_slot_regex, "", act) for act in acts.split('&')}
def __parse_predictions(predictions):
    """Reduce predicted labels to the part before their first ``/``.

    Args:
        predictions: Iterable of label strings such as ``"inform/order"``.

    Returns:
        A set with the leading segment of every label (the whole label
        when it contains no ``/``).
    """
    heads = set()
    for label in predictions:
        head, _, _ = label.partition('/')
        heads.add(head)
    return heads
# Exploratory evaluation: for every user turn in data/*.tsv, compare the
# annotated acts with the acts predicted by the frame model.
frame_model = SequenceTagger.load('frame-model-prod/best-model.pt')
# TODO: slot evaluation is not implemented; the slot model is not loaded yet.
# slot_model = SequenceTagger.load('slot-model-prod/final-model.pt')

total_acts = 0
act_correct_predictions = 0
slot_correct_predictions = 0  # placeholder, not updated anywhere below

for file_name in os.listdir('data'):
    if not file_name.endswith('.tsv'):
        continue
    df = pd.read_csv(f'data/{file_name}', sep='\t', names=['kto', 'treść', 'akt'])
    # Keep user turns only, and drop rows where the utterance or the act
    # annotation is missing (NaN) so the string handling below cannot crash.
    df = df[df.kto == 'user'].dropna(subset=['treść', 'akt'])

    for sentence, acts_raw in zip(df['treść'], df['akt']):
        acts = __parse_acts(str(acts_raw))
        predictions_raw = predict_multiple(frame_model, str(sentence).split(), 'frame')
        predictions = __parse_predictions(predictions_raw)
        # An annotated act counts as correct when it appears among the
        # predicted acts of the same utterance.
        for act in acts:
            total_acts += 1
            if act in predictions:
                act_correct_predictions += 1

if total_acts:
    print(f"Accuracy - predicting acts: {(act_correct_predictions / total_acts)*100} ({act_correct_predictions}/{total_acts})")
else:
    # Avoid a ZeroDivisionError when no evaluable user turn was found.
    print("Accuracy - predicting acts: n/a (no annotated user turns found in data/*.tsv)")

30
nlu_tests.py Normal file
View File

@ -0,0 +1,30 @@
from flair.models import SequenceTagger
from nlu_utils import predict_single, predict_multiple, predict_and_annotate
# Exploratory smoke run: print what the trained frame and slot models
# predict for a handful of hand-picked utterances.
frame_model = SequenceTagger.load('frame-model/best-model.pt')

tests = [
    'chciałbym zamówić pizzę',
    'na godzinę 12',
    'prosiłbym o pizzę z pieczarkami',
    'to wszystko, jaka cena?',
    'ile kosztuje pizza',
    'do widzenia',
    'tak',
    'nie dziękuję',
    'dodatkowy ser',
    'pizzę barcelona bez cebuli',
]

# Report title paired with the helper that produces it, in print order.
frame_reports = (
    ("Single prediction", predict_single),
    ("Multiple predictions", predict_multiple),
    ("Annotated sentence", predict_and_annotate),
)

# The frame section intentionally prints no header (it was disabled in
# the original script).
for test in tests:
    print(f"Sentence: {test}")
    for title, predictor in frame_reports:
        print(f"{title}: {predictor(frame_model, test.split(), 'frame')}")

print("=== Exploratory tests - slot model ===")
slot_model = SequenceTagger.load('slot-model/final-model.pt')

for test in tests:
    print(f"Sentence: {test}")
    annotated = predict_and_annotate(slot_model, test.split(), 'slot')
    print(f"Prediction: {annotated}")

46
nlu_train.py Normal file
View File

@ -0,0 +1,46 @@
from conllu import parse_incr
from flair.data import Corpus
from flair.embeddings import StackedEmbeddings
from flair.embeddings import WordEmbeddings
from flair.embeddings import CharacterEmbeddings
from flair.embeddings import FlairEmbeddings
from flair.models import SequenceTagger
from flair.trainers import ModelTrainer
from nlu_utils import conllu2flair, nolabel2o
import random
import torch
# Seed the Python and torch random number generators so that repeated
# training runs start from the same state.
torch.manual_seed(42)
random.seed(42)

if torch.cuda.is_available():
    # NOTE(review): the CUDA generators are seeded with 0 while the CPU
    # generators use 42 — confirm this difference is intended.
    torch.cuda.manual_seed(0)
    torch.cuda.manual_seed_all(0)
    # cuDNN is switched off entirely; the remaining two flags are set to
    # their deterministic values as well.
    for flag, value in (("enabled", False), ("benchmark", False), ("deterministic", True)):
        setattr(torch.backends.cudnn, flag, value)
def train_model(label_type, field_parsers=None):
    """Train a Flair sequence tagger for one label column of the corpus.

    Reads ``data/train_dialog.conllu`` with the columns ``id``, ``form``,
    ``frame`` and ``slot``, builds a corpus from it and trains a tagger
    whose output is written to the ``'<label_type>-model'`` directory.

    Args:
        label_type: Column to learn; the script uses ``'frame'`` and ``'slot'``.
        field_parsers: Optional mapping of column name to parser callable,
            forwarded to ``conllu.parse_incr``. Defaults to no custom parsers.
    """
    # Build the default per call instead of sharing one mutable dict
    # between all invocations.
    if field_parsers is None:
        field_parsers = {}

    with open('data/train_dialog.conllu', encoding='utf-8') as trainfile:
        trainset = list(parse_incr(trainfile, fields=['id', 'form', 'frame', 'slot'], field_parsers=field_parsers))

    # NOTE(review): the test split is built from the very same sentences as
    # the training split, so scores reported on it are not a held-out measure.
    corpus = Corpus(train=conllu2flair(trainset, label_type), test=conllu2flair(trainset, label_type))
    label_dictionary = corpus.make_label_dictionary(label_type=label_type)

    embedding_types = [
        WordEmbeddings('pl'),
        FlairEmbeddings('pl-forward'),
        FlairEmbeddings('pl-backward'),
        CharacterEmbeddings(),
    ]
    embeddings = StackedEmbeddings(embeddings=embedding_types)

    tagger = SequenceTagger(hidden_size=256, embeddings=embeddings, tag_dictionary=label_dictionary, tag_type=label_type, use_crf=True, tag_format="BIO")

    trainer = ModelTrainer(tagger, corpus)
    trainer.train(f'{label_type}-model', learning_rate=0.1, mini_batch_size=32, max_epochs=75, train_with_dev=False)


if __name__ == '__main__':
    train_model("frame")
    # Slot labels use 'NoLabel' in the corpus; map it to the BIO outside tag.
    train_model('slot', field_parsers={'slot': nolabel2o})

100
nlu_utils.py Normal file
View File

@ -0,0 +1,100 @@
from flair.data import Sentence
from flair.datasets import FlairDatapointDataset
def nolabel2o(line, i):
    """Field parser that maps the corpus marker ``'NoLabel'`` to ``'O'``.

    Args:
        line: Sequence of column values of one token row.
        i: Index of the column to read.

    Returns:
        ``'O'`` when ``line[i]`` equals ``'NoLabel'``, otherwise ``line[i]``.
    """
    value = line[i]
    if value == 'NoLabel':
        return 'O'
    return value
def conllu2flair(sentences, label=None):
    """Convert parsed sentences to a Flair dataset for the given label.

    ``label == "frame"`` is handled by ``conllu2flair_frame``; every other
    value, including ``None``, is handled by ``conllu2flair_slot``.
    """
    converter = conllu2flair_frame if label == "frame" else conllu2flair_slot
    return converter(sentences, label)
def conllu2flair_frame(sentences, label=None):
    """Build a Flair dataset in which every token carries its own label.

    Args:
        sentences: Sentences whose tokens are mappings with a ``"form"``
            entry and a ``label`` entry.
        label: Key of the token entry to attach as the label value.

    Returns:
        A ``FlairDatapointDataset`` with one ``Sentence`` per input sentence.
    """
    converted = []
    for sentence in sentences:
        text = ' '.join(token["form"] for token in sentence)
        fsentence = Sentence(text, use_tokenizer=False)
        # Label each single-token slice with the value of the matching row.
        for position in range(len(fsentence)):
            single_token = fsentence[position:position + 1]
            single_token.add_label(label, sentence[position][label])
        converted.append(fsentence)
    return FlairDatapointDataset(converted)
def _bio_spans(tags):
    """Group a BIO tag sequence into ``(start, end, tag)`` spans.

    ``start`` and ``end`` are inclusive token positions and ``tag`` is the
    label without its ``B-`` prefix. A ``B-`` tag always opens a new span
    and closes the one before it; an ``I-`` tag extends the open span (and
    is ignored when none is open); any other value closes the open span.
    """
    spans = []
    start = end = tag = None
    for idx, value in enumerate(tags):
        if value.startswith('B-'):
            if start is not None:
                # Close the previous span first; without this, adjacent
                # slots (e.g. "B-food B-pizza") lose all but the last one.
                spans.append((start, end, tag))
            start, end, tag = idx, idx, value[2:]
        elif value.startswith('I-'):
            if start is not None:
                end = idx
        else:
            if start is not None:
                spans.append((start, end, tag))
            start = end = tag = None
    if start is not None:
        spans.append((start, end, tag))
    return spans


def conllu2flair_slot(sentences, label=None):
    """Build a Flair dataset with span labels derived from BIO tags.

    Args:
        sentences: Sentences whose tokens are mappings with a ``'form'``
            entry and, when ``label`` is given, a BIO-tagged ``label`` entry.
        label: Key of the BIO tag entry; when falsy no labels are attached.

    Returns:
        A ``FlairDatapointDataset`` with one ``Sentence`` per input sentence.
    """
    fsentences = []
    for sentence in sentences:
        fsentence = Sentence(' '.join(token['form'] for token in sentence), use_tokenizer=False)
        if label:
            # zip() keeps the original pairing of rows with Flair tokens
            # (stops at the shorter of the two).
            tags = [token[label] for token, _ in zip(sentence, fsentence)]
            for start_idx, end_idx, tag in _bio_spans(tags):
                fsentence[start_idx:end_idx + 1].add_label(label, tag)
        fsentences.append(fsentence)
    return FlairDatapointDataset(fsentences)
def __predict(model, csentence):
    """Run ``model.predict`` on one sentence and return the Flair sentence.

    The sentence is converted through ``conllu2flair`` without a label, so
    it carries no labels of its own before prediction.
    """
    dataset = conllu2flair([csentence])
    fsentence = dataset[0]
    model.predict(fsentence)
    return fsentence
def __csentence(sentence, label_type):
    """Wrap each word in a token dict as expected by the converters.

    Args:
        sentence: Iterable of words.
        label_type: ``"frame"`` yields ``{'form': word}``; anything else
            additionally sets ``'slot'`` to ``'O'``.

    Returns:
        A list with one dict per word.
    """
    csentence = []
    for word in sentence:
        entry = {'form': word}
        if label_type != "frame":
            entry['slot'] = 'O'
        csentence.append(entry)
    return csentence
def predict_single(model, sentence, label_type):
    """Return the label predicted most often for the sentence.

    Args:
        model: Tagger passed on to ``__predict``.
        sentence: List of words.
        label_type: Label type whose spans are counted.

    Returns:
        The tag value carried by the largest number of predicted spans
        (on a tie, the one seen first), or ``None`` when the model
        predicted no span at all.
    """
    csentence = __csentence(sentence, label_type)
    fsentence = __predict(model, csentence)
    intent = {}
    for span in fsentence.get_spans(label_type):
        tag = span.get_label(label_type).value
        intent[tag] = intent.get(tag, 0) + 1
    if not intent:
        # max() on an empty dict would raise ValueError.
        return None
    return max(intent, key=intent.get)
def predict_multiple(model, sentence, label_type):
    """Return the set of distinct tag values predicted for the sentence.

    Args:
        model: Tagger passed on to ``__predict``.
        sentence: List of words.
        label_type: Label type whose spans are collected.
    """
    fsentence = __predict(model, __csentence(sentence, label_type))
    tags = set()
    for span in fsentence.get_spans(label_type):
        tags.add(span.get_label(label_type).value)
    return tags
def predict_and_annotate(model, sentence, label_type):
    """Predict labels and write them back into the token dicts.

    For ``label_type == "frame"`` the tag is stored under ``'frame'`` on
    the first token of each predicted span. Otherwise the first token of
    a span gets ``'slot' = 'B-<tag>'`` and the remaining ones
    ``'slot' = 'I-<tag>'``.

    Returns:
        The list of token dicts built from ``sentence``, annotated in place.
    """
    csentence = __csentence(sentence, label_type)
    fsentence = __predict(model, csentence)
    for span in fsentence.get_spans(label_type):
        tag = span.get_label(label_type).value
        # ``idx - 1`` is used as the list position of a token
        # (presumably Flair token indices start at 1 — verify).
        head_position = span.tokens[0].idx - 1
        if label_type == "frame":
            csentence[head_position]['frame'] = tag
            continue
        csentence[head_position]['slot'] = f'B-{tag}'
        for follower in span.tokens[1:]:
            csentence[follower.idx - 1]['slot'] = f'I-{tag}'
    return csentence