Finalne poprawki
This commit is contained in:
parent
846c6991e7
commit
16af0e732c
@ -1,15 +1,15 @@
|
|||||||
def format_record(text, act, labels):
    """Render one annotated utterance in the project's CoNLL-U-like layout.

    The utterance is split on single spaces; every token becomes one
    tab-separated row ``index<TAB>token<TAB>intent<TAB>label`` with a
    1-based index, preceded by the ``# text`` / ``# intent`` / ``# slots``
    header lines.

    Args:
        text: Raw utterance as typed by the annotator.
        act: Intent label written into the header and into every row.
        labels: One slot label per space-separated token of ``text``.

    Returns:
        The complete record as a single string, starting with a blank line.
    """
    tokens = text.split(" ")
    parts = [f"\n# text: {text}\n# intent: {act}\n# slots:\n"]
    parts.extend(
        f"{i}\t{token}\t{act}\t{label}\n"
        for i, (token, label) in enumerate(zip(tokens, labels), start=1)
    )
    return "".join(parts)


def main():
    """Interactively collect annotated utterances and append them to the training file."""
    print("Script to automatically append data to data/train_dialog.conllu")
    print("Start typing now. Press Ctrl+C to stop.")

    try:
        while True:
            text = input("Text: ")
            act = input("Intent: ")
            labels = [
                input(f"{i}/{slot} label: ")
                for i, slot in enumerate(text.split(" "))
            ]
            # Write only once the whole utterance is labelled, so that an
            # interrupt in the middle of a sentence cannot leave a truncated
            # record in the training data. The explicit encoding matches how
            # the other scripts read this file.
            with open("data/train_dialog.conllu", "a", encoding="utf-8") as f:
                f.write(format_record(text, act, labels))
            print("---")
    except (KeyboardInterrupt, EOFError):
        # Ctrl+C / end of input is the documented way to stop the script.
        print()


if __name__ == "__main__":
    main()
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -332,11 +332,11 @@
|
|||||||
3 w request/menu NoLabel
|
3 w request/menu NoLabel
|
||||||
4 ofercie request/menu NoLabel
|
4 ofercie request/menu NoLabel
|
||||||
|
|
||||||
# text: chciałbym 3 pizze, hawajskie duże
|
# text: chciałbym trzy pizze, hawajskie duże
|
||||||
# intent: inform/order
|
# intent: inform/order
|
||||||
# slots:
|
# slots:
|
||||||
1 chciałbym inform/order NoLabel
|
1 chciałbym inform/order NoLabel
|
||||||
2 3 inform/order B-quantity
|
2 trzy inform/order B-quantity
|
||||||
3 pizze, inform/order B-food
|
3 pizze, inform/order B-food
|
||||||
4 hawajskie inform/order B-pizza
|
4 hawajskie inform/order B-pizza
|
||||||
5 duże inform/order B-size
|
5 duże inform/order B-size
|
||||||
@ -585,11 +585,11 @@
|
|||||||
4 tuna inform/order B-pizza
|
4 tuna inform/order B-pizza
|
||||||
5 XL inform/order B-size
|
5 XL inform/order B-size
|
||||||
|
|
||||||
# text: wezmę 3 pizze tuna, średnią, dużą i bardzo dużą
|
# text: wezmę 3x pizze tuna, średnią, dużą i bardzo dużą
|
||||||
# intent: inform/order
|
# intent: inform/order
|
||||||
# slots:
|
# slots:
|
||||||
1 wezmę inform/order NoLabel
|
1 wezmę inform/order NoLabel
|
||||||
2 3 inform/order B-quantity
|
2 3x inform/order B-quantity
|
||||||
3 pizze inform/order B-food
|
3 pizze inform/order B-food
|
||||||
4 tuna, inform/order B-pizza
|
4 tuna, inform/order B-pizza
|
||||||
5 średnią, inform/order B-size
|
5 średnią, inform/order B-size
|
||||||
@ -825,6 +825,14 @@
|
|||||||
1 jakie request/ingredients NoLabel
|
1 jakie request/ingredients NoLabel
|
||||||
2 składniki request/ingredients NoLabel
|
2 składniki request/ingredients NoLabel
|
||||||
|
|
||||||
|
# text: co jest na pizzy
|
||||||
|
# intent: request/ingredients
|
||||||
|
# slots:
|
||||||
|
1 co request/ingredients NoLabel
|
||||||
|
2 jest request/ingredients NoLabel
|
||||||
|
3 na request/ingredients NoLabel
|
||||||
|
4 pizzy request/ingredients NoLabel
|
||||||
|
|
||||||
# text: jakie są napoje
|
# text: jakie są napoje
|
||||||
# intent: request/drinks
|
# intent: request/drinks
|
||||||
# slots:
|
# slots:
|
||||||
@ -850,3 +858,54 @@
|
|||||||
2 macie request/drinks NoLabel
|
2 macie request/drinks NoLabel
|
||||||
3 do request/drinks NoLabel
|
3 do request/drinks NoLabel
|
||||||
4 picia request/drinks NoLabel
|
4 picia request/drinks NoLabel
|
||||||
|
|
||||||
|
# text: czy są dostępne jakieś sosy?
|
||||||
|
# intent: request/sauce
|
||||||
|
# slots:
|
||||||
|
1 czy request/sauce NoLabel
|
||||||
|
2 są request/sauce NoLabel
|
||||||
|
3 dostępne request/sauce NoLabel
|
||||||
|
4 jakieś request/sauce NoLabel
|
||||||
|
5 sosy? request/sauce NoLabel
|
||||||
|
|
||||||
|
# text: Grzegorz Pieczarski
|
||||||
|
# intent: inform/name
|
||||||
|
# slots:
|
||||||
|
1 Grzegorz inform/name B-name
|
||||||
|
2 Pieczarski inform/name I-name
|
||||||
|
|
||||||
|
# text: Sergiusz Kaczmarek
|
||||||
|
# intent: inform/name
|
||||||
|
# slots:
|
||||||
|
1 Sergiusz inform/name B-name
|
||||||
|
2 Kaczmarek inform/name I-name
|
||||||
|
|
||||||
|
# text: jaki koszt dowozu
|
||||||
|
# intent: request/delivery-price
|
||||||
|
# slots:
|
||||||
|
1 jaki request/delivery-price NoLabel
|
||||||
|
2 koszt request/delivery-price NoLabel
|
||||||
|
3 dowozu request/delivery-price NoLabel
|
||||||
|
|
||||||
|
# text: jakie sosy w menu?
|
||||||
|
# intent: request/sauce
|
||||||
|
# slots:
|
||||||
|
1 jakie request/sauce NoLabel
|
||||||
|
2 sosy request/sauce NoLabel
|
||||||
|
3 w request/sauce NoLabel
|
||||||
|
4 menu? request/sauce NoLabel
|
||||||
|
|
||||||
|
# text: Napój pepsi i cola
|
||||||
|
# intent: inform/order
|
||||||
|
# slots:
|
||||||
|
1 Napój inform/order NoLabel
|
||||||
|
2 pepsi inform/order B-drink
|
||||||
|
3 i inform/order NoLabel
|
||||||
|
4 cola inform/order B-drink
|
||||||
|
|
||||||
|
# text: woda i sok
|
||||||
|
# intent: inform/order
|
||||||
|
# slots:
|
||||||
|
1 woda inform/order B-drink
|
||||||
|
2 i inform/order NoLabel
|
||||||
|
3 sok inform/order B-drink
|
106
evaluate.py
106
evaluate.py
@ -1,45 +1,63 @@
|
|||||||
import re
|
import re
|
||||||
import os
|
import os
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from nlu_utils import predict_multiple
|
from nlu_utils import predict_multiple
|
||||||
from flair.models import SequenceTagger
|
from flair.models import SequenceTagger
|
||||||
|
from conllu import parse_incr
|
||||||
def __parse_acts(acts):
|
from flair.data import Corpus
|
||||||
acts_split = acts.split('&')
|
from nlu_utils import conllu2flair, nolabel2o
|
||||||
remove_slot_regex = "[\(\[].*?[\)\]]"
|
|
||||||
return set(re.sub(remove_slot_regex, "", act) for act in acts_split)
|
def _load_test_sentences(field_parsers):
    """Parse the held-out test file into a list of conllu sentences.

    Args:
        field_parsers: Per-field parser overrides forwarded to ``parse_incr``.
    """
    with open('data/test_dialog_46.conllu', encoding='utf-8') as testfile:
        return list(parse_incr(testfile, fields=['id', 'form', 'frame', 'slot'], field_parsers=field_parsers))


# Frame model evaluation
frame_model = SequenceTagger.load('frame-model-prod/best-model.pt')
testset = _load_test_sentences({})
corpus = Corpus(test=conllu2flair(testset, "frame"))
result = frame_model.evaluate(corpus.test, mini_batch_size=1, gold_label_type="frame")
print(result.detailed_results)

# Slot model evaluation
slot_model = SequenceTagger.load('slot-model-prod/best-model.pt')
# The slot column uses 'NoLabel' for untagged tokens; nolabel2o rewrites it to 'O'.
testset = _load_test_sentences({'slot': nolabel2o})
corpus = Corpus(test=conllu2flair(testset, "slot"))
result = slot_model.evaluate(corpus.test, mini_batch_size=8, gold_label_type="slot")
print(result.detailed_results)
|
||||||
df = df[df.kto == 'user']
|
|
||||||
all_data = np.array(df)
|
# Custom evaluation
|
||||||
|
# Matches a bracketed argument list such as "(name=Jan)" or "[x]" (non-greedy).
_SLOT_ARGS_RE = re.compile(r"[\(\[].*?[\)\]]")


def __parse_acts(acts):
    """Return the set of act names found in an annotation string.

    The annotation is split on ``&`` and every bracketed argument list
    (``(...)`` or ``[...]``) is removed from each part.

    Args:
        acts: Annotation string, e.g. ``"inform(name=Jan)&request(price)"``.

    Returns:
        Set of act names with their arguments stripped.
    """
    return {_SLOT_ARGS_RE.sub("", act) for act in acts.split('&')}
|
||||||
|
|
||||||
predictions_raw = predict_multiple(frame_model, sentence.split(), 'frame')
|
def __parse_predictions(predictions):
    """Reduce predicted labels to the part before the first ``/``.

    Args:
        predictions: Iterable of label strings such as ``"inform/order"``.

    Returns:
        Set of the leading label components, e.g. ``{"inform"}``.
    """
    return {prediction.split('/')[0] for prediction in predictions}
|
||||||
|
|
||||||
for act in acts:
|
# Act-level accuracy over the user turns of every data/*.tsv dialogue:
# an annotated act counts as correct when the frame model predicts it
# anywhere in the utterance.
total_acts = 0
act_correct_predictions = 0
slot_correct_predictions = 0  # NOTE(review): never updated in the visible code

for file_name in os.listdir('data'):
    if file_name.split('.')[-1] != 'tsv':
        continue

    # Columns: speaker, utterance text, annotated act(s).
    df = pd.read_csv(f'data/{file_name}', sep='\t', names=['kto', 'treść', 'akt'])
    df = df[df.kto == 'user']

    for sentence, gold_acts in zip(df['treść'], df['akt']):
        acts = __parse_acts(gold_acts)

        predictions_raw = predict_multiple(frame_model, sentence.split(), 'frame')
        predictions = __parse_predictions(predictions_raw)

        total_acts += len(acts)
        act_correct_predictions += len(acts & predictions)

if total_acts:
    print(f"Accuracy - predicting acts: {(act_correct_predictions / total_acts)*100} ({act_correct_predictions}/{total_acts})")
else:
    # Avoid ZeroDivisionError when data/ holds no .tsv files or no user turns.
    print("Accuracy - predicting acts: n/a (no user acts found)")
|
58
nlu_tests.py
58
nlu_tests.py
@ -1,30 +1,30 @@
|
|||||||
from flair.models import SequenceTagger
|
from flair.models import SequenceTagger
|
||||||
from nlu_utils import predict_single, predict_multiple, predict_and_annotate
|
from nlu_utils import predict_single, predict_multiple, predict_and_annotate
|
||||||
|
|
||||||
# Exploratory tests: print the models' predictions for a few hand-written
# utterances so the output can be inspected by eye.
frame_model = SequenceTagger.load('frame-model/best-model.pt')
tests = [
    'chciałbym zamówić pizzę',
    'na godzinę 12',
    'prosiłbym o pizzę z pieczarkami',
    'to wszystko, jaka cena?',
    'ile kosztuje pizza',
    'do widzenia',
    'tak',
    'nie dziękuję',
    'dodatkowy ser',
    'pizzę barcelona bez cebuli',
]

# Header restored so both sections of the output are labelled the same way.
print("=== Exploratory tests - frame model ===")
for test in tests:
    tokens = test.split()
    print(f"Sentence: {test}")
    print(f"Single prediction: {predict_single(frame_model, tokens, 'frame')}")
    print(f"Multiple predictions: {predict_multiple(frame_model, tokens, 'frame')}")
    print(f"Annotated sentence: {predict_and_annotate(frame_model, tokens, 'frame')}")

print("=== Exploratory tests - slot model ===")
slot_model = SequenceTagger.load('slot-model/final-model.pt')
for test in tests:
    print(f"Sentence: {test}")
    print(f"Prediction: {predict_and_annotate(slot_model, test.split(), 'slot')}")
|
88
nlu_train.py
88
nlu_train.py
@ -1,46 +1,42 @@
|
|||||||
from conllu import parse_incr
|
from conllu import parse_incr
|
||||||
from flair.data import Corpus
|
from flair.data import Corpus
|
||||||
from flair.embeddings import StackedEmbeddings
|
from flair.embeddings import StackedEmbeddings
|
||||||
from flair.embeddings import WordEmbeddings
|
from flair.embeddings import WordEmbeddings
|
||||||
from flair.embeddings import CharacterEmbeddings
|
from flair.embeddings import CharacterEmbeddings
|
||||||
from flair.embeddings import FlairEmbeddings
|
from flair.embeddings import FlairEmbeddings
|
||||||
from flair.models import SequenceTagger
|
from flair.models import SequenceTagger
|
||||||
from flair.trainers import ModelTrainer
|
from flair.trainers import ModelTrainer
|
||||||
from nlu_utils import conllu2flair, nolabel2o
|
from nlu_utils import conllu2flair, nolabel2o
|
||||||
|
|
||||||
import random
|
import torch
|
||||||
import torch
|
# When a GPU is present, turn cuDNN off and pin its benchmark/deterministic
# switches — presumably to make training runs repeatable; confirm with the
# author, since the commit also dropped the explicit RNG seeding.
if torch.cuda.is_available():
    torch.backends.cudnn.enabled = False
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
|
||||||
if torch.cuda.is_available():
|
|
||||||
torch.cuda.manual_seed(0)
|
def train_model(label_type, field_parsers=None):
    """Train a flair sequence tagger for one label column and save it.

    Reads ``data/train_dialog.conllu`` as the training split and
    ``data/test_dialog_46.conllu`` as the test split, then trains a CRF
    tagger on stacked Polish embeddings. Output goes to the
    ``{label_type}-model`` directory.

    Args:
        label_type: Column to learn, ``"frame"`` or ``"slot"``.
        field_parsers: Optional per-field parser overrides forwarded to
            ``parse_incr`` (e.g. ``{'slot': nolabel2o}``). Defaults to none.
    """
    # A fresh dict per call instead of a shared mutable default argument.
    field_parsers = {} if field_parsers is None else field_parsers
    fields = ['id', 'form', 'frame', 'slot']

    with open('data/train_dialog.conllu', encoding='utf-8') as f:
        trainset = list(parse_incr(f, fields=fields, field_parsers=field_parsers))
    with open('data/test_dialog_46.conllu', encoding='utf-8') as f:
        testset = list(parse_incr(f, fields=fields, field_parsers=field_parsers))

    # The debugging breakpoint() that used to sit here was removed: it
    # halted every training run until someone resumed it by hand.
    corpus = Corpus(train=conllu2flair(trainset, label_type), test=conllu2flair(testset, label_type))
    label_dictionary = corpus.make_label_dictionary(label_type=label_type)

    embedding_types = [
        WordEmbeddings('pl'),
        FlairEmbeddings('pl-forward'),
        FlairEmbeddings('pl-backward'),
        CharacterEmbeddings(),
    ]

    embeddings = StackedEmbeddings(embeddings=embedding_types)
    tagger = SequenceTagger(hidden_size=256, embeddings=embeddings, tag_dictionary=label_dictionary, tag_type=label_type, use_crf=True, tag_format="BIO")

    trainer = ModelTrainer(tagger, corpus)
    trainer.train(f'{label_type}-model', learning_rate=0.1, mini_batch_size=16, max_epochs=75, train_with_dev=False)
|
||||||
tagger = SequenceTagger(hidden_size=256, embeddings=embeddings, tag_dictionary=label_dictionary, tag_type=label_type, use_crf=True, tag_format="BIO")
|
|
||||||
|
if __name__ == '__main__':
    train_model("frame")
    # NOTE(review): slot training is disabled in this revision, while other
    # scripts in the project still load a slot model — confirm this is
    # intentional and not a debugging leftover.
    # train_model('slot', field_parsers={'slot': nolabel2o})
|
|
199
nlu_utils.py
199
nlu_utils.py
@ -1,100 +1,101 @@
|
|||||||
from flair.data import Sentence
|
from flair.data import Sentence
|
||||||
from flair.datasets import FlairDatapointDataset
|
from flair.datasets import FlairDatapointDataset
|
||||||
|
|
||||||
def nolabel2o(line, i):
    """Field parser that maps the ``'NoLabel'`` placeholder to ``'O'``.

    Args:
        line: Sequence of column values for one token row.
        i: Index of the column being parsed.

    Returns:
        ``'O'`` when the column holds ``'NoLabel'``, otherwise the column
        value unchanged.
    """
    value = line[i]
    return 'O' if value == 'NoLabel' else value
|
||||||
|
|
||||||
def conllu2flair(sentences, label=None):
    """Convert parsed conllu sentences into a flair dataset.

    ``"frame"`` is converted with one label per token; every other value,
    including ``None``, goes through the slot (BIO span) converter.

    Args:
        sentences: Iterable of sentences, each a sequence of token mappings.
        label: Name of the column to attach as labels, or ``None``.
    """
    if label == "frame":
        return conllu2flair_frame(sentences, label)
    return conllu2flair_slot(sentences, label)
|
||||||
|
|
||||||
def conllu2flair_frame(sentences, label=None):
    """Build a flair dataset in which every token carries its own label.

    Args:
        sentences: Iterable of sentences, each a sequence of token mappings
            with a ``"form"`` key and a ``label`` key.
        label: Name of the column whose value is attached to each token.

    Returns:
        ``FlairDatapointDataset`` wrapping the labelled sentences.
    """
    fsentences = []
    for sentence in sentences:
        tokens = [token["form"] for token in sentence]
        fsentence = Sentence(' '.join(tokens), use_tokenizer=False)

        # Pair source tokens with flair tokens, as the slot converter does,
        # and label each one as a single-token span.
        for i, (token, _) in enumerate(zip(sentence, fsentence)):
            fsentence[i:i+1].add_label(label, token[label])

        fsentences.append(fsentence)

    return FlairDatapointDataset(fsentences)
|
||||||
|
|
||||||
def conllu2flair_slot(sentences, label=None):
    """Build a flair dataset with BIO-tagged columns turned into span labels.

    A ``B-x`` value opens a span tagged ``x`` (closing any span still open),
    ``I-...`` extends the current span, and ``O`` closes it. Other values
    are ignored. When ``label`` is falsy the sentences are returned
    unlabelled, which is how the prediction helpers use this function.

    Args:
        sentences: Iterable of sentences, each a sequence of token mappings
            with a ``'form'`` key and, if ``label`` is given, a ``label`` key.
        label: Name of the BIO column, or ``None`` for no labels.

    Returns:
        ``FlairDatapointDataset`` wrapping the sentences.
    """
    fsentences = []
    for sentence in sentences:
        fsentence = Sentence(' '.join(token['form'] for token in sentence), use_tokenizer=False)
        start_idx = None
        end_idx = None
        tag = None

        if label:
            for idx, (token, _) in enumerate(zip(sentence, fsentence)):
                value = token[label]
                if value.startswith('B-'):
                    # A new span begins: emit the one that was still open.
                    if start_idx is not None:
                        fsentence[start_idx:end_idx+1].add_label(label, tag)
                    start_idx = idx
                    end_idx = idx
                    tag = value[2:]
                elif value.startswith('I-'):
                    end_idx = idx
                elif value == 'O':
                    if start_idx is not None:
                        fsentence[start_idx:end_idx+1].add_label(label, tag)
                        start_idx = None
                        end_idx = None
                        tag = None

            # Emit a span that runs to the end of the sentence.
            if start_idx is not None:
                fsentence[start_idx:end_idx+1].add_label(label, tag)

        fsentences.append(fsentence)

    return FlairDatapointDataset(fsentences)
|
||||||
def __predict(model, csentence):
    """Run ``model`` on one sentence and return the predicted flair sentence.

    Args:
        model: Object exposing ``predict(sentence)``.
        csentence: Sequence of token mappings with a ``'form'`` key.

    Returns:
        The flair sentence after ``model.predict`` has been called on it.
    """
    # No label name is passed, so the sentence is built without gold labels.
    dataset = conllu2flair([csentence])
    fsentence = dataset[0]
    model.predict(fsentence)
    return fsentence
|
||||||
def __csentence(sentence, label_type):
    """Wrap plain words in the token-mapping layout used by the converters.

    Args:
        sentence: Iterable of word strings.
        label_type: ``"frame"`` yields ``{'form': word}`` entries; any other
            value also adds a ``'slot'`` key preset to ``'O'``.

    Returns:
        List with one dict per word.
    """
    if label_type == "frame":
        return [{'form': word} for word in sentence]
    return [{'form': word, 'slot': 'O'} for word in sentence]
|
||||||
def predict_single(model, sentence, label_type):
    """Return the label predicted most often across the sentence.

    Args:
        model: Tagger passed on to the prediction helper.
        sentence: Iterable of word strings.
        label_type: Label type to read from the predicted spans.

    Returns:
        The most frequent predicted label (the first one seen wins a tie),
        or ``None`` when the model predicted no span at all.
    """
    csentence = __csentence(sentence, label_type)
    fsentence = __predict(model, csentence)

    intent = {}
    for span in fsentence.get_spans(label_type):
        tag = span.get_label(label_type).value
        intent[tag] = intent.get(tag, 0) + 1

    # max() raises ValueError on an empty dict; report "no prediction" instead.
    if not intent:
        return None
    return max(intent, key=intent.get)
|
||||||
def predict_multiple(model, sentence, label_type):
    """Return the set of distinct labels predicted for the sentence.

    Args:
        model: Tagger passed on to the prediction helper.
        sentence: Iterable of word strings.
        label_type: Label type to read from the predicted spans.

    Returns:
        Set of label values, empty when nothing was predicted.
    """
    csentence = __csentence(sentence, label_type)
    fsentence = __predict(model, csentence)

    return {span.get_label(label_type).value for span in fsentence.get_spans(label_type)}
|
||||||
def predict_and_annotate(model, sentence, label_type):
    """Predict labels and write them back onto the token mappings.

    For ``"frame"`` the predicted tag is stored under ``'frame'`` on the
    first token of each span. For any other label type the span is written
    in BIO form under ``'slot'``: ``B-tag`` on the first token and ``I-tag``
    on the remaining ones.

    Args:
        model: Tagger passed on to the prediction helper.
        sentence: Iterable of word strings.
        label_type: Label type to read from the predicted spans.

    Returns:
        List of token dicts, one per word, carrying the predicted annotation.
    """
    csentence = __csentence(sentence, label_type)
    fsentence = __predict(model, csentence)

    for span in fsentence.get_spans(label_type):
        tag = span.get_label(label_type).value
        first, *rest = span.tokens
        # Token idx is used as 1-based here, hence the -1 when indexing.
        if label_type == "frame":
            csentence[first.idx - 1]['frame'] = tag
        else:
            csentence[first.idx - 1]['slot'] = f'B-{tag}'
            for token in rest:
                csentence[token.idx - 1]['slot'] = f'I-{tag}'

    return csentence
|
Loading…
Reference in New Issue
Block a user