48 lines
1.6 KiB
Python
48 lines
1.6 KiB
Python
|
import pandas as pd
|
||
|
from tabulate import tabulate
|
||
|
from flair.data import Sentence, Token
|
||
|
from flair.datasets import SentenceDataset
|
||
|
from flair.models import SequenceTagger
|
||
|
|
||
|
def conllu2flair(sentences, label=None):
    """Wrap token-dict sentences into a flair ``SentenceDataset``.

    Each input sentence is an iterable of mappings that provide a ``'form'``
    entry (presumably parsed CoNLL-U tokens -- confirm against the caller).
    When ``label`` is truthy, ``token[label]`` is attached to the created
    flair token as a tag named ``label``.

    Args:
        sentences: Iterable of sentences, each an iterable of token mappings.
        label: Optional key to copy from every token mapping as a tag.

    Returns:
        A ``SentenceDataset`` holding one flair ``Sentence`` per input
        sentence, in the same order.
    """

    def build_sentence(tokens):
        # One flair Sentence per input sentence, filled token by token.
        flair_sentence = Sentence()
        for entry in tokens:
            flair_token = Token(entry['form'])
            if label:
                flair_token.add_tag(label, entry[label])
            flair_sentence.add_token(flair_token)
        return flair_sentence

    return SentenceDataset([build_sentence(tokens) for tokens in sentences])
|
||
|
|
||
|
def predict(frame_model, sentence):
    """Return the frame label with the highest accumulated score.

    The words are wrapped into a single flair sentence via ``conllu2flair``,
    tagged by ``frame_model``, and the scores of all "frame" annotations are
    summed per label value across every token of the sentence.

    Args:
        frame_model: Object exposing ``predict(sentence)``; its output is
            read back from each token's ``annotation_layers["frame"]``.
        sentence: Iterable of word strings.

    Returns:
        The label value whose summed score is the largest.

    Raises:
        ValueError: If no "frame" annotation was found on any token.
    """
    csentence = [{'form': word} for word in sentence]
    fsentence = conllu2flair([csentence])[0]
    frame_model.predict(fsentence)

    # Sum the score of every candidate label over all tokens.
    possible_intents = {}
    for token in fsentence:
        for intent in token.annotation_layers["frame"]:
            possible_intents[intent.value] = (
                possible_intents.get(intent.value, 0) + intent.score
            )

    if not possible_intents:
        raise ValueError("no 'frame' annotations were predicted for the sentence")

    # Select by accumulated score; a bare max() over the dict would compare
    # the label strings themselves and ignore the scores entirely.
    return max(possible_intents, key=possible_intents.get)
|
||
|
|
||
|
# Evaluation script: load the trained tagger, read (sentence, intent) pairs
# from data.tsv and report how often the predicted intent matches.
frame_model = SequenceTagger.load('frame-model/final-model.pt')

data = []
with open('data.tsv') as f:
    lines = f.readlines()

# The first line is skipped (treated as a header row).
for line in lines[1:]:
    if not line.strip():
        # Tolerate blank lines (e.g. a trailing one) instead of failing
        # with an IndexError on the column lookup below.
        continue
    columns = line.split("\t")
    data.append((columns[0], columns[1]))

correct = 0
for sentence in data:
    # The second column may still carry the line terminator.
    expected_intent = sentence[1].replace('\n', '')
    predicted_intent = predict(frame_model, sentence[0].split())
    if predicted_intent == expected_intent:
        correct += 1
    else:
        print(predicted_intent + " != " + expected_intent)

# Avoid a ZeroDivisionError when the file holds no data rows.
accuracy = correct / len(data) if data else 0.0
print(f"{accuracy} {correct}/{len(data)}")
|