2023-06-10 01:51:01 +02:00
|
|
|
from transformers import pipeline
|
|
|
|
|
|
|
|
# Module-level text-classification pipeline shared by irony_prediction().
# It is built once, when this module is imported, with both the model and the
# tokenizer taken from the "olczig/irony-polish-detection" checkpoint.
pipe = pipeline('text-classification', model="olczig/irony-polish-detection", tokenizer = "olczig/irony-polish-detection")
|
|
|
|
|
|
|
|
def irony_prediction(data):
    """Classify ``data`` with the module-level ``pipe`` and return its output.

    Args:
        data: Input forwarded unchanged to ``pipe``.
            NOTE(review): presumably a string or a list of strings, as
            accepted by a text-classification pipeline -- confirm with callers.

    Returns:
        Whatever ``pipe(data)`` returns, without any post-processing.
    """
    return pipe(data)
|
|
|
|
|
|
|
|
# Characters stripped from every sentence by clear_data().
_REMOVED_CHARS = (
    '#@'        # hashtag and mention markers
    '\t"'       # tab characters and double quotes
    '\uf8ff'    # private-use code point (last of the BMP private-use area)
    '\U000fe329\U000fe35b\U000fe4ef\U000fe341'  # supplementary private-use code points
)

# Deletion table built once so each sentence is cleaned in a single pass.
_REMOVAL_TABLE = str.maketrans('', '', _REMOVED_CHARS)


def clear_data(data):
    """Return the sentences of ``data`` with unwanted characters removed.

    Every character listed in ``_REMOVED_CHARS`` is deleted from each
    sentence; all other characters are kept as they are.

    Args:
        data: Object indexable by the key ``'sentences'``, whose value is an
            iterable of strings.

    Returns:
        A new list with one cleaned string per input sentence, in the same
        order. ``data`` itself is not modified.

    Raises:
        KeyError: If ``data`` is a dict without a ``'sentences'`` key.
    """
    return [sentence.translate(_REMOVAL_TABLE) for sentence in data['sentences']]
|
|
|
|
|
|
|
|
def count_predictions(predictions):
    """Count how many predictions carry each of the two class labels.

    ``'LABEL_1'`` is reported under ``'irony'`` and ``'LABEL_0'`` under
    ``'non_irony'``. Predictions with any other label are ignored.

    Args:
        predictions: Iterable of mappings, each with a ``'label'`` key.

    Returns:
        A dict ``{'irony': <LABEL_1 count>, 'non_irony': <LABEL_0 count>}``.
        Both counts are 0 when ``predictions`` is empty.

    Raises:
        KeyError: If a prediction dict has no ``'label'`` key.
    """
    irony_count = 0
    non_irony_count = 0

    for prediction in predictions:
        label = prediction['label']
        if label == 'LABEL_0':
            non_irony_count += 1
        elif label == 'LABEL_1':
            irony_count += 1

    # Built as a literal so no local name shadows the builtin ``all``.
    return {'irony': irony_count, 'non_irony': non_irony_count}
|