exact_data2023/application/functions/style.py

34 lines
829 B
Python
Raw Normal View History

2023-06-14 00:03:39 +02:00
from transformers import pipeline
import re
# Shared text-classification pipeline, constructed once when this module is
# imported. The model and tokenizer are referenced by their hub identifiers;
# presumably they are fetched/loaded at this point -- confirm against the
# transformers documentation.
pipe = pipeline(
    'text-classification',
    model="jagiyahh/simple-polish-stylistic-errors",
    tokenizer='dkleczek/bert-base-polish-uncased-v1',
)
def style_prediction(data):
    """Run the module-level classification pipeline on *data*.

    Args:
        data: Input forwarded unchanged to ``pipe``.

    Returns:
        Whatever ``pipe`` returns for the given input.
    """
    return pipe(data)
def clear_data(data):
    """Normalize the sentences stored under ``data['sentences']``.

    Each sentence is reduced to ASCII letters, Polish diacritic letters,
    spaces and apostrophes, then stripped of surrounding whitespace and
    lower-cased. Sentences that end up empty are dropped.

    Args:
        data: Mapping with a ``'sentences'`` key holding an iterable of
            strings.

    Returns:
        A list of cleaned, non-empty, lower-cased sentences in their
        original order.

    Raises:
        KeyError: If ``data`` has no ``'sentences'`` key.
    """
    cleaned = []
    for sentence in data['sentences']:
        # Remove every character outside the allowed alphabet.
        text = re.sub(r"[^A-Za-zżźćńółęąśŻŹĆĄŚĘŁÓŃ ']+", "", sentence)
        # Strip BEFORE the emptiness check: a sentence such as "12 34"
        # collapses to a lone space, which must be discarded rather than
        # returned as an empty string.
        text = text.strip().lower()
        if text:
            cleaned.append(text)
    return cleaned
def count_predictions(predictions):
    """Tally how many predictions carry each of the two known labels.

    Args:
        predictions: Iterable of mappings, each with a ``'label'`` key.

    Returns:
        A dict with two integer entries: ``'stylistically_positive'`` is the
        number of items labelled ``'LABEL_0'`` and
        ``'stylistically_negative'`` the number labelled ``'LABEL_1'``.
        Items with any other label are ignored.

    Raises:
        KeyError: If an item has no ``'label'`` key.
    """
    positive = 0
    negative = 0
    for prediction in predictions:
        label = prediction['label']
        if label == 'LABEL_0':
            positive += 1
        elif label == 'LABEL_1':
            negative += 1
    # Built as a literal under a descriptive local name so the builtin
    # ``all`` is not shadowed.
    return {
        'stylistically_positive': positive,
        'stylistically_negative': negative,
    }