34 lines
829 B
Python
34 lines
829 B
Python
|
from transformers import pipeline
|
||
|
import re
|
||
|
|
||
|
# Shared text-classification pipeline, built once at import time and reused by
# style_prediction() below.
pipe = pipeline(
    'text-classification',
    model="jagiyahh/simple-polish-stylistic-errors",
    tokenizer='dkleczek/bert-base-polish-uncased-v1',
)
|
||
|
|
||
|
def style_prediction(data):
    """Run the module-level classification pipeline on *data*.

    Args:
        data: Input forwarded unchanged to ``pipe``.

    Returns:
        Whatever ``pipe`` returns for *data*.
    """
    return pipe(data)
|
||
|
|
||
|
def clear_data(data):
    """Normalize the sentences stored under ``data['sentences']``.

    Each sentence has every character removed that is not an ASCII letter,
    a Polish diacritic letter, a space or an apostrophe; the remainder is
    stripped of surrounding whitespace and lower-cased. Sentences that end
    up empty are dropped.

    Args:
        data: Mapping with a ``'sentences'`` key holding an iterable of
            strings.

    Returns:
        A list of cleaned, non-empty, lower-case strings in input order.
    """
    unwanted = r"[^A-Za-zżźćńółęąśŻŹĆĄŚĘŁÓŃ ']+"
    cleaned = []
    for sentence in data['sentences']:
        text = re.sub(unwanted, r"", sentence).strip().lower()
        # Filter only after stripping: a sentence such as " 12 34 " reduces
        # to spaces, which must not reach the result as an empty string.
        if text:
            cleaned.append(text)
    return cleaned
|
||
|
|
||
|
def count_predictions(predictions):
    """Tally how many predictions carry each of the two known labels.

    Args:
        predictions: Iterable of mappings, each with a ``'label'`` key.

    Returns:
        A dict with ``'stylistically_positive'`` (number of ``'LABEL_0'``
        entries) and ``'stylistically_negative'`` (number of ``'LABEL_1'``
        entries). Entries with any other label are ignored.
    """
    positive = 0
    negative = 0

    for prediction in predictions:
        label = prediction['label']
        if label == 'LABEL_0':
            positive += 1
        elif label == 'LABEL_1':
            negative += 1

    # Built as a literal under a descriptive name so the builtin ``all`` is
    # not shadowed inside this function.
    return {
        'stylistically_positive': positive,
        'stylistically_negative': negative,
    }
|