from transformers import pipeline
import re

# Shared text-classification pipeline, constructed once when the module is
# imported so that style_prediction() can reuse it on every call.
pipe = pipeline(
    'text-classification',
    model="jagiyahh/simple-polish-stylistic-errors",
    tokenizer='dkleczek/bert-base-polish-uncased-v1',
)

def style_prediction(data):
    """Run the module-level classification pipeline on ``data``.

    Args:
        data: Input forwarded unchanged to ``pipe``.

    Returns:
        Whatever ``pipe`` returns for ``data``, without post-processing.
    """
    return pipe(data)

def clear_data(data):
    """Normalize the sentences in ``data`` for classification.

    Each sentence has every character removed that is not an ASCII letter,
    a Polish diacritic letter, a space or an apostrophe; the result is then
    stripped of surrounding whitespace and lower-cased. Sentences that end
    up empty are dropped.

    Args:
        data: Mapping with a ``'sentences'`` key holding an iterable of
            strings.

    Returns:
        A list of cleaned, non-empty, lower-case strings in input order.

    Raises:
        KeyError: If ``data`` has no ``'sentences'`` key.
    """
    cleaned = []
    for sentence in data['sentences']:
        text = re.sub(r"[^A-Za-zżźćńółęąśŻŹĆĄŚĘŁÓŃ ']+", r"", sentence)
        # Strip BEFORE testing for emptiness: a sentence such as "123 456"
        # reduces to a lone space, which must not survive as '' in the output.
        text = text.strip().lower()
        if text:
            cleaned.append(text)

    return cleaned

def count_predictions(predictions):
    """Tally how many predictions carry each of the two known labels.

    Args:
        predictions: Iterable of mappings, each with a ``'label'`` key.

    Returns:
        A dict with two integer entries: ``'stylistically_positive'`` is the
        number of items labelled ``'LABEL_0'`` and
        ``'stylistically_negative'`` the number labelled ``'LABEL_1'``.
        Items with any other label are ignored.

    Raises:
        KeyError: If an item has no ``'label'`` key.
    """
    # Collect the labels in a single pass so one-shot iterables work too.
    labels = [prediction['label'] for prediction in predictions]

    return {
        'stylistically_positive': labels.count('LABEL_0'),
        'stylistically_negative': labels.count('LABEL_1'),
    }