from transformers import pipeline import re pipe = pipeline('text-classification', model="jagiyahh/simple-polish-stylistic-errors", tokenizer = 'dkleczek/bert-base-polish-uncased-v1') def style_prediction(data): result = pipe(data) return result def clear_data(data): data = [re.sub(r"[^A-Za-zżźćńółęąśŻŹĆĄŚĘŁÓŃ ']+", r"", i) for i in data['sentences']] data = [x for x in data if x != ''] data = [i.strip() for i in data] data = [i.lower() for i in data] return data def count_predictions(predictions): l0 = 0 l1 = 0 all = {} for i in predictions: if i['label'] == 'LABEL_0': l0 += 1 if i['label'] == 'LABEL_1': l1 += 1 all['stylistically_positive'] = l0 all['stylistically_negative'] = l1 return all