exact_data2023/application/functions/sentiment.py
2023-05-29 23:41:36 +02:00

38 lines
965 B
Python

import re
import unicodedata

from transformers import AutoTokenizer
from transformers import pipeline
# Model path/identifier string; passed as `model=` when the
# text-classification pipeline is built for sentiment prediction.
model = 'application/models/sentiment_model'
# Tokenizer is loaded once, when this module is imported, and the same object
# is passed as `tokenizer=` to the pipeline.
tokenizer = AutoTokenizer.from_pretrained('application/tokenizers/sentiment_tokenizer')
# Lazily-built pipeline shared by all calls; None until the first prediction.
_sentiment_pipe = None


def _get_sentiment_pipeline():
    """Return the text-classification pipeline, building it on first use.

    Constructing the pipeline loads the model, so it is done once and the
    resulting object is reused instead of being rebuilt on every prediction.
    The initialisation is not guarded by a lock: concurrent first calls may
    each build a pipeline, and the last one assigned is kept.
    """
    global _sentiment_pipe
    if _sentiment_pipe is None:
        _sentiment_pipe = pipeline(
            'text-classification', model=model, tokenizer=tokenizer
        )
    return _sentiment_pipe


def sentiment_prediction(data):
    """Run the sentiment text-classification pipeline on *data*.

    Args:
        data: Input forwarded unchanged to the pipeline call
            (presumably a string or list of strings -- confirm with callers).

    Returns:
        Whatever the pipeline returns for *data*, unmodified.
    """
    return _get_sentiment_pipeline()(data)
# Matches every run of characters that is NOT an ASCII letter, a Polish
# diacritic letter, a space or an apostrophe. Compiled once at import time.
_DISALLOWED_CHARS = re.compile(r"[^A-Za-zżźćńółęąśŻŹĆĄŚĘŁÓŃ ']+")


def clear_data(data):
    """Return cleaned, lower-cased copies of ``data['sentences']``.

    Each sentence is NFC-normalised, stripped of every character outside the
    allowed set (ASCII letters, Polish letters, spaces, apostrophes), trimmed
    of surrounding whitespace and lower-cased.

    Args:
        data: Mapping with a ``'sentences'`` entry holding an iterable of
            strings.

    Returns:
        A new list of cleaned strings, in the same order as the input.

    Raises:
        KeyError: If *data* has no ``'sentences'`` key.
    """
    cleaned = []
    for sentence in data['sentences']:
        # Compose decomposed sequences (e.g. "z" + U+0307 -> "ż") first;
        # otherwise the filter below drops the combining mark and silently
        # turns a Polish letter into its ASCII base letter.
        text = unicodedata.normalize('NFC', sentence)
        text = _DISALLOWED_CHARS.sub('', text)
        cleaned.append(text.strip().lower())
    return cleaned
def count_predictions(predictions):
    """Tally predictions per sentiment class.

    Args:
        predictions: Iterable of mappings, each with a ``'label'`` entry.
            ``'LABEL_1'`` is counted as positive, ``'LABEL_0'`` as negative
            and ``'LABEL_2'`` as neutral; any other label is ignored.

    Returns:
        A dict with the keys ``'positive'``, ``'negative'`` and ``'neutral'``
        (in that order) mapped to their integer counts. All three keys are
        present even when a class has no predictions.

    Raises:
        KeyError: If a prediction has no ``'label'`` key.
    """
    # Insertion order here fixes the key order of the returned dict.
    sentiment_by_label = {
        'LABEL_1': 'positive',
        'LABEL_0': 'negative',
        'LABEL_2': 'neutral',
    }
    counts = dict.fromkeys(sentiment_by_label.values(), 0)
    for prediction in predictions:
        sentiment = sentiment_by_label.get(prediction['label'])
        if sentiment is not None:
            counts[sentiment] += 1
    return counts