exact_data2023/application/functions/sentiment.py

54 lines
1.6 KiB
Python
Raw Normal View History

2023-05-27 15:10:30 +02:00
from transformers import AutoTokenizer
from transformers import pipeline, GPT2ForSequenceClassification
import re
from facebook_scraper import get_posts
2023-05-27 15:10:30 +02:00
# Load the sequence-classification model and its tokenizer once, at import
# time, so every function in this module shares the same objects. Both are
# resolved from the "Scigi/sentiment-analysis-model" identifier, and the
# classifier head is configured for three labels (num_labels=3).
# Disabled alternative that pointed at copies stored inside the application:
# model = 'application/models/sentiment_model'
# tokenizer = AutoTokenizer.from_pretrained('application/tokenizers/sentiment_tokenizer')
model = GPT2ForSequenceClassification.from_pretrained("Scigi/sentiment-analysis-model", num_labels=3)
tokenizer = AutoTokenizer.from_pretrained("Scigi/sentiment-analysis-model")
2023-05-27 15:10:30 +02:00
# Lazily-built text-classification pipeline, shared by every call to
# sentiment_prediction(); None until the first prediction is requested.
_classifier = None


def sentiment_prediction(data):
    """Run the sentiment classifier over ``data`` and return its raw output.

    The ``text-classification`` pipeline is created from the module-level
    ``model`` and ``tokenizer`` on the first call and reused afterwards,
    because neither of them changes between calls.

    Args:
        data: The input handed to the pipeline unchanged (the rest of this
            module passes a list of cleaned sentences).

    Returns:
        Whatever the pipeline returns for ``data``; ``count_predictions``
        in this module reads the ``'label'`` key of each entry.
    """
    global _classifier
    if _classifier is None:
        _classifier = pipeline('text-classification', model=model, tokenizer=tokenizer)
    return _classifier(data)
# Matches every run of characters that is not an ASCII letter, a Polish
# letter, a space or an apostrophe.
_NON_LETTERS_RE = re.compile(r"[^A-Za-zżźćńółęąśŻŹĆĄŚĘŁÓŃ ']+")


def clear_data(data):
    """Normalise the sentences stored under ``data['sentences']``.

    Each sentence has every character other than ASCII/Polish letters,
    spaces and apostrophes removed, is stripped of surrounding whitespace
    and lower-cased. Sentences that end up empty are dropped.

    Args:
        data: Mapping with a ``'sentences'`` entry holding an iterable of
            strings.

    Returns:
        A list of cleaned, non-empty, lower-cased sentences in their
        original order.
    """
    cleaned = (
        _NON_LETTERS_RE.sub("", sentence).strip().lower()
        for sentence in data['sentences']
    )
    # Filter *after* stripping: a sentence made only of spaces (for example
    # "12 34" once the digits are removed) must not survive as ''.
    return [sentence for sentence in cleaned if sentence]
2023-05-27 16:44:44 +02:00
# Maps the classifier's raw labels to the sentiment names used in the
# returned summary; the insertion order fixes the key order of the result.
_LABEL_NAMES = {
    'LABEL_1': 'positive',
    'LABEL_0': 'negative',
    'LABEL_2': 'neutral',
}


def count_predictions(predictions):
    """Count how many predictions fall into each sentiment class.

    Args:
        predictions: Iterable of mappings, each with a ``'label'`` entry.
            ``'LABEL_1'`` is counted as positive, ``'LABEL_0'`` as negative
            and ``'LABEL_2'`` as neutral; any other label is ignored.

    Returns:
        A dict with the keys ``'positive'``, ``'negative'`` and
        ``'neutral'`` (in that order) mapped to their counts. All three keys
        are present even when a count is zero.

    Raises:
        KeyError: If a prediction has no ``'label'`` entry.
    """
    counts = dict.fromkeys(_LABEL_NAMES.values(), 0)
    for prediction in predictions:
        name = _LABEL_NAMES.get(prediction['label'])
        if name is not None:
            counts[name] += 1
    return counts
def scrapp_comments(url):
    """Fetch a Facebook post and the text of its comments.

    Args:
        url: URL of the post, passed to ``get_posts`` as the only entry of
            ``post_urls``.

    Returns:
        A dict with two entries: ``'post'`` holds the post's ``'text'``
        field and ``'sentences'`` holds the list of ``'comment_text'``
        values from the post's ``'comments_full'`` field. When the scraper
        yields no post, ``'post'`` is ``None`` and ``'sentences'`` is an
        empty list, so callers always receive both keys.
    """
    # Bound before the loop so an empty scrape cannot leave the name
    # undefined or the result without its keys.
    post_text = None
    comments = []
    options = {"allow_extra_requests": False, "comments": True, "extra_info": True}
    for post in get_posts(post_urls=[url], options=options):
        post_text = post['text']
        # NOTE(review): 'comments_full' is presumably None when the scraper
        # could not load any comments - treated as "no comments"; confirm
        # against the facebook_scraper documentation.
        comments.extend(
            comment['comment_text'] for comment in post['comments_full'] or []
        )
    return {'post': post_text, 'sentences': comments}