exact_data2023/application/functions/sentiment.py

51 lines
1.3 KiB
Python
Raw Permalink Normal View History

2023-05-31 16:56:13 +02:00
from transformers import pipeline
import re
from facebook_scraper import get_posts
2023-05-27 15:10:30 +02:00
2023-05-31 16:56:13 +02:00
# Model identifier used for both the classifier weights and its tokenizer.
_SENTIMENT_MODEL = "Scigi/sentiment-analysis-model"

# Text-classification pipeline, built once when this module is imported and
# reused by every call to sentiment_prediction().
pipe = pipeline(
    "text-classification",
    model=_SENTIMENT_MODEL,
    tokenizer=_SENTIMENT_MODEL,
)
2023-05-27 15:10:30 +02:00
def sentiment_prediction(data):
    """Classify ``data`` with the module-level pipeline.

    Args:
        data: Input forwarded unchanged to ``pipe``.

    Returns:
        Whatever ``pipe`` returns for ``data``, unmodified.
    """
    return pipe(data)
# Matches every run of characters that is NOT an ASCII letter, a Polish
# diacritic letter, a space or an apostrophe.
_NON_LETTERS = re.compile(r"[^A-Za-zżźćńółęąśŻŹĆĄŚĘŁÓŃ ']+")


def clear_data(data):
    """Normalise the sentences stored under ``data['sentences']``.

    Each sentence has every character outside the allowed alphabet (ASCII
    letters, Polish letters, space, apostrophe) removed, is stripped of
    leading/trailing whitespace and lower-cased.  Sentences that end up
    empty are dropped.

    Args:
        data: Mapping with a ``'sentences'`` entry holding an iterable of
            strings.

    Returns:
        A list of cleaned, non-empty, lower-case strings in input order.

    Raises:
        KeyError: If ``data`` has no ``'sentences'`` key.
        TypeError: If a sentence is not a string.
    """
    cleaned = (
        _NON_LETTERS.sub("", sentence).strip().lower()
        for sentence in data['sentences']
    )
    # Filter AFTER stripping: a sentence made only of whitespace (or of
    # removed characters separated by spaces) must not survive as ''.
    return [sentence for sentence in cleaned if sentence]
2023-05-27 16:44:44 +02:00
def count_predictions(predictions):
    """Tally classifier outputs per sentiment class.

    ``LABEL_1`` is counted as positive, ``LABEL_0`` as negative and
    ``LABEL_2`` as neutral; any other label is ignored.

    Args:
        predictions: Iterable of mappings, each with a ``'label'`` entry.

    Returns:
        A dict with the integer counts under the keys ``'positive'``,
        ``'negative'`` and ``'neutral'`` (in that order).
    """
    sentiment_by_label = {
        'LABEL_1': 'positive',
        'LABEL_0': 'negative',
        'LABEL_2': 'neutral',
    }
    totals = dict.fromkeys(sentiment_by_label.values(), 0)
    for prediction in predictions:
        sentiment = sentiment_by_label.get(prediction['label'])
        if sentiment is not None:
            totals[sentiment] += 1
    return totals
def scrapp_comments(url):
    """Fetch a post's text and its comments through ``get_posts``.

    Args:
        url: URL of the post, passed to ``get_posts`` as the only entry of
            ``post_urls``.

    Returns:
        A dict with two keys:
        ``'post'`` - the ``'text'`` of the last post yielded, or ``None``
        when ``get_posts`` yields nothing;
        ``'sentences'`` - list of the ``'comment_text'`` of every entry in
        each yielded post's ``'comments_full'``.
    """
    # Default kept outside the loop so the result is well-formed even when
    # the scraper yields no post (previously the name was left unbound).
    post_text = None
    comments = []
    scraper_options = {
        "allow_extra_requests": False,
        "comments": True,
        "extra_info": True,
    }
    for post in get_posts(post_urls=[url], options=scraper_options):
        post_text = post['text']
        comments.extend(
            comment['comment_text'] for comment in post['comments_full']
        )
    return {'post': post_text, 'sentences': comments}