55 lines
1.6 KiB
Python
55 lines
1.6 KiB
Python
from transformers import AutoTokenizer
|
|
from transformers import pipeline, GPT2ForSequenceClassification
|
|
import re
|
|
from facebook_scraper import get_posts
|
|
|
|
# model = 'application/models/sentiment_model'
|
|
# tokenizer = AutoTokenizer.from_pretrained('application/tokenizers/sentiment_tokenizer')
|
|
|
|
# Module-level classifier shared by sentiment_prediction(); instantiated once at
# import time from the "Scigi/sentiment-analysis-model" checkpoint with a
# 3-label classification head.
model = GPT2ForSequenceClassification.from_pretrained("Scigi/sentiment-analysis-model", num_labels=3)
|
|
# Tokenizer loaded from the same checkpoint name as the model above, so that
# both are passed together to the text-classification pipeline.
tokenizer = AutoTokenizer.from_pretrained("Scigi/sentiment-analysis-model")
|
|
|
|
def sentiment_prediction(data):
    """Classify ``data`` with the module-level model and tokenizer.

    A ``'text-classification'`` pipeline is built from the module's ``model``
    and ``tokenizer`` on every call and applied to ``data``.

    Args:
        data: Input forwarded unchanged to the pipeline (presumably a string
            or a list of strings -- confirm against callers).

    Returns:
        Whatever the pipeline returns for ``data``; ``count_predictions`` in
        this module reads a ``'label'`` key from each item of such a result.
    """
    return pipeline('text-classification', model=model, tokenizer=tokenizer)(data)
|
|
|
|
def clear_data(data):
    """Normalise scraped sentences before they are classified.

    Each string in ``data['sentences']`` has every character removed that is
    not an ASCII letter, one of the listed Polish diacritic letters, a space
    or an apostrophe. The remainder is stripped of surrounding whitespace and
    lower-cased. Entries that end up empty are dropped.

    Args:
        data: Mapping with a ``'sentences'`` key holding an iterable of
            strings.

    Returns:
        list: The cleaned, non-empty, lower-cased sentences in input order.

    Raises:
        KeyError: If ``data`` has no ``'sentences'`` key.
    """
    disallowed = re.compile(r"[^A-Za-zżźćńółęąśŻŹĆĄŚĘŁÓŃ ']+")
    cleaned = []
    for sentence in data['sentences']:
        text = disallowed.sub(r"", sentence).strip().lower()
        # Filter AFTER stripping: a sentence such as " 1 2 " reduces to spaces
        # only, and must not reach the classifier as an empty string.
        if text:
            cleaned.append(text)
    return cleaned
|
|
|
|
def count_predictions(predictions):
    """Tally classifier outputs per sentiment name.

    Args:
        predictions: Iterable of mappings, each with a ``'label'`` key.
            ``'LABEL_1'`` is counted as positive, ``'LABEL_0'`` as negative
            and ``'LABEL_2'`` as neutral; any other label is ignored.

    Returns:
        dict: ``{'positive': int, 'negative': int, 'neutral': int}``, with all
        three keys always present (zero when no prediction had that label).

    Raises:
        KeyError: If a prediction has no ``'label'`` key.
    """
    # Insertion order fixes the key order of the returned dict.
    label_names = {
        'LABEL_1': 'positive',
        'LABEL_0': 'negative',
        'LABEL_2': 'neutral',
    }
    totals = dict.fromkeys(label_names.values(), 0)

    for prediction in predictions:
        name = label_names.get(prediction['label'])
        if name is not None:
            totals[name] += 1

    return totals
|
|
|
|
def scrapp_comments(url):
    """Scrape a Facebook post and the text of its comments.

    Calls ``get_posts`` for the single ``url`` with comments and extra info
    enabled and extra requests disabled, then collects the
    ``'comment_text'`` of every entry in the post's ``'comments_full'``.

    Args:
        url: URL of the post, passed to ``get_posts`` as the only element of
            ``post_urls``.

    Returns:
        dict: ``{'post': text, 'sentences': comments}``. ``'post'`` is the
        ``'text'`` of the last post yielded, or ``None`` when the scraper
        yields nothing. ``'sentences'`` is the list of comment texts,
        possibly empty.
    """
    comments = []
    # Initialised up front so an empty scrape returns a well-formed result
    # instead of failing on an unbound local.
    text_post = None
    options = {"allow_extra_requests": False, "comments": True, "extra_info": True}

    for post in get_posts(post_urls=[url], options=options):
        text_post = post['text']
        # NOTE(review): 'comments_full' is treated as optional and possibly
        # None when no comments were fetched -- confirm against the scraper.
        comments.extend(
            comment['comment_text'] for comment in (post.get('comments_full') or [])
        )

    return {'post': text_post, 'sentences': comments}
|