"""Sentiment-analysis helpers for Facebook post comments.

Importing this module loads the ``Scigi/sentiment-analysis-model`` model and
tokenizer via ``from_pretrained``. The functions below scrape the comments of
a post, normalise them, classify them and tally the predicted labels.
"""
import re
from collections import Counter

from facebook_scraper import get_posts
from transformers import AutoTokenizer
from transformers import pipeline, GPT2ForSequenceClassification

# model = 'application/models/sentiment_model'
# tokenizer = AutoTokenizer.from_pretrained('application/tokenizers/sentiment_tokenizer')
model = GPT2ForSequenceClassification.from_pretrained("Scigi/sentiment-analysis-model", num_labels=3)
tokenizer = AutoTokenizer.from_pretrained("Scigi/sentiment-analysis-model")

# Matches every run of characters that is NOT an ASCII letter, one of the
# listed Polish diacritic letters, a space or an apostrophe.
_DISALLOWED_CHARS_RE = re.compile(r"[^A-Za-zżźćńółęąśŻŹĆĄŚĘŁÓŃ ']+")


def sentiment_prediction(data):
    """Classify *data* with the module-level model and tokenizer.

    Args:
        data: Input passed straight to the ``text-classification`` pipeline
            (in this file, the list of strings produced by ``clear_data``).

    Returns:
        Whatever the pipeline returns for *data*. ``count_predictions``
        consumes it as an iterable of mappings with a ``'label'`` key.
    """
    pipe = pipeline('text-classification', model=model, tokenizer=tokenizer)
    result = pipe(data)
    return result


def clear_data(data):
    """Normalise the sentences in *data* for classification.

    Each sentence has every disallowed character removed (anything other
    than ASCII letters, the listed Polish letters, spaces and apostrophes),
    is stripped of surrounding whitespace and lower-cased. Sentences that
    end up empty are dropped.

    Args:
        data: Mapping with a ``'sentences'`` key holding an iterable of
            strings (the shape returned by ``scrapp_comments``).

    Returns:
        A list of cleaned, non-empty, lower-case strings in input order.
    """
    cleaned = []
    for sentence in data['sentences']:
        text = _DISALLOWED_CHARS_RE.sub("", sentence).strip().lower()
        # Filter AFTER stripping: a sentence reduced to spaces only would
        # otherwise survive as an empty string and reach the classifier.
        if text:
            cleaned.append(text)
    return cleaned


def count_predictions(predictions):
    """Tally predicted labels into positive / negative / neutral counts.

    ``LABEL_1`` is counted as positive, ``LABEL_0`` as negative and
    ``LABEL_2`` as neutral; any other label is ignored.

    Args:
        predictions: Iterable of mappings, each with a ``'label'`` key.

    Returns:
        A dict with the keys ``'positive'``, ``'negative'`` and
        ``'neutral'`` (in that order) mapped to integer counts.
    """
    label_counts = Counter(prediction['label'] for prediction in predictions)
    return {
        'positive': label_counts['LABEL_1'],
        'negative': label_counts['LABEL_0'],
        'neutral': label_counts['LABEL_2'],
    }


def scrapp_comments(url):
    """Scrape the text and the comments of the Facebook post at *url*.

    Args:
        url: URL of the post, passed to ``get_posts`` as the only entry of
            ``post_urls``.

    Returns:
        A dict with ``'post'`` (the text of the last post yielded, or
        ``None`` when nothing was yielded) and ``'sentences'`` (the list of
        ``comment_text`` values collected from all yielded posts).
    """
    comments = []
    # Bound up front so an empty scrape returns a well-formed result instead
    # of raising UnboundLocalError.
    text_post = None
    scrape_options = {
        "allow_extra_requests": False,
        "comments": True,
        "extra_info": True,
    }
    for post in get_posts(post_urls=[url], options=scrape_options):
        text_post = post['text']
        # NOTE(review): guards against comments_full being None/empty;
        # confirm against facebook_scraper whether that can actually occur.
        for comment in post['comments_full'] or []:
            comments.append(comment['comment_text'])
    return {'post': text_post, 'sentences': comments}