add scraper for comments, endpoint for scraping

Maciej Ścigacz 2023-05-30 00:00:34 +02:00
parent e4786e2027
commit 081b161ca2
2 changed files with 18 additions and 3 deletions

application/functions/sentiment.py

@@ -1,6 +1,7 @@
from transformers import AutoTokenizer
from transformers import pipeline
import re
from facebook_scraper import get_posts
model = 'application/models/sentiment_model'
tokenizer = AutoTokenizer.from_pretrained('application/tokenizers/sentiment_tokenizer')
@@ -36,3 +37,10 @@ def count_predictions(predictions):
    all['neutral'] = l2
    return all

def scrapp_comments(url):
    # Collect the text of every comment on the given Facebook post URL
    result = []
    for post in get_posts(post_urls=[url], options={"allow_extra_requests": False, "comments": True, "extra_info": True}):
        for comment in post['comments_full']:
            result.append(comment['comment_text'])
    return result
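The scraper can also be exercised on its own, outside the Flask route; a minimal sketch, assuming a public post whose comments facebook_scraper can fetch without logging in (the URL below is a hypothetical example):

post_url = 'https://www.facebook.com/somepage/posts/1234567890'  # hypothetical example link
for text in scrapp_comments(post_url):
    print(text)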

View File

@@ -3,7 +3,7 @@ from flask import(
    jsonify,
    Blueprint,
)
from application.functions.sentiment import sentiment_prediction, count_predictions, clear_data, scrapp_comments
sentiment_service = Blueprint("sentiment_service", __name__)
@@ -19,3 +19,10 @@ def get_data():
        predicitons[i]['label'] = predicitons[i]['label'][6:]
    return jsonify({"predictions": predicitons, "count_labels": count_labels})

@sentiment_service.route("/scrapp_comments", methods=['POST'])
def scrapp():
    # Expects a JSON body like {"link": "<facebook post url>"}
    url = request.get_json()
    comments = scrapp_comments(url['link'])
    return jsonify({'comments': comments})
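Once the blueprint is registered, the new route can be exercised with a POST carrying the post link; a minimal sketch using the requests library, assuming the app runs locally on port 5000 and the blueprint is mounted at the root (the real prefix depends on how sentiment_service is registered), with a hypothetical post URL:

import requests

resp = requests.post(
    'http://localhost:5000/scrapp_comments',  # prefix depends on blueprint registration
    json={'link': 'https://www.facebook.com/somepage/posts/1234567890'},  # hypothetical post link
)
print(resp.json()['comments'])  # list of scraped comment texts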