add scraper for comments, endpoint for scraping

Maciej Ścigacz 2023-05-30 00:00:34 +02:00
parent e4786e2027
commit 081b161ca2
2 changed files with 18 additions and 3 deletions

application/functions/sentiment.py

@@ -1,6 +1,7 @@
from transformers import AutoTokenizer
from transformers import pipeline
import re
from facebook_scraper import get_posts
model = 'application/models/sentiment_model'
tokenizer = AutoTokenizer.from_pretrained('application/tokenizers/sentiment_tokenizer')
@@ -36,3 +37,10 @@ def count_predictions(predictions):
    all['neutral'] = l2
    return all

def scrapp_comments(url):
    # Collect the text of every comment on the given Facebook post URL
    result = []
    for post in get_posts(post_urls=[url], options={"allow_extra_requests": False, "comments": True, "extra_info": True}):
        for comment in post['comments_full']:
            result.append(comment['comment_text'])
    return result
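The scraper can also be exercised on its own, outside the Flask route; a minimal sketch, assuming a public post whose comments facebook_scraper can fetch without logging in (the URL below is a hypothetical example):

post_url = 'https://www.facebook.com/somepage/posts/1234567890'  # hypothetical example link
for text in scrapp_comments(post_url):
    print(text)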

View File

@@ -3,7 +3,7 @@ from flask import(
    jsonify,
    Blueprint,
)
from application.functions.sentiment import sentiment_prediction, count_predictions, clear_data, scrapp_comments
sentiment_service = Blueprint("sentiment_service", __name__)
@@ -19,3 +19,10 @@ def get_data():
        predicitons[i]['label'] = predicitons[i]['label'][6:]
    return jsonify({"predictions": predicitons, "count_labels": count_labels})

@sentiment_service.route("/scrapp_comments", methods=['POST'])
def scrapp():
    # Expects a JSON body like {"link": "<facebook post url>"}
    url = request.get_json()
    comments = scrapp_comments(url['link'])
    return jsonify({'comments': comments})
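Once the blueprint is registered, the new route can be exercised with a POST carrying the post link; a minimal sketch using the requests library, assuming the app runs locally on port 5000 and the blueprint is mounted at the root (the real prefix depends on how sentiment_service is registered), with a hypothetical post URL:

import requests

resp = requests.post(
    'http://localhost:5000/scrapp_comments',  # prefix depends on blueprint registration
    json={'link': 'https://www.facebook.com/somepage/posts/1234567890'},  # hypothetical post link
)
print(resp.json()['comments'])  # list of scraped comment texts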