add preprocessing of data before predictions

2023-05-29 23:41:36 +02:00 · 2023-05-29 23:41:36 +02:00 · e4786e2027
commit e4786e2027
parent e43a18241c
2 changed files with 11 additions and 4 deletions
--- a/application/functions/sentiment.py
+++ b/application/functions/sentiment.py
@ -1,9 +1,9 @@
 from transformers import AutoTokenizer
 from transformers import pipeline
 import re
 # Relative path of the sentiment model; handed to `pipeline(...)` as its `model` argument.
 model = 'application/models/sentiment_model'
 # Tokenizer is created once, when this module is imported, from the given relative path.
 tokenizer = AutoTokenizer.from_pretrained('application/tokenizers/sentiment_tokenizer')
 # Alternative tokenizer source, currently disabled:
 # tokenizer = AutoTokenizer.from_pretrained("sdadas/polish-gpt2-small")
 def sentiment_prediction(data):
    pipe = pipeline('text-classification', model=model, tokenizer = tokenizer)
@ -11,6 +11,12 @@ def sentiment_prediction(data):
    return result
 def clear_data(data):
    """Return the cleaned sentences from a request payload.

    Each entry of ``data['sentences']`` is processed in three steps:
    every character that is not an ASCII letter, a Polish diacritic
    letter, a space or an apostrophe is removed; surrounding whitespace
    is stripped; the text is lower-cased.

    Args:
        data: Mapping with a ``'sentences'`` key holding an iterable of
            strings.

    Returns:
        A new list of cleaned strings, in the same order as the input.
    """
    disallowed = r"[^A-Za-zżźćńółęąśŻŹĆĄŚĘŁÓŃ ']+"
    cleaned = []
    for sentence in data['sentences']:
        letters_only = re.sub(disallowed, r"", sentence)
        cleaned.append(letters_only.strip().lower())
    return cleaned
 def count_predictions(predictions):
    l0 = 0
    l1 = 0
--- a/application/services/sentiment_service.py
+++ b/application/services/sentiment_service.py
@ -3,15 +3,16 @@ from flask import(
    jsonify, 
    Blueprint,
    )
-from application.functions.sentiment import sentiment_prediction, count_predictions
+from application.functions.sentiment import sentiment_prediction, count_predictions, clear_data
 sentiment_service = Blueprint("sentiment_service", __name__)    
@sentiment_service.route("/get_sentiment_data", methods=['POST'])
 def get_data():
    data = request.get_json()
-    predicitons = sentiment_prediction(data['sentences']) #predykcje
+    data_clear = clear_data(data)                       #czyszczenie danych wejsciowych
-    count_labels = count_predictions(predicitons) #dane do wykresu
+    predicitons = sentiment_prediction(data_clear)      #predykcje
    count_labels = count_predictions(predicitons)       #dane do wykresu
    for i in range(0, len(predicitons)):
        predicitons[i]['sentence'] = data['sentences'][i]