add preprocessing of data before predictions

This commit is contained in:
Maciej Ścigacz 2023-05-29 23:41:36 +02:00
parent e43a18241c
commit e4786e2027
2 changed files with 11 additions and 4 deletions

View File

@ -1,9 +1,9 @@
from transformers import AutoTokenizer from transformers import AutoTokenizer
from transformers import pipeline from transformers import pipeline
import re
model = 'application/models/sentiment_model' model = 'application/models/sentiment_model'
tokenizer = AutoTokenizer.from_pretrained('application/tokenizers/sentiment_tokenizer') tokenizer = AutoTokenizer.from_pretrained('application/tokenizers/sentiment_tokenizer')
# tokenizer = AutoTokenizer.from_pretrained("sdadas/polish-gpt2-small")
def sentiment_prediction(data): def sentiment_prediction(data):
pipe = pipeline('text-classification', model=model, tokenizer = tokenizer) pipe = pipeline('text-classification', model=model, tokenizer = tokenizer)
@ -11,6 +11,12 @@ def sentiment_prediction(data):
return result return result
# Everything except ASCII letters, Polish letters, spaces and apostrophes.
_DISALLOWED_CHARS = re.compile(r"[^A-Za-zżźćńółęąśŻŹĆĄŚĘŁÓŃ ']+")
# Any whitespace character other than the plain space (tab, newline, NBSP, ...).
_NON_SPACE_WHITESPACE = re.compile(r"[^\S ]")


def clear_data(data):
    """Normalize the input sentences before they are passed to the model.

    Each sentence is reduced to letters (ASCII and Polish), spaces and
    apostrophes, stripped of leading/trailing whitespace and lower-cased.

    Args:
        data: Mapping with a ``'sentences'`` key holding an iterable of
            strings.

    Returns:
        A list of cleaned strings, in the same order and of the same length
        as ``data['sentences']``. A sentence with no allowed characters
        becomes an empty string rather than being dropped, so positions still
        line up with the original input.

    Raises:
        KeyError: If ``data`` has no ``'sentences'`` key.
    """
    cleaned = []
    for sentence in data['sentences']:
        # Tabs/newlines separate words just like spaces do; turn them into
        # spaces first so the filter below does not glue neighbouring words
        # together (e.g. "dobry\nfilm" must not become "dobryfilm").
        text = _NON_SPACE_WHITESPACE.sub(" ", sentence)
        text = _DISALLOWED_CHARS.sub("", text)
        cleaned.append(text.strip().lower())
    return cleaned
def count_predictions(predictions): def count_predictions(predictions):
l0 = 0 l0 = 0
l1 = 0 l1 = 0

View File

@ -3,15 +3,16 @@ from flask import(
jsonify, jsonify,
Blueprint, Blueprint,
) )
from application.functions.sentiment import sentiment_prediction, count_predictions from application.functions.sentiment import sentiment_prediction, count_predictions, clear_data
sentiment_service = Blueprint("sentiment_service", __name__) sentiment_service = Blueprint("sentiment_service", __name__)
@sentiment_service.route("/get_sentiment_data", methods=['POST']) @sentiment_service.route("/get_sentiment_data", methods=['POST'])
def get_data(): def get_data():
data = request.get_json() data = request.get_json()
predicitons = sentiment_prediction(data['sentences']) #predykcje data_clear = clear_data(data) #czyszczenie danych wejsciowych
count_labels = count_predictions(predicitons) #dane do wykresu predicitons = sentiment_prediction(data_clear) #predykcje
count_labels = count_predictions(predicitons) #dane do wykresu
for i in range(0, len(predicitons)): for i in range(0, len(predicitons)):
predicitons[i]['sentence'] = data['sentences'][i] predicitons[i]['sentence'] = data['sentences'][i]