Add preprocessing of data before predictions
This commit is contained in:
parent
e43a18241c
commit
e4786e2027
@ -1,9 +1,9 @@
|
||||
from transformers import AutoTokenizer
|
||||
from transformers import pipeline
|
||||
import re
|
||||
|
||||
model = 'application/models/sentiment_model'
|
||||
tokenizer = AutoTokenizer.from_pretrained('application/tokenizers/sentiment_tokenizer')
|
||||
# tokenizer = AutoTokenizer.from_pretrained("sdadas/polish-gpt2-small")
|
||||
|
||||
def sentiment_prediction(data):
|
||||
pipe = pipeline('text-classification', model=model, tokenizer = tokenizer)
|
||||
@ -11,6 +11,12 @@ def sentiment_prediction(data):
|
||||
|
||||
return result
|
||||
|
||||
def clear_data(data):
    """Normalize the input sentences before they are passed on for prediction.

    Each string in ``data['sentences']`` is processed in three steps:

    1. every character that is not an ASCII letter, a Polish diacritic
       letter, a space or an apostrophe is removed;
    2. leading and trailing whitespace is stripped;
    3. the text is lower-cased.

    Args:
        data: Mapping with a ``'sentences'`` key holding an iterable of
            strings.

    Returns:
        A list with one cleaned string per input sentence, in the same
        order. A sentence made up only of removed characters becomes ``""``.

    Raises:
        KeyError: If ``data`` has no ``'sentences'`` key.
    """
    # Compile once per call instead of resolving the pattern for every sentence.
    disallowed = re.compile(r"[^A-Za-zżźćńółęąśŻŹĆĄŚĘŁÓŃ ']+")
    # Single pass: remove disallowed characters, then trim, then lower-case.
    return [
        disallowed.sub("", sentence).strip().lower()
        for sentence in data['sentences']
    ]
|
||||
|
||||
def count_predictions(predictions):
|
||||
l0 = 0
|
||||
l1 = 0
|
||||
|
@ -3,15 +3,16 @@ from flask import(
|
||||
jsonify,
|
||||
Blueprint,
|
||||
)
|
||||
from application.functions.sentiment import sentiment_prediction, count_predictions
|
||||
from application.functions.sentiment import sentiment_prediction, count_predictions, clear_data
|
||||
|
||||
sentiment_service = Blueprint("sentiment_service", __name__)
|
||||
|
||||
@sentiment_service.route("/get_sentiment_data", methods=['POST'])
|
||||
def get_data():
|
||||
data = request.get_json()
|
||||
predicitons = sentiment_prediction(data['sentences']) #predykcje
|
||||
count_labels = count_predictions(predicitons) #dane do wykresu
|
||||
data_clear = clear_data(data) #czyszczenie danych wejsciowych
|
||||
predicitons = sentiment_prediction(data_clear) #predykcje
|
||||
count_labels = count_predictions(predicitons) #dane do wykresu
|
||||
|
||||
for i in range(0, len(predicitons)):
|
||||
predicitons[i]['sentence'] = data['sentences'][i]
|
||||
|
Loading…
Reference in New Issue
Block a user