Add preprocessing of data before predictions
This commit is contained in:
parent
e43a18241c
commit
e4786e2027
@ -1,9 +1,9 @@
|
|||||||
from transformers import AutoTokenizer
|
from transformers import AutoTokenizer
|
||||||
from transformers import pipeline
|
from transformers import pipeline
|
||||||
|
import re
|
||||||
|
|
||||||
model = 'application/models/sentiment_model'
|
model = 'application/models/sentiment_model'
|
||||||
tokenizer = AutoTokenizer.from_pretrained('application/tokenizers/sentiment_tokenizer')
|
tokenizer = AutoTokenizer.from_pretrained('application/tokenizers/sentiment_tokenizer')
|
||||||
# tokenizer = AutoTokenizer.from_pretrained("sdadas/polish-gpt2-small")
|
|
||||||
|
|
||||||
def sentiment_prediction(data):
|
def sentiment_prediction(data):
|
||||||
pipe = pipeline('text-classification', model=model, tokenizer = tokenizer)
|
pipe = pipeline('text-classification', model=model, tokenizer = tokenizer)
|
||||||
@ -11,6 +11,12 @@ def sentiment_prediction(data):
|
|||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
# Matches every run of characters that is NOT an ASCII letter, a Polish
# diacritic letter, a space or an apostrophe.
_DISALLOWED_CHARS = re.compile(r"[^A-Za-zżźćńółęąśŻŹĆĄŚĘŁÓŃ ']+")


def clear_data(data):
    """Return cleaned, lower-cased copies of the sentences in *data*.

    Each sentence has every character outside the allowed set (ASCII
    letters, Polish diacritic letters, spaces, apostrophes) deleted, is
    stripped of leading/trailing whitespace and is then lower-cased.

    Args:
        data: Mapping with a ``'sentences'`` key holding an iterable of
            strings. The mapping itself is not modified.

    Returns:
        A new list with one cleaned string per input sentence, in the
        original order. Sentences that end up empty are kept as ``""``,
        so the result always has the same length as the input.

    Raises:
        KeyError: If *data* has no ``'sentences'`` key.
    """
    # Single pass: remove disallowed characters, trim, then lower-case.
    return [
        _DISALLOWED_CHARS.sub("", sentence).strip().lower()
        for sentence in data['sentences']
    ]
|
||||||
|
|
||||||
def count_predictions(predictions):
|
def count_predictions(predictions):
|
||||||
l0 = 0
|
l0 = 0
|
||||||
l1 = 0
|
l1 = 0
|
||||||
|
@ -3,15 +3,16 @@ from flask import(
|
|||||||
jsonify,
|
jsonify,
|
||||||
Blueprint,
|
Blueprint,
|
||||||
)
|
)
|
||||||
from application.functions.sentiment import sentiment_prediction, count_predictions
|
from application.functions.sentiment import sentiment_prediction, count_predictions, clear_data
|
||||||
|
|
||||||
sentiment_service = Blueprint("sentiment_service", __name__)
|
sentiment_service = Blueprint("sentiment_service", __name__)
|
||||||
|
|
||||||
@sentiment_service.route("/get_sentiment_data", methods=['POST'])
|
@sentiment_service.route("/get_sentiment_data", methods=['POST'])
|
||||||
def get_data():
|
def get_data():
|
||||||
data = request.get_json()
|
data = request.get_json()
|
||||||
predicitons = sentiment_prediction(data['sentences']) #predykcje
|
data_clear = clear_data(data) #czyszczenie danych wejsciowych
|
||||||
count_labels = count_predictions(predicitons) #dane do wykresu
|
predicitons = sentiment_prediction(data_clear) #predykcje
|
||||||
|
count_labels = count_predictions(predicitons) #dane do wykresu
|
||||||
|
|
||||||
for i in range(0, len(predicitons)):
|
for i in range(0, len(predicitons)):
|
||||||
predicitons[i]['sentence'] = data['sentences'][i]
|
predicitons[i]['sentence'] = data['sentences'][i]
|
||||||
|
Loading…
Reference in New Issue
Block a user