From 7c68d0ce7b547bd45f52ee028dd80ea2901cba26 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Maciej=20=C5=9Acigacz?=
Date: Wed, 14 Jun 2023 00:03:39 +0200
Subject: [PATCH] add tone and style services

---
Note: blob hashes on the four new files are omitted because their contents
were revised (docstrings, strip-before-filter in style.clear_data, empty-batch
guard in tone.predict_labels, `predictions` spelling in both services).

 application/__init__.py                |  4 ++
 application/functions/style.py         | 48 ++++++++++++++++++++++++
 application/functions/tone.py          | 51 ++++++++++++++++++++++++++
 application/services/style_services.py | 17 +++++++++
 application/services/tone_services.py  | 17 +++++++++
 5 files changed, 137 insertions(+)
 create mode 100644 application/functions/style.py
 create mode 100644 application/functions/tone.py
 create mode 100644 application/services/style_services.py
 create mode 100644 application/services/tone_services.py

diff --git a/application/__init__.py b/application/__init__.py
index 70d39d6..8098b5b 100644
--- a/application/__init__.py
+++ b/application/__init__.py
@@ -8,9 +8,13 @@ def create_app():
     from application.services.sentiment_service import sentiment_service
     from application.services.errors_service import errors_service
     from application.services.irony_service import irony_service
+    from application.services.style_services import style_service
+    from application.services.tone_services import tone_service
 
     application.register_blueprint(sentiment_service)
     application.register_blueprint(errors_service)
     application.register_blueprint(irony_service)
+    application.register_blueprint(style_service)
+    application.register_blueprint(tone_service)
 
     return application
\ No newline at end of file
diff --git a/application/functions/style.py b/application/functions/style.py
new file mode 100644
--- /dev/null
+++ b/application/functions/style.py
@@ -0,0 +1,48 @@
+from transformers import pipeline
+import re
+
+pipe = pipeline('text-classification', model="jagiyahh/simple-polish-stylistic-errors", tokenizer = 'dkleczek/bert-base-polish-uncased-v1')
+
+def style_prediction(data):
+    """Run the stylistic-error classifier on ``data`` and return its raw output."""
+    result = pipe(data)
+
+    return result
+
+def clear_data(data):
+    """Normalise ``data['sentences']`` before classification.
+
+    Every character other than Latin/Polish letters, spaces and apostrophes
+    is removed, then each sentence is stripped and lower-cased. Sentences
+    that end up empty are dropped, so the result may be shorter than the
+    input list.
+    """
+    data = [re.sub(r"[^A-Za-zżźćńółęąśŻŹĆĄŚĘŁÓŃ ']+", r"", i) for i in data['sentences']]
+    data = [i.strip() for i in data]
+    # Filter only after stripping so whitespace-only sentences are dropped too.
+    data = [x for x in data if x != '']
+    data = [i.lower() for i in data]
+
+    return data
+
+def count_predictions(predictions):
+    """Count how many predictions carry ``LABEL_0`` and ``LABEL_1``.
+
+    Returns a dict with ``stylistically_positive`` (``LABEL_0`` count) and
+    ``stylistically_negative`` (``LABEL_1`` count).
+    """
+    l0 = 0
+    l1 = 0
+    # Named ``counts`` rather than ``all`` to avoid shadowing the builtin.
+    counts = {}
+
+    for i in predictions:
+        if i['label'] == 'LABEL_0':
+            l0 += 1
+        elif i['label'] == 'LABEL_1':
+            l1 += 1
+
+    counts['stylistically_positive'] = l0
+    counts['stylistically_negative'] = l1
+
+    return counts
diff --git a/application/functions/tone.py b/application/functions/tone.py
new file mode 100644
--- /dev/null
+++ b/application/functions/tone.py
@@ -0,0 +1,51 @@
+from transformers import BertTokenizer, BertForSequenceClassification
+import torch
+
+model = BertForSequenceClassification.from_pretrained('jagiyahh/simple-polish-tone-recognition')
+tokenizer = BertTokenizer.from_pretrained('jagiyahh/simple-polish-tone-recognition')
+
+labels = ['controversial', 'intriguing', 'formal']
+
+def clear_data(data):
+    """Strip surrounding whitespace from each sentence and lower-case it."""
+    data = [i.strip() for i in data]
+    data = [i.lower() for i in data]
+
+    return data
+
+
+def predict_labels(texts):
+    """Return, for each text, the list of tone labels scoring above 0.5.
+
+    Each logit is passed through a sigmoid independently, so a text may get
+    zero, one or several of the entries in ``labels``.
+    """
+    # Nothing to classify; do not hand an empty batch to the tokenizer/model.
+    if not texts:
+        return []
+
+    encodings = tokenizer(texts, truncation=True, padding=True, return_tensors='pt')
+    input_ids = encodings['input_ids']
+    attention_mask = encodings['attention_mask']
+
+    with torch.no_grad():
+        outputs = model(input_ids, attention_mask=attention_mask)
+        logits = outputs.logits
+
+    probabilities = torch.sigmoid(logits)
+
+    threshold = 0.5
+    predictions = (probabilities > threshold).int()
+
+    predicted_labels = []
+    for pred in predictions:
+        label_indices = torch.nonzero(pred).flatten().tolist()
+        predicted_labels.append([labels[i] for i in label_indices])
+
+    return predicted_labels
+
+def tone_prediction(data):
+    """Predict tone labels for a list of sentences (see ``predict_labels``)."""
+    prediction = predict_labels(data)
+
+    return prediction
diff --git a/application/services/style_services.py b/application/services/style_services.py
new file mode 100644
--- /dev/null
+++ b/application/services/style_services.py
@@ -0,0 +1,17 @@
+from flask import(
+    request,
+    jsonify,
+    Blueprint,
+    )
+from application.functions.style import style_prediction, clear_data
+
+style_service = Blueprint("style_service", __name__)
+
+@style_service.route("/get_style_data", methods=['POST'])
+def get_data():
+    """Classify the style of the sentences posted under the ``sentences`` JSON key."""
+    data = request.get_json()
+    data_clear = clear_data(data)
+    predictions = style_prediction(data_clear)
+
+    return jsonify({"predictions": predictions})
\ No newline at end of file
diff --git a/application/services/tone_services.py b/application/services/tone_services.py
new file mode 100644
--- /dev/null
+++ b/application/services/tone_services.py
@@ -0,0 +1,17 @@
+from flask import(
+    request,
+    jsonify,
+    Blueprint,
+    )
+from application.functions.tone import tone_prediction, clear_data
+
+tone_service = Blueprint("tone_service", __name__)
+
+@tone_service.route("/get_tone_data", methods=['POST'])
+def get_data():
+    """Predict tone labels for the sentences posted under the ``sentences`` JSON key."""
+    data = request.get_json()
+    data_clear = clear_data(data['sentences'])
+    predictions = tone_prediction(data_clear)
+
+    return jsonify({"predictions": predictions})
\ No newline at end of file