errors model

This commit is contained in:
szymonj98 2023-05-28 14:50:23 +02:00
parent 2f2de698b6
commit 13c43d27d2
10 changed files with 152369 additions and 1 deletions

View File

@ -6,6 +6,8 @@ def create_app():
CORS(application) CORS(application)
from application.services.sentiment_service import sentiment_service from application.services.sentiment_service import sentiment_service
from application.services.errors_service import errors_service
application.register_blueprint(sentiment_service) application.register_blueprint(sentiment_service)
application.register_blueprint(errors_service)
return application return application

View File

@ -0,0 +1,11 @@
from transformers import AutoTokenizer
from transformers import pipeline

# Paths to the locally stored fine-tuned error-correction model and tokenizer.
model = 'application/models/errors_model'
tokenizer = AutoTokenizer.from_pretrained('application/tokenizers/errors_tokenizer')

# Build the text2text-generation pipeline ONCE at import time.
# The original code constructed the pipeline inside errors_correction(),
# which reloads the full model weights on every call — prohibitively slow
# for a per-request service endpoint.
_pipe = pipeline("text2text-generation", model=model, tokenizer=tokenizer, max_length=2000)


def errors_correction(data):
    """Run text error correction on *data*.

    Args:
        data: a string (or list of strings) to correct.

    Returns:
        The raw pipeline output: a list of dicts, each with a
        'generated_text' key holding the corrected text.
    """
    return _pipe(data)

View File

@ -0,0 +1,77 @@
{
"_name_or_path": "sdadas/polish-bart-base",
"activation_dropout": 0.1,
"activation_function": "gelu",
"add_bias_logits": false,
"add_final_layer_norm": false,
"architectures": [
"BartForConditionalGeneration"
],
"attention_dropout": 0.1,
"bos_token_id": 0,
"classif_dropout": 0.1,
"classifier_dropout": 0.0,
"d_model": 768,
"decoder_attention_heads": 12,
"decoder_ffn_dim": 3072,
"decoder_layerdrop": 0.0,
"decoder_layers": 6,
"decoder_start_token_id": 2,
"do_blenderbot_90_layernorm": false,
"dropout": 0.1,
"early_stopping": true,
"encoder_attention_heads": 12,
"encoder_ffn_dim": 3072,
"encoder_layerdrop": 0.0,
"encoder_layers": 6,
"eos_token_id": 2,
"extra_pos_embeddings": 2,
"force_bos_token_to_be_generated": false,
"forced_eos_token_id": 2,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1",
"2": "LABEL_2"
},
"init_std": 0.02,
"is_encoder_decoder": true,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1,
"LABEL_2": 2
},
"max_position_embeddings": 1024,
"model_type": "bart",
"no_repeat_ngram_size": 3,
"normalize_before": false,
"normalize_embedding": true,
"num_beams": 4,
"num_hidden_layers": 6,
"pad_token_id": 1,
"scale_embedding": false,
"static_position_embeddings": false,
"task_specific_params": {
"summarization": {
"length_penalty": 1.0,
"max_length": 128,
"min_length": 12,
"num_beams": 4
},
"summarization_cnn": {
"length_penalty": 2.0,
"max_length": 142,
"min_length": 56,
"num_beams": 4
},
"summarization_xsum": {
"length_penalty": 1.0,
"max_length": 62,
"min_length": 11,
"num_beams": 6
}
},
"torch_dtype": "float32",
"transformers_version": "4.29.2",
"use_cache": true,
"vocab_size": 50002
}

View File

@ -0,0 +1,12 @@
{
"_from_model_config": true,
"bos_token_id": 0,
"decoder_start_token_id": 2,
"early_stopping": true,
"eos_token_id": 2,
"forced_eos_token_id": 2,
"no_repeat_ngram_size": 3,
"num_beams": 4,
"pad_token_id": 1,
"transformers_version": "4.29.2"
}

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,15 @@
from flask import (
    request,
    jsonify,
    Blueprint,
)
from application.functions.errors import errors_correction

errors_service = Blueprint("errors_service", __name__)


@errors_service.route("/get_errors", methods=['PUT'])
def get_data():
    """Correct the text in the request body and return the predictions.

    Expects a JSON payload of the form {"sentence": "<text>"}; responds
    with {"predictions": [...]} from the correction model, or a 400 error
    when the payload is missing or malformed (the original code would have
    raised an unhandled KeyError/TypeError and returned a 500).
    """
    data = request.get_json(silent=True)
    if not data or 'sentence' not in data:
        return jsonify({"error": "JSON body with a 'sentence' field is required"}), 400
    predictions = errors_correction(data['sentence'])  # run error correction
    return jsonify({"predictions": predictions})

View File

@ -7,7 +7,7 @@ from application.functions.sentiment import sentiment_prediction, count_predicti
sentiment_service = Blueprint("sentiment_service", __name__) sentiment_service = Blueprint("sentiment_service", __name__)
@sentiment_service.route("/get_sentiment_data", methods=['GET']) @sentiment_service.route("/get_sentiment_data", methods=['PUT'])
def get_data(): def get_data():
data = request.get_json() data = request.get_json()
predicitons = sentiment_prediction(data['sentences']) #predykcje predicitons = sentiment_prediction(data['sentences']) #predykcje

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1 @@
{"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "add_prefix_space": false, "errors": "replace", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": "<mask>", "trim_offsets": true, "special_tokens_map_file": null, "name_or_path": "original/polish-bart-base", "tokenizer_class": "BartTokenizer"}