From 66a0b1c6bc479fa1b55acc225dfc4dd343ce76af Mon Sep 17 00:00:00 2001
From: szymonj98
Date: Wed, 31 May 2023 16:50:59 +0200
Subject: [PATCH] huggingface error model

---
 application/functions/errors.py    | 7 ++++---
 application/functions/sentiment.py | 4 ++--
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/application/functions/errors.py b/application/functions/errors.py
index 0c0fcb6..77f5ace 100644
--- a/application/functions/errors.py
+++ b/application/functions/errors.py
@@ -1,11 +1,12 @@
 from transformers import AutoTokenizer
 from transformers import pipeline
+#from transformers import BartForConditionalGeneration
 
-# model = 'application/models/errors_model'
-# tokenizer = AutoTokenizer.from_pretrained('application/tokenizers/errors_tokenizer')
+tokenizer = AutoTokenizer.from_pretrained("szymonj/polish-simple-error-correction")
+#model = BartForConditionalGeneration.from_pretrained("szymonj/polish-simple-error-correction")
+pipe = pipeline("text2text-generation",model="szymonj/polish-simple-error-correction",tokenizer=tokenizer,max_length=2000)
 
 def errors_correction(data):
-    pipe = pipeline("text2text-generation",model=model,tokenizer=tokenizer,max_length=2000)
     result = pipe(data)
 
     return result
diff --git a/application/functions/sentiment.py b/application/functions/sentiment.py
index d54b0dd..dc1f42b 100644
--- a/application/functions/sentiment.py
+++ b/application/functions/sentiment.py
@@ -6,11 +6,11 @@ from facebook_scraper import get_posts
 
 # model = 'application/models/sentiment_model'
 # tokenizer = AutoTokenizer.from_pretrained('application/tokenizers/sentiment_tokenizer')
-model = GPT2ForSequenceClassification.from_pretrained("Scigi/sentiment-analysis-model", num_labels=3)
+#model = GPT2ForSequenceClassification.from_pretrained("Scigi/sentiment-analysis-model", num_labels=3)
 tokenizer = AutoTokenizer.from_pretrained("Scigi/sentiment-analysis-model")
+pipe = pipeline('text-classification', model="Scigi/sentiment-analysis-model", tokenizer = tokenizer)
 
 def sentiment_prediction(data):
-    pipe = pipeline('text-classification', model=model, tokenizer = tokenizer)
     result = pipe(data)
 
     return result