sentiment analysis

This commit is contained in:
Maciej Ścigacz 2023-05-27 15:10:30 +02:00
parent 7c78f2eb13
commit c7b805bedb
14 changed files with 102377 additions and 0 deletions

View File

@ -1,2 +1,8 @@
# exact_data2023
### Odpalanie:
```
pip install -r requirements.txt
flask run
```

11
application/__init__.py Normal file
View File

@ -0,0 +1,11 @@
from flask import Flask
from flask_cors import CORS
def create_app():
    """Application factory.

    Builds a Flask instance, enables CORS on it, registers the sentiment
    blueprint and returns the configured app.
    """
    flask_app = Flask(__name__)
    CORS(flask_app)

    # Deferred import: the service module is loaded only when the factory runs.
    from application.services.sentiment_service import (
        sentiment_service as sentiment_blueprint,
    )

    flask_app.register_blueprint(sentiment_blueprint)
    return flask_app

View File

@ -0,0 +1,11 @@
from transformers import AutoTokenizer
from transformers import pipeline
# Location of the fine-tuned classification model; handed to ``pipeline`` as
# its ``model`` argument. The path is relative, so it presumably resolves
# against the directory the server is started from -- TODO confirm.
model = 'application/models/sentiment_model'
# The tokenizer is loaded once, when this module is imported, from the copy
# stored in the repository (same relative-path caveat as above).
tokenizer = AutoTokenizer.from_pretrained('application/tokenizers/sentiment_tokenizer')
# Alternative kept for reference: load the tokenizer by name instead of the local copy.
# tokenizer = AutoTokenizer.from_pretrained("sdadas/polish-gpt2-small")
# Lazily-built pipeline shared by all calls; constructing it loads the model,
# which is far too expensive to repeat on every prediction.
_pipeline = None


def _get_pipeline():
    """Return the shared text-classification pipeline, building it on first use."""
    global _pipeline
    if _pipeline is None:
        _pipeline = pipeline('text-classification', model=model, tokenizer=tokenizer)
    return _pipeline


def sentiment_prediction(data):
    """Run the sentiment classifier on ``data``.

    Args:
        data: Input forwarded unchanged to the text-classification pipeline.

    Returns:
        Whatever the pipeline returns for ``data``.
    """
    return _get_pipeline()(data)

View File

@ -0,0 +1,45 @@
{
"_name_or_path": "sdadas/polish-gpt2-small",
"activation_function": "gelu_fast",
"architectures": [
"GPT2ForSequenceClassification"
],
"attn_pdrop": 0.1,
"bos_token_id": 0,
"embd_pdrop": 0.1,
"eos_token_id": 2,
"id2label": {
"0": "LABEL_0",
"1": "LABEL_1",
"2": "LABEL_2"
},
"initializer_range": 0.02,
"label2id": {
"LABEL_0": 0,
"LABEL_1": 1,
"LABEL_2": 2
},
"layer_norm_epsilon": 1e-05,
"model_type": "gpt2",
"n_embd": 768,
"n_head": 12,
"n_inner": 3072,
"n_layer": 12,
"n_positions": 2048,
"pad_token_id": 51200,
"problem_type": "single_label_classification",
"reorder_and_upcast_attn": false,
"resid_pdrop": 0.1,
"scale_attn_by_inverse_layer_idx": false,
"scale_attn_weights": true,
"summary_activation": null,
"summary_first_dropout": 0.1,
"summary_proj_to_labels": true,
"summary_type": "cls_index",
"summary_use_proj": true,
"tokenizer_class": "GPT2TokenizerFast",
"torch_dtype": "float32",
"transformers_version": "4.29.2",
"use_cache": true,
"vocab_size": 51201
}

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,14 @@
from flask import(
request,
jsonify,
Blueprint,
)
from application.functions.sentiment import sentiment_prediction
# Blueprint holding the sentiment routes defined in this module; it is
# registered on the Flask app inside ``create_app``.
sentiment_service = Blueprint("sentiment_service", __name__)
@sentiment_service.route("/get_sentiment_data", methods=['GET', 'POST'])
def get_data():
    """Classify the sentences sent in the JSON request body.

    The body must be a JSON object with a ``sentences`` key; its value is
    forwarded unchanged to ``sentiment_prediction``. POST is accepted in
    addition to the original GET because many HTTP clients (browsers in
    particular) cannot send a body with a GET request.

    Returns:
        The prediction result serialized as JSON, or a JSON error message
        with status 400 when the body is not a JSON object containing
        ``sentences``.
    """
    # silent=True yields None instead of raising on a missing or malformed
    # JSON body, so every bad request gets the same explicit 400 below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'sentences' not in data:
        return jsonify(error="Request body must be a JSON object with a 'sentences' key."), 400

    result = sentiment_prediction(data['sentences'])
    return jsonify(result)

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,11 @@
{
"add_bos_token": false,
"add_prefix_space": false,
"bos_token": "<s>",
"eos_token": "</s>",
"errors": "replace",
"name_or_path": "original/polish-gpt2-small",
"special_tokens_map_file": "original/polish-gpt2-small/special_tokens_map.json",
"tokenizer_class": "GPT2Tokenizer",
"unk_token": "<unk>"
}

Binary file not shown.

Binary file not shown.

49
requirements.txt Normal file
View File

@ -0,0 +1,49 @@
aiohttp==3.8.4
aiosignal==1.3.1
async-timeout==4.0.2
attrs==23.1.0
blinker==1.6.2
certifi==2023.5.7
charset-normalizer==3.1.0
click==8.1.3
colorama==0.4.6
datasets==2.12.0
dill==0.3.6
filelock==3.12.0
Flask==2.3.2
Flask-Cors==3.0.10
frozenlist==1.3.3
fsspec==2023.5.0
huggingface-hub==0.14.1
idna==3.4
importlib-metadata==6.6.0
itsdangerous==2.1.2
Jinja2==3.1.2
MarkupSafe==2.1.2
mpmath==1.3.0
multidict==6.0.4
multiprocess==0.70.14
networkx==3.1
numpy==1.24.3
packaging==23.1
pandas==2.0.1
pyarrow==12.0.0
python-dateutil==2.8.2
pytz==2023.3
PyYAML==6.0
regex==2023.5.5
requests==2.31.0
responses==0.18.0
six==1.16.0
sympy==1.12
tokenizers==0.13.3
torch==2.0.1
tqdm==4.65.0
transformers==4.29.2
typing-extensions==4.6.2
tzdata==2023.3
urllib3==2.0.2
Werkzeug==2.3.4
xxhash==3.2.0
yarl==1.9.2
zipp==3.15.0

9
run.py Normal file
View File

@ -0,0 +1,9 @@
"""Script entry point: ``python run.py`` starts the app with Flask's built-in server."""
from application import create_app

# Module-level so the app object can also be imported from this module.
app = create_app()

if __name__ == "__main__":
    app.run()

2
wsgi.py Normal file
View File

@ -0,0 +1,2 @@
# WSGI entry point: builds the app through the factory and exposes it as the
# module-level name ``app`` for a WSGI server to import (e.g. ``wsgi:app``).
from application import create_app
app = create_app()