systemy_dialogowe/eval.ipynb

12 KiB

import pickle
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import load_model
import tensorflow_addons as tfa
import numpy as np
c:\Users\macty\AppData\Local\Programs\Python\Python311\Lib\site-packages\tensorflow_addons\utils\tfa_eol_msg.py:23: UserWarning: 

TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 

  warnings.warn(
# Restore the fine-tuned classifier from the local 'model' directory/file and
# print its layer summary as a quick sanity check of what was loaded.
loaded_model = load_model('model')
loaded_model.summary()
Model: "tf_bert_for_sequence_classification"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
=================================================================
 bert (Custom>TFBertMainLaye  multiple                 132121344 
 r)                                                              
                                                                 
 dropout_37 (Dropout)        multiple                  0         
                                                                 
 classifier (Dense)          multiple                  13073     
                                                                 
=================================================================
Total params: 132,134,417
Trainable params: 132,134,417
Non-trainable params: 0
_________________________________________________________________
# Load the one-hot encoded user acts; the "Unnamed: 0" column of the CSV is
# used as the row index.
acts = pd.read_csv('user_acts_one_hot.csv', index_col="Unnamed: 0")
# "Agent" and "Act" are not needed for evaluation, so drop both in one call.
acts = acts.drop(columns=["Agent", "Act"])
# Import only the class that is used: a wildcard import pulls every public
# transformers name into the notebook namespace and hides where names come from.
from transformers import BertTokenizer

# Tokenizer for the Polish uncased BERT checkpoint
# (presumably the same checkpoint the saved classifier was fine-tuned from --
# confirm against the training notebook).
tokenizer = BertTokenizer.from_pretrained("dkleczek/bert-base-polish-uncased-v1")
c:\Users\macty\AppData\Local\Programs\Python\Python311\Lib\site-packages\tqdm\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm
c:\Users\macty\AppData\Local\Programs\Python\Python311\Lib\site-packages\transformers\generation_utils.py:24: FutureWarning: Importing `GenerationMixin` from `src/transformers/generation_utils.py` is deprecated and will be removed in Transformers v5. Import as `from transformers import GenerationMixin` instead.
  warnings.warn(
c:\Users\macty\AppData\Local\Programs\Python\Python311\Lib\site-packages\transformers\generation_tf_utils.py:24: FutureWarning: Importing `TFGenerationMixin` from `src/transformers/generation_tf_utils.py` is deprecated and will be removed in Transformers v5. Import as `from transformers import TFGenerationMixin` instead.
  warnings.warn(
loading file vocab.txt from cache at C:\Users\macty/.cache\huggingface\hub\models--dkleczek--bert-base-polish-uncased-v1\snapshots\62be9821055981deafb23f217b68cc41f38cdb76\vocab.txt
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at C:\Users\macty/.cache\huggingface\hub\models--dkleczek--bert-base-polish-uncased-v1\snapshots\62be9821055981deafb23f217b68cc41f38cdb76\special_tokens_map.json
loading file tokenizer_config.json from cache at C:\Users\macty/.cache\huggingface\hub\models--dkleczek--bert-base-polish-uncased-v1\snapshots\62be9821055981deafb23f217b68cc41f38cdb76\tokenizer_config.json
loading configuration file config.json from cache at C:\Users\macty/.cache\huggingface\hub\models--dkleczek--bert-base-polish-uncased-v1\snapshots\62be9821055981deafb23f217b68cc41f38cdb76\config.json
Model config BertConfig {
  "_name_or_path": "dkleczek/bert-base-polish-uncased-v1",
  "architectures": [
    "BertForMaskedLM",
    "BertForPreTraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.28.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 60000
}

# Tokenize every utterance from the "text" column in a single call, padding and
# truncating so the result can be returned as TensorFlow tensors.
input_data = acts["text"].tolist()
encoded_input = tokenizer.batch_encode_plus(
    input_data,
    padding=True,
    truncation=True,
    return_tensors='tf',
)

# Pick out the three encoder outputs that are fed to the model, keyed by name.
model_inputs = {
    name: encoded_input[name]
    for name in ('input_ids', 'attention_mask', 'token_type_ids')
}
dataset = tf.data.Dataset.from_tensor_slices(model_inputs)
dataset = dataset.batch(2)

# Run inference over the whole dataset, two examples per step.
predictions = loaded_model.predict(dataset)
80/80 [==============================] - 14s 170ms/step
def predict(text):
    """Run the loaded classifier on a single utterance.

    Args:
        text: The utterance to classify. It is wrapped in a one-element list
            before tokenization.

    Returns:
        Whatever ``loaded_model.predict`` returns for that one-element batch
        (no thresholding is applied here).
    """
    # Named `texts` rather than `input` so the builtin is not shadowed.
    texts = [text]
    encoded = tokenizer.batch_encode_plus(
        texts, padding=True, truncation=True, return_tensors='tf'
    )
    features = {
        name: encoded[name]
        for name in ('input_ids', 'attention_mask', 'token_type_ids')
    }
    batches = tf.data.Dataset.from_tensor_slices(features).batch(2)
    return loaded_model.predict(batches)
     
# Turn the model outputs into a 0/1 matrix by thresholding at 0.5.
# NOTE(review): the loop value is used to index `predictions`, which only works
# if `predictions` is dict-like (presumably output name -> array, e.g. a single
# "logits" entry) -- confirm against the loaded model's outputs.
# Each pass overwrites `predicted_classes`, so only the last entry is kept.
# NOTE(review): if the values are raw logits rather than probabilities, a 0.5
# cut-off is not a 0.5 probability threshold -- verify whether a sigmoid
# should be applied first.
for prediction in predictions:
    predicted_classes = (predictions[prediction]> 0.5).astype("int32")
# Display the thresholded matrix as the cell output.
predicted_classes
array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 1, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0]])
# Dialogue-act label names.
# NOTE(review): presumably listed in the same order as the one-hot columns of
# the CSV -- confirm before using this list to label results.
classes = [
    "ack", "affirm", "bye", "hello", "help", "negate", "null", "repeat",
    "reqalts", "reqmore", "restart", "silence", "thankyou", "confirm", "deny",
    "inform", "request",
]

# Ground truth: every column of `acts` except the first one
# (presumably the "text" column -- confirm).
true_acts = acts.drop(columns=acts.columns[0])
true = true_acts.to_numpy()

# Element-wise absolute difference between predictions and ground truth; for
# 0/1 inputs this is 1 wherever the two disagree.
results = np.abs(predicted_classes - true)
23
# Fraction of individual cells of `results` that are zero, i.e. label decisions
# where prediction and ground truth agree (assuming 0/1 entries).
# The count is named `total_labels` instead of `all` so the builtin `all()`
# stays usable in the rest of the notebook.
total_labels = results.size
not_predicted = results.sum()
accuracy = (total_labels - not_predicted) / total_labels
from sklearn.metrics import f1_score

# Some labels have neither true nor predicted samples, which makes their
# F-score undefined. zero_division=0 scores them as 0.0 -- the same value the
# default falls back to -- without emitting an UndefinedMetricWarning.
# Note that such labels still pull the macro average down.
micro_f1 = f1_score(true, predicted_classes, average='micro', zero_division=0)
macro_f1 = f1_score(true, predicted_classes, average='macro', zero_division=0)
c:\Users\macty\AppData\Local\Programs\Python\Python311\Lib\site-packages\sklearn\metrics\_classification.py:1609: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no true nor predicted samples. Use `zero_division` parameter to control this behavior.
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
# Report the micro-averaged F1 score; macro_f1 and accuracy are computed above
# but are not printed here.
print(micro_f1)
0.9362880886426593