systemy_dialogowe/eval.ipynb


import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_addons as tfa  # imported so any tfa custom objects baked into the saved model resolve on load

# Load the fine-tuned multi-label classifier saved earlier as ./model
loaded_model = tf.keras.models.load_model('model')
Model config BertConfig {
  "_name_or_path": "dkleczek/bert-base-polish-uncased-v1",
  "architectures": [
    "BertForMaskedLM",
    "BertForPreTraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7",
    "8": "LABEL_8",
    "9": "LABEL_9",
    "10": "LABEL_10",
    "11": "LABEL_11",
    "12": "LABEL_12",
    "13": "LABEL_13",
    "14": "LABEL_14",
    "15": "LABEL_15",
    "16": "LABEL_16"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_10": 10,
    "LABEL_11": 11,
    "LABEL_12": 12,
    "LABEL_13": 13,
    "LABEL_14": 14,
    "LABEL_15": 15,
    "LABEL_16": 16,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6": 6,
    "LABEL_7": 7,
    "LABEL_8": 8,
    "LABEL_9": 9
  },
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.28.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 60000
}
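If the saved model was compiled with custom objects (for example a tensorflow_addons metric, which would explain the tfa import above), load_model may need them passed explicitly. A minimal sketch, assuming a hypothetical F1Score metric that is not confirmed by this notebook:

loaded_model = tf.keras.models.load_model(
    'model',
    custom_objects={'F1Score': tfa.metrics.F1Score},  # assumption: swap in whatever the model was actually compiled with
)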

acts = pd.read_csv('user_acts_one_hot.csv', index_col="Unnamed: 0")
# Keep only the utterance text and the one-hot act columns
acts = acts.drop(columns=["Agent", "Act"])
from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained("dkleczek/bert-base-polish-uncased-v1")
(tokenizer files loaded from the local Hugging Face cache; the printed BertConfig is identical to the one shown above)

input_data = acts["text"].tolist()
# Tokenize the whole evaluation set in one call; tokenizer(...) supersedes the deprecated batch_encode_plus
encoded_input = tokenizer(input_data, padding=True, truncation=True, return_tensors='tf')
dataset = tf.data.Dataset.from_tensor_slices({
    'input_ids': encoded_input['input_ids'],
    'attention_mask': encoded_input['attention_mask'],
    'token_type_ids': encoded_input['token_type_ids']
}).batch(2)
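
Before running inference, one batch can be pulled out to confirm the tensor shapes (a quick sanity check, not part of the original run):

# Each entry should be a (2, seq_len) int tensor, matching the batch size of 2
for batch in dataset.take(1):
    print({name: tensor.shape for name, tensor in batch.items()})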

# make predictions
predictions = loaded_model.predict(dataset)
80/80 [==============================] - 14s 160ms/step
{'logits': array([[0.0429822 , 0.07436842, 0.06289113, ..., 0.07107946, 0.22445329,
        0.17556868],
       [0.05423082, 0.04940203, 0.08606787, ..., 0.06320965, 0.09646532,
        0.85783374],
       [0.02925512, 0.04107895, 0.04539371, ..., 0.04229825, 0.891557  ,
        0.05482448],
       ...,
       [0.07066443, 0.06370321, 0.08790383, ..., 0.08178279, 0.10815965,
        0.16227055],
       [0.04984152, 0.03513726, 0.06702502, ..., 0.04850706, 0.08503693,
        0.10317416],
       [0.1308529 , 0.0802078 , 0.8544387 , ..., 0.08336826, 0.08602922,
        0.08140229]], dtype=float32)}
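Note that despite the 'logits' key, the printed values all fall in [0, 1], which suggests the classification head already applies a sigmoid; the 0.5 threshold below therefore operates on per-label probabilities.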
# Threshold each per-label score at 0.5 to get one-hot predictions
predicted_classes = (predictions["logits"] > 0.5).astype("int32")
predicted_classes
array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 1],
       [0, 0, 0, ..., 0, 1, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0]])
# Index -> dialogue-act name, in the order the model was trained on
classes = ["ack","affirm","bye","hello","help","negate","null","repeat","reqalts","reqmore","restart","silence","thankyou","confirm","deny","inform","request"]
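
To turn the one-hot predictions into readable act names and score them against the gold annotations, something like the following works. A sketch under two assumptions: the row order of predicted_classes matches acts, and acts keeps one 0/1 column per label, named as in classes, alongside the text column.

from sklearn.metrics import accuracy_score, f1_score

# Readable act labels per utterance (row order assumed to match `acts`)
predicted_labels = [
    [classes[i] for i, flag in enumerate(row) if flag]
    for row in predicted_classes
]
predicted_labels[:5]

# Multi-label scores against the gold one-hot columns
# (assumption: `acts` has columns named exactly as in `classes`)
y_true = acts[classes].to_numpy()
print("subset accuracy:", accuracy_score(y_true, predicted_classes))
print("micro F1:", f1_score(y_true, predicted_classes, average="micro"))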