{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Evaluating a saved BERT sequence classifier\n",
    "\n",
    "This notebook loads the Keras model saved under `model`, runs it on the `text` column of `user_acts_one_hot.csv` and compares the thresholded predictions with the remaining label columns (element-wise accuracy, micro and macro F1)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\Users\\macty\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\tensorflow_addons\\utils\\tfa_eol_msg.py:23: UserWarning: \n",
      "\n",
      "TensorFlow Addons (TFA) has ended development and introduction of new features.\n",
      "TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.\n",
      "Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). \n",
      "\n",
      "For more information see: https://github.com/tensorflow/addons/issues/2807 \n",
      "\n",
      " warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "# NOTE(review): `pickle` and `tensorflow_addons` are not referenced by name in this notebook;\n",
    "# `tensorflow_addons` may be imported only for its side effects when the model is loaded -\n",
    "# confirm before removing either.\n",
    "import pickle\n",
    "import pandas as pd\n",
    "import tensorflow as tf\n",
    "from tensorflow.keras.models import load_model\n",
    "import tensorflow_addons as tfa\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the saved model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# `load_model` is `tf.keras.models.load_model`, imported in the first code cell.\n",
    "loaded_model = load_model('model')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Model: \"tf_bert_for_sequence_classification\"\n",
      "_________________________________________________________________\n",
      " Layer (type) Output Shape Param # \n",
      "=================================================================\n",
      " bert (Custom>TFBertMainLaye multiple 132121344 \n",
      " r) \n",
      " \n",
      " dropout_37 (Dropout) multiple 0 \n",
      " \n",
      " classifier (Dense) multiple 13073 \n",
      " \n",
      "=================================================================\n",
      "Total params: 132,134,417\n",
      "Trainable params: 132,134,417\n",
      "Non-trainable params: 0\n",
      "_________________________________________________________________\n"
     ]
    }
   ],
   "source": [
    "# Print the layer / parameter overview of the restored model.\n",
    "loaded_model.summary()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the evaluation data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source":
[
    "# Keep the raw frame under its own name so the cleaning cell below can be re-run safely.\n",
    "acts_raw = pd.read_csv('user_acts_one_hot.csv', index_col=\"Unnamed: 0\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# `Agent` and `Act` are not used below; what remains is the `text` column plus the label columns.\n",
    "# Deriving `acts` from `acts_raw` (instead of re-assigning `acts`) keeps this cell idempotent.\n",
    "acts = acts_raw.drop(columns=[\"Agent\", \"Act\"])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Tokenizer"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import only the name that is used; `from transformers import *` pulls every public name\n",
    "# of the package into the notebook namespace.\n",
    "from transformers import BertTokenizer\n",
    "\n",
    "TOKENIZER_NAME = \"dkleczek/bert-base-polish-uncased-v1\"\n",
    "tokenizer = BertTokenizer.from_pretrained(TOKENIZER_NAME)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Inference"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "BATCH_SIZE = 2   # examples per inference batch\n",
    "THRESHOLD = 0.5  # a class counts as predicted when its model output exceeds this value\n",
    "# NOTE(review): the classifier head is listed as a plain Dense layer in the model summary, so\n",
    "# the model output is presumably logits rather than probabilities; on logits the\n",
    "# 0.5-probability boundary would be 0.0. The original cut-off is kept so the recorded\n",
    "# results stay comparable - confirm the intent against the training setup.\n",
    "\n",
    "\n",
    "def make_dataset(texts, batch_size=BATCH_SIZE):\n",
    "    \"\"\"Tokenize `texts` and return them as a batched `tf.data.Dataset`.\n",
    "\n",
    "    Args:\n",
    "        texts: list of strings to encode.\n",
    "        batch_size: number of examples per batch.\n",
    "\n",
    "    Returns:\n",
    "        A dataset of dicts with the keys `input_ids`, `attention_mask`\n",
    "        and `token_type_ids`, batched by `batch_size`.\n",
    "    \"\"\"\n",
    "    encoded = tokenizer.batch_encode_plus(texts, padding=True, truncation=True, return_tensors='tf')\n",
    "    feature_names = ('input_ids', 'attention_mask', 'token_type_ids')\n",
    "    features = {name: encoded[name] for name in feature_names}\n",
    "    return tf.data.Dataset.from_tensor_slices(features).batch(batch_size)\n",
    "\n",
    "\n",
    "def predict(text):\n",
    "    \"\"\"Run the loaded model on a single string.\n",
    "\n",
    "    Args:\n",
    "        text: the string to classify.\n",
    "\n",
    "    Returns:\n",
    "        The raw (un-thresholded) result of `loaded_model.predict` for that one example.\n",
    "    \"\"\"\n",
    "    return loaded_model.predict(make_dataset([text]))\n",
    "\n",
    "\n",
    "def to_class_matrix(model_output, threshold=THRESHOLD):\n",
    "    \"\"\"Threshold the model output into an int32 matrix of 0s and 1s.\n",
    "\n",
    "    Args:\n",
    "        model_output: result of `loaded_model.predict`; either an array of scores or a\n",
    "            dict mapping output names to such arrays.\n",
    "        threshold: scores strictly greater than this become 1, everything else 0.\n",
    "\n",
    "    Returns:\n",
    "        An int32 array with the same shape as the score array.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if `model_output` is an empty dict.\n",
    "    \"\"\"\n",
    "    if isinstance(model_output, dict):\n",
    "        if not model_output:\n",
    "            raise ValueError('model returned no outputs')\n",
    "        # The earlier version looped over the keys and overwrote its result each time, i.e. it\n",
    "        # kept the last entry. Prefer an entry called 'logits' when present, else do the same.\n",
    "        # NOTE(review): assumes the scores live under 'logits' - confirm against the saved model.\n",
    "        if 'logits' in model_output:\n",
    "            scores = model_output['logits']\n",
    "        else:\n",
    "            scores = list(model_output.values())[-1]\n",
    "    else:\n",
    "        scores = model_output\n",
    "    return (np.asarray(scores) > threshold).astype('int32')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "input_data = acts[\"text\"].tolist()\n",
    "dataset = make_dataset(input_data)\n",
    "\n",
    "# make predictions\n",
    "predictions = loaded_model.predict(dataset)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "predicted_classes = to_class_matrix(predictions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0, 0, 0, ..., 0, 0, 0],\n",
       " [0, 0, 0, ..., 0, 0, 1],\n",
       " [0, 0, 0, ..., 0, 1, 0],\n",
       " ...,\n",
       " [0, 0, 0, ..., 0, 0, 0],\n",
       " [0, 0, 0, ..., 0, 0, 0],\n",
       " [0, 0, 1, ..., 0, 0, 0]])"
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "predicted_classes"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Evaluation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Names of the 17 classes.\n",
    "# NOTE(review): presumably in the same order as the label columns of `acts` and the model\n",
    "# outputs - confirm before using it to label per-class results; nothing below relies on it.\n",
    "classes = [\"ack\",\"affirm\",\"bye\",\"hello\",\"help\",\"negate\",\"null\",\"repeat\",\"reqalts\",\"reqmore\",\"restart\",\"silence\",\"thankyou\",\"confirm\",\"deny\",\"inform\",\"request\"]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Every column except the input text is treated as a label column.\n",
    "# Dropping `text` by name (rather than by position 0) does not depend on the column order.\n",
    "true_acts = acts.drop(columns=[\"text\"])\n",
    "true = true_acts.to_numpy()\n",
    "\n",
    "# Fail loudly instead of letting numpy broadcast mismatched shapes in the metrics below.\n",
    "assert true.shape == predicted_classes.shape, (true.shape, predicted_classes.shape)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Element-wise disagreement between predictions and ground truth (non-zero where they differ).\n",
    "results = np.abs(predicted_classes - true)\n",
    "\n",
    "n_labels = results.size        # total number of (row, class) entries; was `all`, which shadowed the builtin\n",
    "not_predicted = results.sum()  # summed disagreement over all entries\n",
    "accuracy = (n_labels - not_predicted) / n_labels\n",
    "accuracy"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.metrics import f1_score\n",
    "\n",
    "# For labels with no true nor predicted samples sklearn already sets the F-score to 0.0 and\n",
    "# warns about it; passing zero_division=0 states that choice explicitly and avoids the warning.\n",
    "micro_f1 = f1_score(true, predicted_classes, average='micro', zero_division=0)\n",
    "macro_f1 = f1_score(true, predicted_classes, average='macro', zero_division=0)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 60,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.9362880886426593\n"
     ]
    }
   ],
   "source": [
    "print(micro_f1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "macro_f1"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.2"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}