Merge remote-tracking branch 'origin/master'
This commit is contained in:
commit
0c1a829435
160
SDMockup.ipynb
160
SDMockup.ipynb
@ -1,5 +1,89 @@
|
|||||||
{
|
{
|
||||||
"cells": [
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"c:\\Users\\macty\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\tensorflow_addons\\utils\\tfa_eol_msg.py:23: UserWarning: \n",
|
||||||
|
"\n",
|
||||||
|
"TensorFlow Addons (TFA) has ended development and introduction of new features.\n",
|
||||||
|
"TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.\n",
|
||||||
|
"Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). \n",
|
||||||
|
"\n",
|
||||||
|
"For more information see: https://github.com/tensorflow/addons/issues/2807 \n",
|
||||||
|
"\n",
|
||||||
|
" warnings.warn(\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"import tensorflow as tf\n",
|
||||||
|
"from tensorflow.keras.models import load_model\n",
|
||||||
|
"import tensorflow_addons as tfa\n",
|
||||||
|
"import numpy as np"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"c:\\Users\\macty\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
||||||
|
" from .autonotebook import tqdm as notebook_tqdm\n",
|
||||||
|
"c:\\Users\\macty\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\transformers\\generation_utils.py:24: FutureWarning: Importing `GenerationMixin` from `src/transformers/generation_utils.py` is deprecated and will be removed in Transformers v5. Import as `from transformers import GenerationMixin` instead.\n",
|
||||||
|
" warnings.warn(\n",
|
||||||
|
"c:\\Users\\macty\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\transformers\\generation_tf_utils.py:24: FutureWarning: Importing `TFGenerationMixin` from `src/transformers/generation_tf_utils.py` is deprecated and will be removed in Transformers v5. Import as `from transformers import TFGenerationMixin` instead.\n",
|
||||||
|
" warnings.warn(\n",
|
||||||
|
"loading file vocab.txt from cache at C:\\Users\\macty/.cache\\huggingface\\hub\\models--dkleczek--bert-base-polish-uncased-v1\\snapshots\\62be9821055981deafb23f217b68cc41f38cdb76\\vocab.txt\n",
|
||||||
|
"loading file added_tokens.json from cache at None\n",
|
||||||
|
"loading file special_tokens_map.json from cache at C:\\Users\\macty/.cache\\huggingface\\hub\\models--dkleczek--bert-base-polish-uncased-v1\\snapshots\\62be9821055981deafb23f217b68cc41f38cdb76\\special_tokens_map.json\n",
|
||||||
|
"loading file tokenizer_config.json from cache at C:\\Users\\macty/.cache\\huggingface\\hub\\models--dkleczek--bert-base-polish-uncased-v1\\snapshots\\62be9821055981deafb23f217b68cc41f38cdb76\\tokenizer_config.json\n",
|
||||||
|
"loading configuration file config.json from cache at C:\\Users\\macty/.cache\\huggingface\\hub\\models--dkleczek--bert-base-polish-uncased-v1\\snapshots\\62be9821055981deafb23f217b68cc41f38cdb76\\config.json\n",
|
||||||
|
"Model config BertConfig {\n",
|
||||||
|
" \"_name_or_path\": \"dkleczek/bert-base-polish-uncased-v1\",\n",
|
||||||
|
" \"architectures\": [\n",
|
||||||
|
" \"BertForMaskedLM\",\n",
|
||||||
|
" \"BertForPreTraining\"\n",
|
||||||
|
" ],\n",
|
||||||
|
" \"attention_probs_dropout_prob\": 0.1,\n",
|
||||||
|
" \"classifier_dropout\": null,\n",
|
||||||
|
" \"hidden_act\": \"gelu\",\n",
|
||||||
|
" \"hidden_dropout_prob\": 0.1,\n",
|
||||||
|
" \"hidden_size\": 768,\n",
|
||||||
|
" \"initializer_range\": 0.02,\n",
|
||||||
|
" \"intermediate_size\": 3072,\n",
|
||||||
|
" \"layer_norm_eps\": 1e-12,\n",
|
||||||
|
" \"max_position_embeddings\": 512,\n",
|
||||||
|
" \"model_type\": \"bert\",\n",
|
||||||
|
" \"num_attention_heads\": 12,\n",
|
||||||
|
" \"num_hidden_layers\": 12,\n",
|
||||||
|
" \"output_past\": true,\n",
|
||||||
|
" \"pad_token_id\": 0,\n",
|
||||||
|
" \"position_embedding_type\": \"absolute\",\n",
|
||||||
|
" \"transformers_version\": \"4.28.1\",\n",
|
||||||
|
" \"type_vocab_size\": 2,\n",
|
||||||
|
" \"use_cache\": true,\n",
|
||||||
|
" \"vocab_size\": 60000\n",
|
||||||
|
"}\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"loaded_model = tf.keras.models.load_model('model')\n",
|
||||||
|
"from transformers import *\n",
|
||||||
|
"tokenizer = BertTokenizer.from_pretrained(\"dkleczek/bert-base-polish-uncased-v1\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"attachments": {},
|
"attachments": {},
|
||||||
"cell_type": "markdown",
|
"cell_type": "markdown",
|
||||||
@ -29,60 +113,62 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 29,
|
"execution_count": 13,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"class NaturalLanguageUnderstanding:\n",
|
"class NLU:\n",
|
||||||
" acts: dict[list[str], str] = {\n",
|
|
||||||
" ( \"potwierdzam\", \"dobrze\", \"ok\" ): \"ack\",\n",
|
|
||||||
" (\"do widziena\", \"czesc\", \"koniec\", \"do zobaczenia\"): \"bye\",\n",
|
|
||||||
" (\"cześć\", \"dzień dobry\", \"hello\", \"hej\"): \"hello\",\n",
|
|
||||||
" (\"pomóc\", \"pomocy\", \"pomoc\"): \"help\",\n",
|
|
||||||
" (\"zaprzeczam\", \"odrzucam\"): \"negate\",\n",
|
|
||||||
" (\"alternatywny\", \"inne\", \"alternatywa\", \"inna\"): \"requalts\",\n",
|
|
||||||
" (\"szczegółów\", \"informacji\", \"info\", \"informacje\"): \"reqmore\",\n",
|
|
||||||
" (\"restart\"): \"restart\",\n",
|
|
||||||
" (\"dziękuję\", \"dzięki\"): \"thankyou\",\n",
|
|
||||||
" (\"tak\", \"chcę\"): \"confirm\",\n",
|
|
||||||
" (\"nie chce\"): \"deny\",\n",
|
|
||||||
" (\"basen\", \"parking\", \"śniadania\", \"osoby\"): \"inform\",\n",
|
|
||||||
" (\"jaki\",\"?\", \"czy\", \"jak\", \"ile\", \"co\", \"gdzie\"): \"request\"\n",
|
|
||||||
" }\n",
|
|
||||||
" def __init__(self, text: str):\n",
|
" def __init__(self, text: str):\n",
|
||||||
" self.text = text\n",
|
" self.text = text\n",
|
||||||
" self.act = \"\"\n",
|
" self.act = \"\"\n",
|
||||||
" \n",
|
"\n",
|
||||||
" \n",
|
|
||||||
" def get_dialog_act(self): \n",
|
" def get_dialog_act(self): \n",
|
||||||
" for word in self.text.lower().split():\n",
|
" predicted_classes_names=[]\n",
|
||||||
" for key in NaturalLanguageUnderstanding.acts:\n",
|
" input = [self.text]\n",
|
||||||
" if word in key:\n",
|
" encoded_input = tokenizer.batch_encode_plus(input, padding=True, truncation=True, return_tensors='tf')\n",
|
||||||
" self.act = NaturalLanguageUnderstanding.acts[key]\n",
|
" dataset = tf.data.Dataset.from_tensor_slices({\n",
|
||||||
" return\n",
|
" 'input_ids': encoded_input['input_ids'],\n",
|
||||||
" self.act = \"null\"\n",
|
" 'attention_mask': encoded_input['attention_mask'],\n",
|
||||||
" \n",
|
" 'token_type_ids': encoded_input['token_type_ids']\n",
|
||||||
"\n"
|
" }).batch(2)\n",
|
||||||
|
" predictions = loaded_model.predict(dataset)\n",
|
||||||
|
" classes = [\"ack\",\"affirm\",\"bye\",\"hello\",\"help\",\"negate\",\"null\",\"repeat\",\"reqalts\",\"reqmore\",\"restart\",\"silence\",\"thankyou\",\"confirm\",\"deny\",\"inform\",\"request\"]\n",
|
||||||
|
" for prediction in predictions: #trying to get predictions, if none it take maximum\n",
|
||||||
|
" predicted_classes = (predictions[prediction]> 0.5).astype(\"int32\")\n",
|
||||||
|
" if predicted_classes.sum()==0:\n",
|
||||||
|
" predicted_classes=max(predictions[prediction])\n",
|
||||||
|
" predicted_classes_indexes= np.where(predicted_classes==1)[1]\n",
|
||||||
|
" for p_classes in predicted_classes_indexes:\n",
|
||||||
|
" predicted_classes_names.append(classes[p_classes])\n",
|
||||||
|
" self.act=predicted_classes_names\n",
|
||||||
|
" return self.act\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 33,
|
"execution_count": 17,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"1/1 [==============================] - 0s 58ms/step\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"'request'"
|
"['request']"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 33,
|
"execution_count": 17,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"nlu = NaturalLanguageUnderstanding(\"Jaki pokój proponujesz w tym hotelu?\")\n",
|
"nlu = NLU(\"Jaki pokój proponujesz w tym hotelu?\")\n",
|
||||||
"nlu.get_dialog_act()\n",
|
"nlu.get_dialog_act()\n",
|
||||||
"nlu.act"
|
"nlu.act"
|
||||||
]
|
]
|
||||||
@ -97,7 +183,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 53,
|
"execution_count": 18,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -111,9 +197,9 @@
|
|||||||
" (\"pokój\", \"pokoje\"): \"room\"\n",
|
" (\"pokój\", \"pokoje\"): \"room\"\n",
|
||||||
" }\n",
|
" }\n",
|
||||||
" \n",
|
" \n",
|
||||||
" def __init__(self, nlu: NaturalLanguageUnderstanding):\n",
|
" def __init__(self, nlu: NLU):\n",
|
||||||
" self.slots = []\n",
|
" self.slots = []\n",
|
||||||
" self.act = nlu.act\n",
|
" self.act = nlu\n",
|
||||||
" self.text = nlu.text\n",
|
" self.text = nlu.text\n",
|
||||||
" \n",
|
" \n",
|
||||||
" def get_dialog_slots(self):\n",
|
" def get_dialog_slots(self):\n",
|
||||||
@ -126,7 +212,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 54,
|
"execution_count": 19,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -135,7 +221,7 @@
|
|||||||
"['room']"
|
"['room']"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 54,
|
"execution_count": 19,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -303,7 +389,7 @@
|
|||||||
"name": "python",
|
"name": "python",
|
||||||
"nbconvert_exporter": "python",
|
"nbconvert_exporter": "python",
|
||||||
"pygments_lexer": "ipython3",
|
"pygments_lexer": "ipython3",
|
||||||
"version": "3.10.9"
|
"version": "3.11.2"
|
||||||
},
|
},
|
||||||
"orig_nbformat": 4
|
"orig_nbformat": 4
|
||||||
},
|
},
|
||||||
|
301
eval.ipynb
301
eval.ipynb
@ -2,9 +2,25 @@
|
|||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 38,
|
"execution_count": 1,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"c:\\Users\\macty\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\tensorflow_addons\\utils\\tfa_eol_msg.py:23: UserWarning: \n",
|
||||||
|
"\n",
|
||||||
|
"TensorFlow Addons (TFA) has ended development and introduction of new features.\n",
|
||||||
|
"TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.\n",
|
||||||
|
"Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). \n",
|
||||||
|
"\n",
|
||||||
|
"For more information see: https://github.com/tensorflow/addons/issues/2807 \n",
|
||||||
|
"\n",
|
||||||
|
" warnings.warn(\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"import pickle\n",
|
"import pickle\n",
|
||||||
"import pandas as pd\n",
|
"import pandas as pd\n",
|
||||||
@ -16,88 +32,48 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 46,
|
"execution_count": 2,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [],
|
||||||
{
|
|
||||||
"name": "stderr",
|
|
||||||
"output_type": "stream",
|
|
||||||
"text": [
|
|
||||||
"Model config BertConfig {\n",
|
|
||||||
" \"_name_or_path\": \"dkleczek/bert-base-polish-uncased-v1\",\n",
|
|
||||||
" \"architectures\": [\n",
|
|
||||||
" \"BertForMaskedLM\",\n",
|
|
||||||
" \"BertForPreTraining\"\n",
|
|
||||||
" ],\n",
|
|
||||||
" \"attention_probs_dropout_prob\": 0.1,\n",
|
|
||||||
" \"classifier_dropout\": null,\n",
|
|
||||||
" \"hidden_act\": \"gelu\",\n",
|
|
||||||
" \"hidden_dropout_prob\": 0.1,\n",
|
|
||||||
" \"hidden_size\": 768,\n",
|
|
||||||
" \"id2label\": {\n",
|
|
||||||
" \"0\": \"LABEL_0\",\n",
|
|
||||||
" \"1\": \"LABEL_1\",\n",
|
|
||||||
" \"2\": \"LABEL_2\",\n",
|
|
||||||
" \"3\": \"LABEL_3\",\n",
|
|
||||||
" \"4\": \"LABEL_4\",\n",
|
|
||||||
" \"5\": \"LABEL_5\",\n",
|
|
||||||
" \"6\": \"LABEL_6\",\n",
|
|
||||||
" \"7\": \"LABEL_7\",\n",
|
|
||||||
" \"8\": \"LABEL_8\",\n",
|
|
||||||
" \"9\": \"LABEL_9\",\n",
|
|
||||||
" \"10\": \"LABEL_10\",\n",
|
|
||||||
" \"11\": \"LABEL_11\",\n",
|
|
||||||
" \"12\": \"LABEL_12\",\n",
|
|
||||||
" \"13\": \"LABEL_13\",\n",
|
|
||||||
" \"14\": \"LABEL_14\",\n",
|
|
||||||
" \"15\": \"LABEL_15\",\n",
|
|
||||||
" \"16\": \"LABEL_16\"\n",
|
|
||||||
" },\n",
|
|
||||||
" \"initializer_range\": 0.02,\n",
|
|
||||||
" \"intermediate_size\": 3072,\n",
|
|
||||||
" \"label2id\": {\n",
|
|
||||||
" \"LABEL_0\": 0,\n",
|
|
||||||
" \"LABEL_1\": 1,\n",
|
|
||||||
" \"LABEL_10\": 10,\n",
|
|
||||||
" \"LABEL_11\": 11,\n",
|
|
||||||
" \"LABEL_12\": 12,\n",
|
|
||||||
" \"LABEL_13\": 13,\n",
|
|
||||||
" \"LABEL_14\": 14,\n",
|
|
||||||
" \"LABEL_15\": 15,\n",
|
|
||||||
" \"LABEL_16\": 16,\n",
|
|
||||||
" \"LABEL_2\": 2,\n",
|
|
||||||
" \"LABEL_3\": 3,\n",
|
|
||||||
" \"LABEL_4\": 4,\n",
|
|
||||||
" \"LABEL_5\": 5,\n",
|
|
||||||
" \"LABEL_6\": 6,\n",
|
|
||||||
" \"LABEL_7\": 7,\n",
|
|
||||||
" \"LABEL_8\": 8,\n",
|
|
||||||
" \"LABEL_9\": 9\n",
|
|
||||||
" },\n",
|
|
||||||
" \"layer_norm_eps\": 1e-12,\n",
|
|
||||||
" \"max_position_embeddings\": 512,\n",
|
|
||||||
" \"model_type\": \"bert\",\n",
|
|
||||||
" \"num_attention_heads\": 12,\n",
|
|
||||||
" \"num_hidden_layers\": 12,\n",
|
|
||||||
" \"output_past\": true,\n",
|
|
||||||
" \"pad_token_id\": 0,\n",
|
|
||||||
" \"position_embedding_type\": \"absolute\",\n",
|
|
||||||
" \"transformers_version\": \"4.28.1\",\n",
|
|
||||||
" \"type_vocab_size\": 2,\n",
|
|
||||||
" \"use_cache\": true,\n",
|
|
||||||
" \"vocab_size\": 60000\n",
|
|
||||||
"}\n",
|
|
||||||
"\n"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
"source": [
|
||||||
"loaded_model = tf.keras.models.load_model('model')"
|
"loaded_model = tf.keras.models.load_model('model')"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 29,
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Model: \"tf_bert_for_sequence_classification\"\n",
|
||||||
|
"_________________________________________________________________\n",
|
||||||
|
" Layer (type) Output Shape Param # \n",
|
||||||
|
"=================================================================\n",
|
||||||
|
" bert (Custom>TFBertMainLaye multiple 132121344 \n",
|
||||||
|
" r) \n",
|
||||||
|
" \n",
|
||||||
|
" dropout_37 (Dropout) multiple 0 \n",
|
||||||
|
" \n",
|
||||||
|
" classifier (Dense) multiple 13073 \n",
|
||||||
|
" \n",
|
||||||
|
"=================================================================\n",
|
||||||
|
"Total params: 132,134,417\n",
|
||||||
|
"Trainable params: 132,134,417\n",
|
||||||
|
"Non-trainable params: 0\n",
|
||||||
|
"_________________________________________________________________\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"loaded_model.summary()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 4,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -106,7 +82,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 30,
|
"execution_count": 5,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -116,13 +92,19 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 54,
|
"execution_count": 6,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"name": "stderr",
|
"name": "stderr",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
|
"c:\\Users\\macty\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
|
||||||
|
" from .autonotebook import tqdm as notebook_tqdm\n",
|
||||||
|
"c:\\Users\\macty\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\transformers\\generation_utils.py:24: FutureWarning: Importing `GenerationMixin` from `src/transformers/generation_utils.py` is deprecated and will be removed in Transformers v5. Import as `from transformers import GenerationMixin` instead.\n",
|
||||||
|
" warnings.warn(\n",
|
||||||
|
"c:\\Users\\macty\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\transformers\\generation_tf_utils.py:24: FutureWarning: Importing `TFGenerationMixin` from `src/transformers/generation_tf_utils.py` is deprecated and will be removed in Transformers v5. Import as `from transformers import TFGenerationMixin` instead.\n",
|
||||||
|
" warnings.warn(\n",
|
||||||
"loading file vocab.txt from cache at C:\\Users\\macty/.cache\\huggingface\\hub\\models--dkleczek--bert-base-polish-uncased-v1\\snapshots\\62be9821055981deafb23f217b68cc41f38cdb76\\vocab.txt\n",
|
"loading file vocab.txt from cache at C:\\Users\\macty/.cache\\huggingface\\hub\\models--dkleczek--bert-base-polish-uncased-v1\\snapshots\\62be9821055981deafb23f217b68cc41f38cdb76\\vocab.txt\n",
|
||||||
"loading file added_tokens.json from cache at None\n",
|
"loading file added_tokens.json from cache at None\n",
|
||||||
"loading file special_tokens_map.json from cache at C:\\Users\\macty/.cache\\huggingface\\hub\\models--dkleczek--bert-base-polish-uncased-v1\\snapshots\\62be9821055981deafb23f217b68cc41f38cdb76\\special_tokens_map.json\n",
|
"loading file special_tokens_map.json from cache at C:\\Users\\macty/.cache\\huggingface\\hub\\models--dkleczek--bert-base-polish-uncased-v1\\snapshots\\62be9821055981deafb23f217b68cc41f38cdb76\\special_tokens_map.json\n",
|
||||||
@ -165,28 +147,14 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 80,
|
"execution_count": 14,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"name": "stdout",
|
"name": "stdout",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"WARNING:tensorflow:6 out of the last 8 calls to <function Model.make_predict_function.<locals>.predict_function at 0x00000247C45EE2A0> triggered tf.function retracing. Tracing is expensive and the excessive number of tracings could be due to (1) creating @tf.function repeatedly in a loop, (2) passing tensors with different shapes, (3) passing Python objects instead of tensors. For (1), please define your @tf.function outside of the loop. For (2), @tf.function has reduce_retracing=True option that can avoid unnecessary retracing. For (3), please refer to https://www.tensorflow.org/guide/function#controlling_retracing and https://www.tensorflow.org/api_docs/python/tf/function for more details.\n",
|
"80/80 [==============================] - 14s 170ms/step\n"
|
||||||
"80/80 [==============================] - 14s 160ms/step\n",
|
|
||||||
"{'logits': array([[0.0429822 , 0.07436842, 0.06289113, ..., 0.07107946, 0.22445329,\n",
|
|
||||||
" 0.17556868],\n",
|
|
||||||
" [0.05423082, 0.04940203, 0.08606787, ..., 0.06320965, 0.09646532,\n",
|
|
||||||
" 0.85783374],\n",
|
|
||||||
" [0.02925512, 0.04107895, 0.04539371, ..., 0.04229825, 0.891557 ,\n",
|
|
||||||
" 0.05482448],\n",
|
|
||||||
" ...,\n",
|
|
||||||
" [0.07066443, 0.06370321, 0.08790383, ..., 0.08178279, 0.10815965,\n",
|
|
||||||
" 0.16227055],\n",
|
|
||||||
" [0.04984152, 0.03513726, 0.06702502, ..., 0.04850706, 0.08503693,\n",
|
|
||||||
" 0.10317416],\n",
|
|
||||||
" [0.1308529 , 0.0802078 , 0.8544387 , ..., 0.08336826, 0.08602922,\n",
|
|
||||||
" 0.08140229]], dtype=float32)}\n"
|
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
@ -205,7 +173,26 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 96,
|
"execution_count": 10,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def predict(text):\n",
|
||||||
|
" input = [ text ]\n",
|
||||||
|
" encoded_input = tokenizer.batch_encode_plus(input, padding=True, truncation=True, return_tensors='tf')\n",
|
||||||
|
" dataset = tf.data.Dataset.from_tensor_slices({\n",
|
||||||
|
" 'input_ids': encoded_input['input_ids'],\n",
|
||||||
|
" 'attention_mask': encoded_input['attention_mask'],\n",
|
||||||
|
" 'token_type_ids': encoded_input['token_type_ids']\n",
|
||||||
|
" }).batch(2)\n",
|
||||||
|
" predictions = loaded_model.predict(dataset)\n",
|
||||||
|
" return predictions\n",
|
||||||
|
" \n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 15,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -215,7 +202,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 97,
|
"execution_count": 35,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -230,7 +217,7 @@
|
|||||||
" [0, 0, 1, ..., 0, 0, 0]])"
|
" [0, 0, 1, ..., 0, 0, 0]])"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 97,
|
"execution_count": 35,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -241,12 +228,128 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 98,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"classes = [\"ack\",\"affirm\",\"bye\",\"hello\",\"help\",\"negate\",\"null\",\"repeat\",\"reqalts\",\"reqmore\",\"restart\",\"silence\",\"thankyou\",\"confirm\",\"deny\",\"inform\",\"request\"]"
|
"classes = [\"ack\",\"affirm\",\"bye\",\"hello\",\"help\",\"negate\",\"null\",\"repeat\",\"reqalts\",\"reqmore\",\"restart\",\"silence\",\"thankyou\",\"confirm\",\"deny\",\"inform\",\"request\"]"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 31,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"true_acts = acts.drop(acts.columns[0],axis=1)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 37,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"true= true_acts.to_numpy()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 41,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"results = abs(predicted_classes-true)"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 46,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"23"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 46,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 47,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"all=results.size"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 49,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"not_predicted = results.sum()"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 50,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"accuracy = (all-not_predicted)/all"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 58,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"c:\\Users\\macty\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\sklearn\\metrics\\_classification.py:1609: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no true nor predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
||||||
|
" _warn_prf(average, \"true nor predicted\", \"F-score is\", len(true_sum))\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"from sklearn.metrics import f1_score\n",
|
||||||
|
"micro_f1 = f1_score(true, predicted_classes, average='micro')\n",
|
||||||
|
"macro_f1 = f1_score(true, predicted_classes, average='macro')"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 60,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"0.9362880886426593\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"print(micro_f1)"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
17
evaluate.py
17
evaluate.py
@ -1,4 +1,3 @@
|
|||||||
import pickle
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
from tensorflow.keras.models import load_model
|
from tensorflow.keras.models import load_model
|
||||||
@ -29,6 +28,18 @@ predictions = loaded_model.predict(dataset)
|
|||||||
for prediction in predictions:
|
for prediction in predictions:
|
||||||
predicted_classes = (predictions[prediction]> 0.5).astype("int32")
|
predicted_classes = (predictions[prediction]> 0.5).astype("int32")
|
||||||
classes = ["ack","affirm","bye","hello","help","negate","null","repeat","reqalts","reqmore","restart","silence","thankyou","confirm","deny","inform","request"]
|
classes = ["ack","affirm","bye","hello","help","negate","null","repeat","reqalts","reqmore","restart","silence","thankyou","confirm","deny","inform","request"]
|
||||||
print(predicted_classes)
|
|
||||||
## to do - evaluating f score
|
|
||||||
|
|
||||||
|
true_acts = acts.drop(acts.columns[0],axis=1)
|
||||||
|
true= true_acts.to_numpy()
|
||||||
|
results = abs(predicted_classes-true)
|
||||||
|
all=results.size
|
||||||
|
not_predicted = results.sum()
|
||||||
|
accuracy = (all-not_predicted)/all
|
||||||
|
from sklearn.metrics import f1_score
|
||||||
|
micro_f1 = f1_score(true, predicted_classes, average='micro')
|
||||||
|
macro_f1 = f1_score(true, predicted_classes, average='macro')
|
||||||
|
|
||||||
|
|
||||||
|
# Report the fraction of label cells where prediction and ground truth agree.
print(f"Accuracy: {accuracy}")
|
||||||
|
# Report the micro-averaged F1 score returned by sklearn's f1_score.
print(f"micro f1 score : {micro_f1}")
|
||||||
|
# Report the macro-averaged F1 score returned by sklearn's f1_score.
print(f"macro f1 score : {macro_f1}")
|
Loading…
Reference in New Issue
Block a user