systemy_dialogowe/SDMockup.ipynb

13 KiB

import tensorflow as tf
from tensorflow.keras.models import load_model
import tensorflow_addons as tfa
import numpy as np
c:\Users\macty\AppData\Local\Programs\Python\Python311\Lib\site-packages\tensorflow_addons\utils\tfa_eol_msg.py:23: UserWarning: 

TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 

  warnings.warn(
# Load the trained dialogue-act classifier saved under the path 'model'.
loaded_model = tf.keras.models.load_model('model')
# NOTE(review): wildcard import; the code visible in this notebook only uses
# BertTokenizer from it — consider importing that name explicitly.
from transformers import *
# Tokenizer of the Polish uncased BERT checkpoint; NLU.get_dialog_act uses it
# to encode the user utterance before calling loaded_model.
tokenizer = BertTokenizer.from_pretrained("dkleczek/bert-base-polish-uncased-v1")
c:\Users\macty\AppData\Local\Programs\Python\Python311\Lib\site-packages\tqdm\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm
c:\Users\macty\AppData\Local\Programs\Python\Python311\Lib\site-packages\transformers\generation_utils.py:24: FutureWarning: Importing `GenerationMixin` from `src/transformers/generation_utils.py` is deprecated and will be removed in Transformers v5. Import as `from transformers import GenerationMixin` instead.
  warnings.warn(
c:\Users\macty\AppData\Local\Programs\Python\Python311\Lib\site-packages\transformers\generation_tf_utils.py:24: FutureWarning: Importing `TFGenerationMixin` from `src/transformers/generation_tf_utils.py` is deprecated and will be removed in Transformers v5. Import as `from transformers import TFGenerationMixin` instead.
  warnings.warn(
loading file vocab.txt from cache at C:\Users\macty/.cache\huggingface\hub\models--dkleczek--bert-base-polish-uncased-v1\snapshots\62be9821055981deafb23f217b68cc41f38cdb76\vocab.txt
loading file added_tokens.json from cache at None
loading file special_tokens_map.json from cache at C:\Users\macty/.cache\huggingface\hub\models--dkleczek--bert-base-polish-uncased-v1\snapshots\62be9821055981deafb23f217b68cc41f38cdb76\special_tokens_map.json
loading file tokenizer_config.json from cache at C:\Users\macty/.cache\huggingface\hub\models--dkleczek--bert-base-polish-uncased-v1\snapshots\62be9821055981deafb23f217b68cc41f38cdb76\tokenizer_config.json
loading configuration file config.json from cache at C:\Users\macty/.cache\huggingface\hub\models--dkleczek--bert-base-polish-uncased-v1\snapshots\62be9821055981deafb23f217b68cc41f38cdb76\config.json
Model config BertConfig {
  "_name_or_path": "dkleczek/bert-base-polish-uncased-v1",
  "architectures": [
    "BertForMaskedLM",
    "BertForPreTraining"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.28.1",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 60000
}

ASR

def asr(inputText: str) -> str:
    """Placeholder for automatic speech recognition.

    The mockup has no real ASR component, so the text is passed through
    unchanged.

    Args:
        inputText: Utterance already available as text.

    Returns:
        The same text, unmodified.
    """
    # The original body evaluated ``inputText`` without returning it, so the
    # function always yielded None despite the ``-> str`` annotation.
    return inputText

NLU

class NLU:
    """Natural-language-understanding step: maps an utterance to dialogue acts.

    Uses the notebook-level ``tokenizer`` and ``loaded_model`` objects, which
    must exist before ``get_dialog_act`` is called.
    """

    # Column order used to translate model outputs into act names.
    # NOTE(review): assumed to match the label order used in training — confirm.
    CLASSES = ["ack", "affirm", "bye", "hello", "help", "negate", "null", "repeat", "reqalts", "reqmore", "restart", "silence", "thankyou", "confirm", "deny", "inform", "request"]

    def __init__(self, text: str):
        """Store the utterance; ``act`` stays empty until a prediction is made."""
        self.text = text
        self.act = ""

    def get_dialog_act(self, threshold: float = 0.5):
        """Predict the dialogue act(s) of ``self.text``.

        Every class whose score exceeds ``threshold`` is reported. If no class
        does, the single highest-scoring class is reported instead, so the
        result is never empty.

        Args:
            threshold: Minimum score for a class to be reported (default 0.5,
                the value that was previously hard-coded).

        Returns:
            List of act names; the same list is stored in ``self.act``.
        """
        # Renamed from ``input`` to avoid shadowing the builtin.
        utterances = [self.text]
        encoded_input = tokenizer.batch_encode_plus(
            utterances, padding=True, truncation=True, return_tensors='tf'
        )
        dataset = tf.data.Dataset.from_tensor_slices({
            'input_ids': encoded_input['input_ids'],
            'attention_mask': encoded_input['attention_mask'],
            'token_type_ids': encoded_input['token_type_ids']
        }).batch(2)
        predictions = loaded_model.predict(dataset)

        # The original code indexed ``predictions`` by key, i.e. treated it as
        # a mapping of named outputs and kept the last one; keep that choice
        # and additionally accept a plain array.
        if isinstance(predictions, dict):
            scores = list(predictions.values())[-1]
        else:
            scores = predictions
        # Only one utterance is encoded, so only the first row is relevant.
        row = np.atleast_2d(np.asarray(scores))[0]

        active = np.flatnonzero(row > threshold)
        if active.size == 0:
            # Nothing passed the threshold: fall back to the best-scoring class.
            active = [int(np.argmax(row))]

        self.act = [self.CLASSES[index] for index in active]
        return self.act
# Run the NLU step on a sample Polish utterance.
nlu = NLU("Jaki pokój proponujesz w tym hotelu?")
# Fills nlu.act with the predicted dialogue act(s).
nlu.get_dialog_act()
# Last expression of the cell: displays the stored prediction.
nlu.act
1/1 [==============================] - 0s 58ms/step
['request']

DST

class DialogueStateTracker:
    """Keyword-based slot detector for the hotel-booking dialogue.

    Each lower-cased word of the user utterance is compared with the keyword
    groups in ``slots_dict``; every group that matches contributes its slot
    name to ``self.slots``.
    """

    # Keyword group -> slot name. Keywords are matched against whole,
    # lower-cased words of the utterance.
    slots_dict: dict[tuple[str, ...], str] = {
        ("osoby", "ludzie", "osób", "osobowy"): "people",
        # "miejsowość" (misspelled) is kept so existing lookups still work;
        # the correctly spelled "miejscowość" is matched as well.
        ("miasto", "miasta", "miejsowość", "miejscowość", "poznań", "warszawa", "warszawie", "poznaniu", "kraków", "krakowie"): "city",
        ("basen", "parking", "śniadania"): "facilities",
        ("data", "datę"): "date",
        ("pokój", "pokoje"): "room"
    }

    # Characters removed from both ends of a word before matching, so that
    # "pokój?" or "basen," are still recognised.
    _PUNCTUATION = ".,;:!?\"'()[]{}"

    def __init__(self, nlu: "NLU"):
        """Take the utterance text (and the NLU result holder) from ``nlu``."""
        self.slots = []
        # NOTE(review): this stores the NLU object itself, not ``nlu.act``;
        # left unchanged because DialoguePolicy reads this attribute.
        self.act = nlu
        self.text = nlu.text

    def get_dialog_slots(self):
        """Collect the slots mentioned in ``self.text`` into ``self.slots``.

        Slots are appended in order of first appearance and never duplicated,
        so calling the method repeatedly does not change the result.
        """
        for raw_word in self.text.lower().split():
            word = raw_word.strip(DialogueStateTracker._PUNCTUATION)
            for keywords, slot in DialogueStateTracker.slots_dict.items():
                if word in keywords and slot not in self.slots:
                    self.slots.append(slot)
    
# Build the state tracker from the NLU step above and extract slots from its text.
dst: DialogueStateTracker = DialogueStateTracker(nlu)
dst.get_dialog_slots()
# Last expression of the cell: displays the detected slot names.
dst.slots
['room']

Dialogue Policy

class DialoguePolicy:
    """Rule-based policy: picks the system act that answers the user's act."""

    # User act -> system act. User acts without an entry fall back to "null".
    user_act_to_system_act_dict: dict[str, str] = {
        "ack": "reqmore",
        "bye": "bye",
        "hello": "welcomemsg",
        "help": "inform",
        "negate": "offer",
        # The NLU label is spelled "reqalts"; the old misspelled key is kept
        # as an alias so existing lookups keep working.
        "reqalts": "offer",
        "requalts": "offer",
        "reqmore": "inform",
        "restart": "welcomemsg",
        "thankyou": "reqmore",
        "confirm": "reqmore",
        "deny": "offer",
        "inform": "offer",
        "request": "inform",
        "null": "null"
    }

    def __init__(self, dst: "DialogueStateTracker"):
        """Copy the utterance, user act and slots from the state tracker."""
        self.user_text = dst.text
        self.user_act = dst.act
        self.user_slots = dst.slots
        self.system_act = ""

    def get_system_act(self):
        """Set ``self.system_act`` according to ``self.user_act``.

        ``self.user_act`` may be a single act name, a sequence of act names,
        or an object exposing them through an ``act`` attribute (the state
        tracker hands over the NLU object). The first act that has a mapping
        decides the result; if none has, the system act is "null".
        """
        # Unwrap an NLU-like holder; plain strings and lists have no ``act``.
        user_act = getattr(self.user_act, "act", self.user_act)
        if isinstance(user_act, str):
            candidates = [user_act]
        else:
            candidates = list(user_act) if user_act else []

        mapping = DialoguePolicy.user_act_to_system_act_dict
        self.system_act = next(
            (mapping[act] for act in candidates if act in mapping),
            "null",
        )
        
# Build the policy from the tracker state and choose the system act.
dp: DialoguePolicy = DialoguePolicy(dst)
dp.get_system_act()
# Last expression of the cell: displays the chosen system act.
dp.system_act
'inform'

NLG

class NaturalLanguageGeneration:
    """Template-based generator that turns a system act and slots into text."""

    # System act -> sentence template. Templates that end with a space are
    # open-ended and are completed with the slot phrase; the others are
    # complete sentences. The original literal listed "reqmore" twice; only
    # the later entry ever took effect, so the dead first one is dropped.
    system_act_to_text = {
        "bye": "Do widzenia",
        "welcomemsg": "Witaj w systemie rezerwacji hotelowych. W czym mogę pomóc?",
        "inform": "Informuje cię o ",
        "offer": "Co myślisz o hotlu z ",
        "reqmore": "Czy mogę jeszcze jakoś Ci pomóc?",
        "null": ""
    }
    # Slot name -> phrase inserted after an open-ended template.
    user_slots_to_text = {
        "people": "pojemności pokoju",
        "city": "mieście",
        "facilities": "udogodnieniach",
        "date": "dacie",
        "room": "pokoju"
    }

    def __init__(self, dp: "DialoguePolicy"):
        """Copy the user input and the chosen system act from the policy."""
        self.user_text = dp.user_text
        self.user_act = dp.user_act
        self.user_slots = dp.user_slots
        self.system_act = dp.system_act
        self.system_text = ""

    def generate_system_text(self):
        """Build ``self.system_text`` from the system act and the user slots.

        An unknown system act yields an empty template, and unknown slots are
        skipped, instead of raising ``KeyError``. Slot phrases are appended
        only to open-ended templates (those ending with a space), each slot at
        most once, joined with " i ".
        """
        templates = NaturalLanguageGeneration.system_act_to_text
        phrases = NaturalLanguageGeneration.user_slots_to_text

        text: str = templates.get(self.system_act, "")
        if not text.endswith(" "):
            # Complete sentence (or empty): appending slots would glue words
            # together, e.g. "Do widzeniapokoju".
            self.system_text = text
            return

        # dict.fromkeys removes duplicates while keeping first-seen order.
        slots_transformed = [
            phrases[slot]
            for slot in dict.fromkeys(self.user_slots)
            if slot in phrases
        ]
        self.system_text = text + " i ".join(slots_transformed)
        
# Build the generator from the policy result and render the system reply.
nlg: NaturalLanguageGeneration = NaturalLanguageGeneration(dp)
nlg.generate_system_text()
# Last expression of the cell: displays the generated reply.
nlg.system_text
'Informuje cię o pokoju'