GOATS/reguly.ipynb

319 lines
12 KiB
Plaintext
Raw Normal View History

2024-05-27 00:10:00 +02:00
{
"cells": [
{
"cell_type": "code",
2024-05-27 20:53:07 +02:00
"execution_count": 212,
2024-05-27 00:10:00 +02:00
"id": "706dd5e1-57ee-416b-a77c-5d15df8dbdc8",
"metadata": {},
"outputs": [],
"source": [
"from convlab.base_models.t5.nlu import T5NLU\n",
"import requests\n",
"\n",
"\n",
"def translate_text(text, target_language='en'):\n",
" url = 'https://translate.googleapis.com/translate_a/single?client=gtx&sl=auto&tl={}&dt=t&q={}'.format(\n",
" target_language, text)\n",
" response = requests.get(url)\n",
" if response.status_code == 200:\n",
" translated_text = response.json()[0][0][0]\n",
" return translated_text\n",
" else:\n",
" return None\n",
"\n",
"\n",
"class NaturalLanguageAnalyzer: \n",
" def predict(self, text, context=None):\n",
" # Inicjalizacja modelu NLU\n",
" model_name = \"ConvLab/t5-small-nlu-multiwoz21\"\n",
" nlu_model = T5NLU(speaker='user', context_window_size=0, model_name_or_path=model_name)\n",
"\n",
" # Automatyczne tłumaczenie na język angielski\n",
" translated_input = translate_text(text)\n",
"\n",
" # Wygenerowanie odpowiedzi z modelu NLU\n",
" nlu_output = nlu_model.predict(translated_input)\n",
"\n",
" return nlu_output\n",
"\n",
" def init_session(self):\n",
" # Inicjalizacja sesji (jeśli konieczne)\n",
" pass"
]
},
{
"cell_type": "code",
2024-05-27 20:53:07 +02:00
"execution_count": 213,
2024-05-27 00:10:00 +02:00
"id": "06926543-cab1-48e7-8e82-0560fc0fa16a",
"metadata": {},
"outputs": [],
"source": [
"import json\n",
"import os\n",
"from convlab.dst.dst import DST\n",
"from convlab.dst.rule.multiwoz.dst_util import normalize_value\n",
"\n",
2024-05-27 20:53:07 +02:00
"\n",
"def default_state():\n",
" return {\n",
" 'belief_state': {\n",
" 'hotel': {\n",
" 'info': {\n",
" 'name': '',\n",
" 'area': '',\n",
" 'parking': '',\n",
" 'price range': '',\n",
" 'stars': '',\n",
" 'internet': '',\n",
" 'type': ''\n",
" },\n",
" 'booking': {\n",
" 'book stay': '',\n",
" 'book day': '',\n",
" 'book people': ''\n",
" }\n",
" }\n",
" },\n",
" 'request_state': {},\n",
" 'history': [],\n",
" 'user_action': [],\n",
" 'system_action': [],\n",
" 'terminated': False,\n",
" 'booked': []\n",
" }\n",
"\n",
"\n",
"class DialogueStateTracker(DST):\n",
2024-05-27 00:10:00 +02:00
" def __init__(self):\n",
" DST.__init__(self)\n",
" self.state = default_state()\n",
2024-05-27 20:53:07 +02:00
" with open('./hotels_data.json') as f:\n",
" self.value_dict = json.load(f)\n",
2024-05-27 00:10:00 +02:00
"\n",
" def update(self, user_act=None):\n",
" for intent, domain, slot, value in user_act:\n",
" domain = domain.lower()\n",
" intent = intent.lower()\n",
" slot = slot.lower()\n",
" \n",
" if domain not in self.state['belief_state']:\n",
" continue\n",
"\n",
" if intent == 'inform':\n",
2024-05-27 20:53:07 +02:00
" if slot == 'none' or slot == '' or value == 'dontcare':\n",
2024-05-27 00:10:00 +02:00
" continue\n",
"\n",
2024-05-27 20:53:07 +02:00
" domain_dic = self.state['belief_state'][domain]['info']\n",
2024-05-27 00:10:00 +02:00
"\n",
" if slot in domain_dic:\n",
2024-05-27 20:53:07 +02:00
" nvalue = self.normalize_value(self.value_dict, domain, slot, value)\n",
" self.state['belief_state'][domain]['info'][slot] = nvalue\n",
2024-05-27 00:10:00 +02:00
"\n",
" elif intent == 'request':\n",
" if domain not in self.state['request_state']:\n",
" self.state['request_state'][domain] = {}\n",
" if slot not in self.state['request_state'][domain]:\n",
" self.state['request_state'][domain][slot] = 0\n",
"\n",
" return self.state\n",
"\n",
2024-05-27 20:53:07 +02:00
" def normalize_value(self, value_dict, domain, slot, value):\n",
" normalized_value = value.lower().strip()\n",
" if domain in value_dict and slot in value_dict[domain]:\n",
" possible_values = value_dict[domain][slot]\n",
" if isinstance(possible_values, dict) and normalized_value in possible_values:\n",
" return possible_values[normalized_value]\n",
" return value\n",
"\n",
2024-05-27 00:10:00 +02:00
" def init_session(self):\n",
2024-05-27 20:53:07 +02:00
" self.state = default_state()\n"
2024-05-27 00:10:00 +02:00
]
},
{
"cell_type": "code",
2024-05-27 20:53:07 +02:00
"execution_count": 214,
2024-05-27 00:10:00 +02:00
"id": "a7f3d067-3a95-4ef5-b216-be5840bc8831",
"metadata": {},
"outputs": [],
"source": [
"from collections import defaultdict\n",
"import copy\n",
"import json\n",
"from copy import deepcopy\n",
"\n",
"from convlab.policy.policy import Policy\n",
"from convlab.util.multiwoz.dbquery import Database\n",
"\n",
2024-05-27 20:53:07 +02:00
"db_path = './hotels_data.json'\n",
2024-05-27 00:10:00 +02:00
"\n",
2024-05-27 20:53:07 +02:00
"class DialoguePolicy(Policy):\n",
2024-05-27 00:10:00 +02:00
" def __init__(self):\n",
" Policy.__init__(self)\n",
2024-05-27 20:53:07 +02:00
" self.db = self.load_database(db_path)\n",
"\n",
" def load_database(self, db_path):\n",
" with open(db_path, 'r', encoding='utf-8') as f:\n",
" return json.load(f)\n",
"\n",
" def query(self, domain, constraints):\n",
" if domain != 'hotel':\n",
" return []\n",
" \n",
" results = []\n",
" for entry in self.db:\n",
" match = all(entry.get(key) == value for key, value in constraints)\n",
" if match:\n",
" results.append(entry)\n",
" return results\n",
2024-05-27 00:10:00 +02:00
"\n",
" def predict(self, state):\n",
" self.results = []\n",
" system_action = defaultdict(list)\n",
" user_action = defaultdict(list)\n",
"\n",
" for intent, domain, slot, value in state['user_action']:\n",
" user_action[(domain.lower(), intent.lower())].append((slot.lower(), value))\n",
"\n",
" for user_act in user_action:\n",
" self.update_system_action(user_act, user_action, state, system_action)\n",
"\n",
" if any(True for slots in user_action.values() for (slot, _) in slots if slot in ['book stay', 'book day', 'book people']):\n",
" if self.results:\n",
" system_action = {('Booking', 'Book'): [[\"Ref\", self.results[0].get('Ref', 'N/A')]]}\n",
"\n",
" system_acts = [[intent, domain, slot, value] for (domain, intent), slots in system_action.items() for slot, value in slots]\n",
" state['system_action'] = system_acts\n",
" return system_acts\n",
"\n",
" def update_system_action(self, user_act, user_action, state, system_action):\n",
" domain, intent = user_act\n",
2024-05-27 20:53:07 +02:00
" constraints = [(slot, value) for slot, value in state['belief_state'][domain]['info'].items() if value != '']\n",
" print(f\"Constraints: {constraints}\")\n",
" self.results = deepcopy(self.query(domain.lower(), constraints))\n",
" print(f\"Query results: {self.results}\")\n",
2024-05-27 00:10:00 +02:00
"\n",
" if intent == 'request':\n",
" if len(self.results) == 0:\n",
" system_action[(domain, 'NoOffer')] = []\n",
" else:\n",
" for slot in user_action[user_act]: \n",
" if slot[0] in self.results[0]:\n",
" system_action[(domain, 'Inform')].append([slot[0], self.results[0].get(slot[0], 'unknown')])\n",
"\n",
" elif intent == 'inform':\n",
" if len(self.results) == 0:\n",
" system_action[(domain, 'NoOffer')] = []\n",
" else:\n",
" system_action[(domain, 'Inform')].append(['Choice', str(len(self.results))])\n",
" choice = self.results[0]\n",
"\n",
2024-05-27 20:53:07 +02:00
" if domain in [\"hotel\"]:\n",
" system_action[(domain, 'Recommend')].append(['Name', choice['name']])\n",
" for slot in state['belief_state'][domain]['info']:\n",
" if choice.get(slot):\n",
" state['belief_state'][domain]['info'][slot] = choice[slot]"
2024-05-27 00:10:00 +02:00
]
},
{
"cell_type": "code",
2024-05-27 20:53:07 +02:00
"execution_count": 218,
"id": "11f34b20-c5b0-4752-8610-21f5eef4b569",
2024-05-27 00:10:00 +02:00
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:nlu info_dict is not initialized\n",
"WARNING:root:dst info_dict is not initialized\n",
"WARNING:root:policy info_dict is not initialized\n",
"WARNING:root:nlg info_dict is not initialized\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"NLG seed 0\n"
]
}
],
"source": [
"from convlab.nlg.template.multiwoz import TemplateNLG\n",
2024-05-27 20:53:07 +02:00
"from convlab.dialog_agent import PipelineAgent\n",
2024-05-27 00:10:00 +02:00
"\n",
"nlu = NaturalLanguageAnalyzer()\n",
2024-05-27 20:53:07 +02:00
"dst = DialogueStateTracker()\n",
"policy = DialoguePolicy()\n",
2024-05-27 00:10:00 +02:00
"nlg = TemplateNLG(is_user=False)\n",
2024-05-27 20:53:07 +02:00
"\n",
2024-05-27 00:10:00 +02:00
"agent = PipelineAgent(nlu=nlu, dst=dst, policy=policy, nlg=nlg, name='sys')"
]
},
{
"cell_type": "code",
2024-05-27 20:53:07 +02:00
"execution_count": 219,
"id": "faf05778-2bca-4044-97a7-d6facf853e10",
"metadata": {},
"outputs": [],
"source": [
"# nla = NaturalLanguageAnalyzer()\n",
"# nla_response = nla.predict(\"chciałbym zarezerwować drogi hotel bez parkingu 1 stycznia w Warszawie w centrum\")\n",
"# print(nla_response)\n",
"# response = agent.response(nla_response)\n",
"# print(response)"
]
},
{
"cell_type": "code",
"execution_count": 220,
"id": "6c837788-e7d5-483e-b873-00061f118619",
2024-05-27 00:10:00 +02:00
"metadata": {},
"outputs": [
{
2024-05-27 20:53:07 +02:00
"name": "stdout",
"output_type": "stream",
"text": [
"Constraints: [('area', 'centre'), ('parking', 'yes'), ('price range', 'expensive'), ('type', 'hotel')]\n",
"Query results: [{'name': 'Four Seasons Hotel', 'area': 'centre', 'parking': 'yes', 'price range': 'expensive', 'stars': '5', 'internet': 'yes', 'type': 'hotel'}, {'name': 'The Ritz Hotel', 'area': 'centre', 'parking': 'yes', 'price range': 'expensive', 'stars': '5', 'internet': 'yes', 'type': 'hotel'}, {'name': 'The Savoy Hotel', 'area': 'centre', 'parking': 'yes', 'price range': 'expensive', 'stars': '5', 'internet': 'yes', 'type': 'hotel'}, {'name': 'Shangri-La Hotel', 'area': 'centre', 'parking': 'yes', 'price range': 'expensive', 'stars': '5', 'internet': 'yes', 'type': 'hotel'}]\n",
"We have 4 such places . Four Seasons Hotel looks like it would be a good choice .\n"
]
2024-05-27 00:10:00 +02:00
}
],
"source": [
2024-05-27 20:53:07 +02:00
"response = agent.response(\"chciałbym zarezerwować drogi hotel z parkingiem 1 stycznia w Warszawie w centrum\")\n",
"print(response)"
2024-05-27 00:10:00 +02:00
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7f831f56-10ba-40da-a89c-baeed37df81e",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}