{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "c:\\Users\\macty\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\tensorflow_addons\\utils\\tfa_eol_msg.py:23: UserWarning: \n",
      "\n",
      "TensorFlow Addons (TFA) has ended development and introduction of new features.\n",
      "TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.\n",
      "Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). \n",
      "\n",
      "For more information see: https://github.com/tensorflow/addons/issues/2807 \n",
      "\n",
      " warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "import tokenization\n",
    "\n",
    "import tensorflow as tf\n",
    "import tensorflow_hub as hub\n",
    "import tensorflow_addons as tfa\n",
    "\n",
    "import sklearn\n",
    "from sklearn.model_selection import train_test_split\n",
    "import glob\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Locate the dialogue TSV files in the 'data' folder under the working directory.\n",
    "# os.path.join picks the correct separator for the current OS; the previous\n",
    "# hard-coded backslash only worked on Windows and used an invalid escape sequence.\n",
    "path = os.path.join(os.getcwd(), 'data')\n",
    "\n",
    "# glob returns matches in arbitrary order; sorting keeps the file order (and so\n",
    "# the row order of the combined frame) reproducible across runs and machines.\n",
    "tsv_files = sorted(glob.glob(os.path.join(path, \"*.tsv\")))\n",
    "\n",
    "# Fail here with a clear message instead of later inside pd.concat.\n",
    "if not tsv_files:\n",
    "    raise FileNotFoundError(f'No .tsv files found in {path}')\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Read each file as tab-separated text without a header row, assigning the\n",
    "# three column names explicitly.\n",
    "dfs = []\n",
    "for filename in tsv_files:\n",
    "    df = pd.read_csv(filename, sep='\\t', header=None, names=[\"speaker\", \"sentence\", \"dialogue_act\"])\n",
    "    dfs.append(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stack the per-file frames row-wise and renumber the index from 0.\n",
    "combined_df = pd.concat(dfs, axis=0, ignore_index=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [ "
\n", " | speaker | \n", "sentence | \n", "dialogue_act | \n", "
---|---|---|---|
0 | \n", "user | \n", "Co proszę? | \n", "null()/hello() | \n", "
1 | \n", "system | \n", "Witam w systemie rezerwacji hotelu. Gdzie chci... | \n", "welcomemsg() | \n", "
2 | \n", "user | \n", "W jakim kraju/B-country mogę zarezerwować hotel? | \n", "help(country) | \n", "
3 | \n", "system | \n", "Mamy szeroki wybór hoteli na całym świecie. | \n", "expl-conf() | \n", "
4 | \n", "user | \n", "Przedstaw proszę oferty z obszaru Górnego Kara... | \n", "request(country=Górny Karabuch) | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "
347 | \n", "system | \n", "Okej w takim razie, proponuję ten sam hotel w ... | \n", "offer(price=110, date=02.07.2023- 08.07.2023) | \n", "
348 | \n", "user | \n", "Jak najbardziej. Proszę o zarezerwowanie/B-res... | \n", "confirm() | \n", "
349 | \n", "system | \n", "Dobrze, numer rezerwacji to 912312. Dokładny A... | \n", "inform(reservation_number=912312, address=3 ma... | \n", "
350 | \n", "user | \n", "Nie, dziękuję i życzę miłego dnia | \n", "negate()&thankyou()&bye() | \n", "
351 | \n", "system | \n", "Dziękuję bardzo wzajemnie. | \n", "thankyou() | \n", "
352 rows × 3 columns
\n", "