{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import glob\n",
    "import pandas as pd\n",
    "import re\n",
    "import tensorflow as tf\n",
    "from collections import Counter\n",
    "import numpy as np\n",
    "count = Counter()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "# glob returns paths in whatever order the OS lists the directory; sort them\n",
    "# so the row order of the combined frame is the same on every machine/run.\n",
    "filenames1 = sorted(glob.glob('Systemy_dialogowe/data/*.tsv'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "def prep(filenames):\n",
    "    \"\"\"Read headerless TSV files and stack them into a single DataFrame.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    filenames : iterable of str\n",
    "        Paths of tab-separated files. Every file is read without a header\n",
    "        row and its columns are named 'Rola', 'Wypowiedź', 'Act'.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        Rows of all files, in the order the files were given, re-indexed\n",
    "        from 0. When no files are given, an empty frame that still has the\n",
    "        three columns.\n",
    "    \"\"\"\n",
    "    columns = ['Rola', 'Wypowiedź', 'Act']\n",
    "    # Read everything first and concatenate once: calling pd.concat inside the\n",
    "    # loop re-copies all previously loaded rows on every iteration.\n",
    "    frames = [pd.read_csv(filename, sep='\\t', names=columns) for filename in filenames]\n",
    "    if not frames:\n",
    "        # pd.concat raises on an empty list; keep the empty-but-typed result.\n",
    "        return pd.DataFrame(columns=columns)\n",
    "    return pd.concat(frames, ignore_index=True)"
   ]
  },
  { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Rola | \n", "Wypowiedź | \n", "Act | \n", "
---|---|---|---|
1 | \n", "user | \n", "Witam | \n", "hello() | \n", "
3 | \n", "user | \n", "Co możesz dla mnie zrobić? | \n", "help() | \n", "
5 | \n", "user | \n", "Jakie są moje repozytoria? | \n", "request(repos) | \n", "
7 | \n", "user | \n", "ok. co nowego w Zajęcia AI? | \n", "affirm() & request(repo = Zajecia AI) | \n", "
9 | \n", "user | \n", "Tylko tyle? | \n", "reqmore() | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "
1409 | \n", "user | \n", "upewniam się | \n", "null | \n", "
1411 | \n", "user | \n", "pokaż mi raport | \n", "request(repo) | \n", "
1414 | \n", "user | \n", "zmienić | \n", "null | \n", "
1416 | \n", "user | \n", "Tak | \n", "ack | \n", "
1466 | \n", "user | \n", "elo | \n", "hello() | \n", "
585 rows × 3 columns
\n", "\n", " | Wypowiedź | \n", "Act | \n", "
---|---|---|
0 | \n", "czesc | \n", "[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ... | \n", "
1 | \n", "chcial odrzuc pr | \n", "[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ... | \n", "
2 | \n", "capric | \n", "[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | \n", "
3 | \n", "oki to bedzie tyle | \n", "[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | \n", "
4 | \n", "qgphjd | \n", "[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | \n", "
... | \n", "... | \n", "... | \n", "
586 | \n", "wyjasn mi komend pokaz mi liste komend | \n", "[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ... | \n", "
587 | \n", "wyjasn mi komend | \n", "[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ... | \n", "
588 | \n", "zmieni konfig | \n", "[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, ... | \n", "
589 | \n", "ggphjd | \n", "[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | \n", "
590 | \n", "chce zmieni konfig | \n", "[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, ... | \n", "
591 rows × 2 columns
\n", "\n", " | Wypowiedź | \n", "Act | \n", "
---|---|---|
413 | \n", "[2, 1, 34, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... | \n", "[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ... | \n", "
414 | \n", "[40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... | \n", "[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | \n", "
415 | \n", "[2, 1, 12, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... | \n", "[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ... | \n", "
416 | \n", "[67, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... | \n", "[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... | \n", "
417 | \n", "[10, 13, 17, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... | \n", "[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ... | \n", "
... | \n", "... | \n", "... | \n", "
497 | \n", "[9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", "[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ... | \n", "
498 | \n", "[7, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", "[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ... | \n", "
499 | \n", "[58, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... | \n", "[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ... | \n", "
500 | \n", "[9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", "[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ... | \n", "
501 | \n", "[7, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... | \n", "[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ... | \n", "
89 rows × 2 columns
\n", "