1727 lines
61 KiB
Plaintext
1727 lines
61 KiB
Plaintext
{
|
||
"cells": [
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 1,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"import glob\n",
|
||
"import pandas as pd\n",
|
||
"import re\n",
|
||
"import tensorflow as tf\n",
|
||
"from collections import Counter\n",
|
||
"import numpy as np\n",
|
||
"count = Counter()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 2,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"# Paths of all dialogue transcripts. glob yields them in arbitrary order, so they\n",
"# are sorted to give the concatenated rows the same order on every machine.\n",
"filenames1 = sorted(glob.glob('Systemy_dialogowe/data/*.tsv'))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 3,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"def prep(filenames):\n",
"    \"\"\"Load tab-separated dialogue files into a single DataFrame.\n",
"\n",
"    Every file is read with the column names 'Rola', 'Wypowiedź' and 'Act'\n",
"    supplied explicitly.\n",
"\n",
"    Args:\n",
"        filenames: iterable of paths to the .tsv files.\n",
"\n",
"    Returns:\n",
"        DataFrame holding the rows of all files, stacked in the order given,\n",
"        with a fresh 0..n-1 index. When no files are given, an empty frame\n",
"        with the three columns is returned.\n",
"    \"\"\"\n",
"    columns = ['Rola', 'Wypowiedź', 'Act']\n",
"    frames = [pd.read_csv(filename, sep='\\t', names=columns) for filename in filenames]\n",
"    if not frames:\n",
"        return pd.DataFrame(columns=columns)\n",
"    # Concatenate once: doing it inside the loop re-copies all rows read so far,\n",
"    # and seeding the result with an empty frame is deprecated in newer pandas.\n",
"    return pd.concat(frames, ignore_index=True)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Rola</th>\n",
|
||
" <th>Wypowiedź</th>\n",
|
||
" <th>Act</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>user</td>\n",
|
||
" <td>Witam</td>\n",
|
||
" <td>hello()</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>user</td>\n",
|
||
" <td>Co możesz dla mnie zrobić?</td>\n",
|
||
" <td>help()</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>5</th>\n",
|
||
" <td>user</td>\n",
|
||
" <td>Jakie są moje repozytoria?</td>\n",
|
||
" <td>request(repos)</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>7</th>\n",
|
||
" <td>user</td>\n",
|
||
" <td>ok. co nowego w Zajęcia AI?</td>\n",
|
||
" <td>affirm() & request(repo = Zajecia AI)</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>9</th>\n",
|
||
" <td>user</td>\n",
|
||
" <td>Tylko tyle?</td>\n",
|
||
" <td>reqmore()</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1409</th>\n",
|
||
" <td>user</td>\n",
|
||
" <td>upewniam się</td>\n",
|
||
" <td>null</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1411</th>\n",
|
||
" <td>user</td>\n",
|
||
" <td>pokaż mi raport</td>\n",
|
||
" <td>request(repo)</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1414</th>\n",
|
||
" <td>user</td>\n",
|
||
" <td>zmienić</td>\n",
|
||
" <td>null</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1416</th>\n",
|
||
" <td>user</td>\n",
|
||
" <td>Tak</td>\n",
|
||
" <td>ack</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1466</th>\n",
|
||
" <td>user</td>\n",
|
||
" <td>elo</td>\n",
|
||
" <td>hello()</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>585 rows × 3 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Rola Wypowiedź Act\n",
|
||
"1 user Witam hello()\n",
|
||
"3 user Co możesz dla mnie zrobić? help()\n",
|
||
"5 user Jakie są moje repozytoria? request(repos)\n",
|
||
"7 user ok. co nowego w Zajęcia AI? affirm() & request(repo = Zajecia AI)\n",
|
||
"9 user Tylko tyle? reqmore()\n",
|
||
"... ... ... ...\n",
|
||
"1409 user upewniam się null\n",
|
||
"1411 user pokaż mi raport request(repo)\n",
|
||
"1414 user zmienić null\n",
|
||
"1416 user Tak ack\n",
|
||
"1466 user elo hello()\n",
|
||
"\n",
|
||
"[585 rows x 3 columns]"
|
||
]
|
||
},
|
||
"execution_count": 4,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
"# Load every transcript, keep only the rows spoken by the user and replace\n",
"# every missing value with the string 'null'.\n",
"all_turns = prep(filenames1)\n",
"is_user_turn = all_turns['Rola'] == 'user'\n",
"df = all_turns.loc[is_user_turn].fillna('null')\n",
"df"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array(['hello()', 'help()', 'request(repos)',\n",
|
||
" 'affirm() & request(repo = Zajecia AI)', 'reqmore()',\n",
|
||
" 'request(notifications)', 'request(notification = 1)',\n",
|
||
" 'request(link)', 'affirm() & reqmore()',\n",
|
||
" 'request(repo = Projekt – Sklep)', 'request(issues)', 'ack()',\n",
|
||
" 'request(commits = mattyl34)', 'request(commits = -5)',\n",
|
||
" 'affirm() & request(repo = Gra - kółko i krzyżyk)', 'thankyou()',\n",
|
||
" 'inform()', 'helpresponse()', 'request(repo = Zajecia AI)',\n",
|
||
" 'request(delete)', 'ack() & inform()',\n",
|
||
" 'request(repo = Projekt-sklep)', 'request(files = 1:3)',\n",
|
||
" 'request()', 'bye()', 'request(file)',\n",
|
||
" 'helpresponse() & request(repo)', 'request(command)',\n",
|
||
" 'request(repo = Projekt - Sklep)', 'request(authors)',\n",
|
||
" 'request(Bob)', 'request(repo = system)',\n",
|
||
" 'request(repo = super_stronka_internetowa)',\n",
|
||
" 'request(date, pr = 2)', 'request(repo = nazwaRepozytorium)',\n",
|
||
" 'request(repo = zadania)', 'request(author, pr = 1)',\n",
|
||
" 'request(deny, pr = 1)', 'request(rollback, commit = last)',\n",
|
||
" 'request(repo = zajecia)', 'request(newPR)', 'inform(branches)',\n",
|
||
" 'inform(title)', 'request(repo = pizza)', 'request(bilet)',\n",
|
||
" 'request(repo)', 'inform(capriciosa)', 'inform(gGphJD)', 'affirm',\n",
|
||
" 'help', 'hello', 'inform(qgphjd)', 'null', 'bye',\n",
|
||
" 'inform(qwdqwdqaswdaqdfqfwqwfq)', 'inform(qGphJs)',\n",
|
||
" 'inform(qgphid)', 'inform(pGphJD)', 'thankyou', 'inform(qGphJ0)',\n",
|
||
" 'inform(qGphJ)', 'inform(DJhpGq)', 'inform(phgdj)',\n",
|
||
" 'inform(QgPHjd)', 'ack'], dtype=object)"
|
||
]
|
||
},
|
||
"execution_count": 5,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"df['Act'].unique()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "markdown",
|
||
"metadata": {},
|
||
"source": [
|
"### Compound acts are split on `&`\n",
"### Arguments and the surrounding `()` are ignored\n",
"### Resulting label set: {'request', 'inform', 'bye', 'reqmore', 'help', 'ack', 'affirm', 'hello', 'thankyou', 'null'}"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 6,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"{'help', 'reqmore', 'bye', 'thankyou', 'hello', 'affirm', 'ack', 'inform', 'request', 'null'}\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
"# Expand each user turn into one row per dialogue act: compound labels are split\n",
"# on '&', the argument list '(...)' and all spaces are removed, and\n",
"# 'helpresponse' is folded into 'help'.\n",
"records = []\n",
"# Columns are addressed by name; integer lookups on a label-indexed row are\n",
"# deprecated in pandas.\n",
"for utterance, raw_act in zip(df['Wypowiedź'], df['Act']):\n",
"    for part in raw_act.split('&'):\n",
"        label = re.sub(r'\\(.*\\)', '', part).replace(' ', '')\n",
"        label = label.replace('helpresponse', 'help')\n",
"        records.append({'Wypowiedź': utterance, 'Act': label})\n",
"\n",
"# Build the frame once from the collected rows instead of concatenating a small\n",
"# frame per turn.\n",
"new = pd.DataFrame(records, columns=['Wypowiedź', 'Act'])\n",
"values = set(new['Act'])\n",
"print(values)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"{'request': Wypowiedź Act\n",
|
||
" 2 Jakie są moje repozytoria? request\n",
|
||
" 4 ok. co nowego w Zajęcia AI? request\n",
|
||
" 6 Mam jakieś nowe powiadomienia? request\n",
|
||
" 8 Opowiedz mi o powiadomieniu 1 request\n",
|
||
" 9 Podaj mi linka do tego powiadomienia request\n",
|
||
" .. ... ...\n",
|
||
" 571 Próbuję ponownie request\n",
|
||
" 573 próbuję request\n",
|
||
" 577 pokaż mi raport projektu request\n",
|
||
" 584 pokaż mi raport projektu request\n",
|
||
" 587 pokaż mi raport request\n",
|
||
" \n",
|
||
" [130 rows x 2 columns],\n",
|
||
" 'inform': Wypowiedź Act\n",
|
||
" 26 to wszystko inform\n",
|
||
" 33 Oki, to będzie tyle inform\n",
|
||
" 76 To nie, to już wszystko inform\n",
|
||
" 103 moja gałąź, \"master\" inform\n",
|
||
" 104 moja PR inform\n",
|
||
" .. ... ...\n",
|
||
" 535 gGphJD inform\n",
|
||
" 545 qGphJ inform\n",
|
||
" 552 DJhpGq inform\n",
|
||
" 578 phgdj inform\n",
|
||
" 579 QgPHjd inform\n",
|
||
" \n",
|
||
" [68 rows x 2 columns],\n",
|
||
" 'bye': Wypowiedź Act\n",
|
||
" 50 Do widzenia bye\n",
|
||
" 56 Dziękuję, do widzenia bye\n",
|
||
" 77 Papa bye\n",
|
||
" 141 exit bye\n",
|
||
" 164 exit bye\n",
|
||
" 172 Nic z tych komend mnie nie interesuje bye\n",
|
||
" 195 exit bye\n",
|
||
" 203 Nic z tych komend mnie nie interesuje bye\n",
|
||
" 234 exit bye\n",
|
||
" 242 Nic z tych komend mnie nie interesuje bye\n",
|
||
" 284 exit bye\n",
|
||
" 292 Nic z tych komend mnie nie interesuje bye\n",
|
||
" 322 Nie chcę bye\n",
|
||
" 345 exit bye\n",
|
||
" 353 Nic z tych komend mnie nie interesuje bye\n",
|
||
" 383 Nie chcę bye\n",
|
||
" 393 To wszystko, dziękuje bye\n",
|
||
" 416 exit bye\n",
|
||
" 424 Nic z tych komend mnie nie interesuje bye\n",
|
||
" 454 Nie chcę bye\n",
|
||
" 464 To wszystko, dziękuje bye\n",
|
||
" 471 Do widzenia bye\n",
|
||
" 494 exit bye\n",
|
||
" 502 Nic z tych komend mnie nie interesuje bye\n",
|
||
" 532 Nie chcę bye\n",
|
||
" 542 To wszystko, dziękuje bye\n",
|
||
" 549 Do widzenia bye,\n",
|
||
" 'reqmore': Wypowiedź Act\n",
|
||
" 5 Tylko tyle? reqmore\n",
|
||
" 7 Jakie? reqmore\n",
|
||
" 11 okej. jakie jeszcze informacje możesz mi przek... reqmore\n",
|
||
" 12 O tym samym reqmore\n",
|
||
" 15 Co jeszcze możesz dla mnie zrobić? reqmore\n",
|
||
" 18 Jakie? reqmore\n",
|
||
" 20 A pozostałe dwa? reqmore\n",
|
||
" 24 ok; A jakby były to powiedziałbyś mi o nich? reqmore,\n",
|
||
" 'help': Wypowiedź Act\n",
|
||
" 1 Co możesz dla mnie zrobić? help\n",
|
||
" 28 Jakie są dostępne funkcje? help\n",
|
||
" 35 Chciałabym się dowiedzieć jakie usługi oferujecie help\n",
|
||
" 42 Chciałbym poznać funkcję systemu help\n",
|
||
" 58 Jak mi możesz pomóc; Chcę nowe repo help\n",
|
||
" .. ... ...\n",
|
||
" 567 pokaż listę komend help\n",
|
||
" 568 pokaż listę komend help\n",
|
||
" 581 pokaż help\n",
|
||
" 582 pokaż help\n",
|
||
" 583 wyjaśnij mi komendę pokaż mi listę komend help\n",
|
||
" \n",
|
||
" [140 rows x 2 columns],\n",
|
||
" 'ack': Wypowiedź Act\n",
|
||
" 16 Tak ack\n",
|
||
" 32 Oki, to będzie tyle ack\n",
|
||
" 39 tak ack\n",
|
||
" 48 Rozumiem ack\n",
|
||
" 73 tak ack\n",
|
||
" 80 Tak ack\n",
|
||
" 85 Tak ack\n",
|
||
" 91 tak ack\n",
|
||
" 94 Tak, poproszę ack\n",
|
||
" 101 tak ack\n",
|
||
" 107 tak ack\n",
|
||
" 110 tak ack\n",
|
||
" 113 tak ack\n",
|
||
" 589 Tak ack,\n",
|
||
" 'affirm': Wypowiedź Act\n",
|
||
" 3 ok. co nowego w Zajęcia AI? affirm\n",
|
||
" 10 okej. jakie jeszcze informacje możesz mi przek... affirm\n",
|
||
" 21 ok. Są jakieś failujące testy w tym repo? affirm\n",
|
||
" 23 ok; A jakby były to powiedziałbyś mi o nich? affirm\n",
|
||
" 117 kontynuuj affirm\n",
|
||
" 122 kontynuuj affirm\n",
|
||
" 137 chcę kontynuować affirm\n",
|
||
" 145 kontynuuj affirm\n",
|
||
" 160 chcę kontynuować affirm\n",
|
||
" 169 kontynuować affirm\n",
|
||
" 176 kontynuuj affirm\n",
|
||
" 191 chcę kontynuować affirm\n",
|
||
" 200 kontynuować affirm\n",
|
||
" 211 Chcę kontynuować affirm\n",
|
||
" 215 kontynuuj affirm\n",
|
||
" 230 chcę kontynuować affirm\n",
|
||
" 239 kontynuować affirm\n",
|
||
" 250 Chcę kontynuować affirm\n",
|
||
" 259 to nie chce zmieniać konfiguracji affirm\n",
|
||
" 265 kontynuuj affirm\n",
|
||
" 280 chcę kontynuować affirm\n",
|
||
" 289 kontynuować affirm\n",
|
||
" 300 Chcę kontynuować affirm\n",
|
||
" 309 to nie chce zmieniać konfiguracji affirm\n",
|
||
" 316 kontynuuj affirm\n",
|
||
" 326 kontynuuj affirm\n",
|
||
" 341 chcę kontynuować affirm\n",
|
||
" 350 kontynuować affirm\n",
|
||
" 361 Chcę kontynuować affirm\n",
|
||
" 370 to nie chce zmieniać konfiguracji affirm\n",
|
||
" 377 kontynuuj affirm\n",
|
||
" 388 kontynuuj affirm\n",
|
||
" 397 kontynuuj affirm\n",
|
||
" 412 chcę kontynuować affirm\n",
|
||
" 421 kontynuować affirm\n",
|
||
" 432 Chcę kontynuować affirm\n",
|
||
" 441 to nie chce zmieniać konfiguracji affirm\n",
|
||
" 448 kontynuuj affirm\n",
|
||
" 459 kontynuuj affirm\n",
|
||
" 475 kontynuuj affirm\n",
|
||
" 490 chcę kontynuować affirm\n",
|
||
" 499 kontynuować affirm\n",
|
||
" 510 Chcę kontynuować affirm\n",
|
||
" 519 to nie chce zmieniać konfiguracji affirm\n",
|
||
" 526 kontynuuj affirm\n",
|
||
" 537 kontynuuj affirm\n",
|
||
" 566 No dobra, to kontynuujemy affirm\n",
|
||
" 570 kontynuować affirm\n",
|
||
" 572 Kontynuować affirm\n",
|
||
" 585 kontynuujmy affirm,\n",
|
||
" 'hello': Wypowiedź Act\n",
|
||
" 0 Witam hello\n",
|
||
" 27 Dzień dobry! hello\n",
|
||
" 34 Dzień dobry hello\n",
|
||
" 41 Cześć hello\n",
|
||
" 51 Dzień dobry panie bocie hello\n",
|
||
" 57 Elo hello\n",
|
||
" 78 Dzień dobry hello\n",
|
||
" 82 Witam hello\n",
|
||
" 88 Dzień dobry hello\n",
|
||
" 92 Dzień dobry hello\n",
|
||
" 99 Dzień dobry hello\n",
|
||
" 105 Dzień dobry hello\n",
|
||
" 111 Dzień dobry hello\n",
|
||
" 124 Widam hello\n",
|
||
" 147 Widam hello\n",
|
||
" 165 Witam hello\n",
|
||
" 178 Widam hello\n",
|
||
" 196 Witam hello\n",
|
||
" 204 Dzień dobry! hello\n",
|
||
" 217 Widam hello\n",
|
||
" 235 Witam hello\n",
|
||
" 243 Dzień dobry! hello\n",
|
||
" 252 Dzień dobry hello\n",
|
||
" 267 Widam hello\n",
|
||
" 285 Witam hello\n",
|
||
" 293 Dzień dobry! hello\n",
|
||
" 302 Dzień dobry hello\n",
|
||
" 312 Dzień dobry hello\n",
|
||
" 328 Widam hello\n",
|
||
" 346 Witam hello\n",
|
||
" 354 Dzień dobry! hello\n",
|
||
" 363 Dzień dobry hello\n",
|
||
" 373 Dzień dobry hello\n",
|
||
" 384 Cześć hello\n",
|
||
" 399 Widam hello\n",
|
||
" 417 Witam hello\n",
|
||
" 425 Dzień dobry! hello\n",
|
||
" 434 Dzień dobry hello\n",
|
||
" 444 Dzień dobry hello\n",
|
||
" 455 Cześć hello\n",
|
||
" 465 Cześć hello\n",
|
||
" 477 Widam hello\n",
|
||
" 495 Witam hello\n",
|
||
" 503 Dzień dobry! hello\n",
|
||
" 512 Dzień dobry hello\n",
|
||
" 522 Dzień dobry hello\n",
|
||
" 533 Cześć hello\n",
|
||
" 543 Cześć hello\n",
|
||
" 550 Elo hello\n",
|
||
" 576 Dzień dobry hello\n",
|
||
" 590 elo hello,\n",
|
||
" 'thankyou': Wypowiedź Act\n",
|
||
" 25 dziękuję thankyou\n",
|
||
" 40 dziękuję za informację thankyou\n",
|
||
" 49 To wszystko, dziękuje thankyou\n",
|
||
" 74 Dzięki thankyou\n",
|
||
" 81 Nie thankyou\n",
|
||
" 87 Rozumiem, to wszystko thankyou\n",
|
||
" 98 Nie thankyou\n",
|
||
" 108 Nie thankyou\n",
|
||
" 260 Dziękuje thankyou\n",
|
||
" 310 Dziękuje thankyou\n",
|
||
" 371 Dziękuje thankyou\n",
|
||
" 389 podoba mi się raport thankyou\n",
|
||
" 392 Dobrze thankyou\n",
|
||
" 442 Dziękuje thankyou\n",
|
||
" 460 podoba mi się raport thankyou\n",
|
||
" 463 Dobrze thankyou\n",
|
||
" 520 Dziękuje thankyou\n",
|
||
" 538 podoba mi się raport thankyou\n",
|
||
" 541 Dobrze thankyou\n",
|
||
" 575 Dzięki za pomoc thankyou,\n",
|
||
" 'null': Wypowiedź Act\n",
|
||
" 128 chcę zmienić projekt null\n",
|
||
" 130 A jak mogę zmienić konfigurację? null\n",
|
||
" 131 CHCĘ INNY PROJEKT null\n",
|
||
" 132 zgłoś błąd null\n",
|
||
" 135 Chcę zmienić konfigurację null\n",
|
||
" .. ... ...\n",
|
||
" 562 Chcę zmienić null\n",
|
||
" 574 zmienić null\n",
|
||
" 580 zmienic null\n",
|
||
" 586 upewniam się null\n",
|
||
" 588 zmienić null\n",
|
||
" \n",
|
||
" [83 rows x 2 columns]}"
|
||
]
|
||
},
|
||
"execution_count": 7,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
"unique = ['request', 'inform', 'bye', 'reqmore', 'help', 'ack', 'affirm', 'hello', 'thankyou', 'null']\n",
"\n",
"# Rows of `new` grouped by act label, in the order listed in `unique`.\n",
"sorted_values = {label: new.loc[new['Act'] == label] for label in unique}\n",
"\n",
"# Each group goes to its own tab-separated file named after the label.\n",
"for label, subset in sorted_values.items():\n",
"    subset.to_csv(f'data_sorted//{label}', sep='\\t', index=False)\n",
"sorted_values"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 8,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"def preprocess(line):\n",
"    \"\"\"Normalise one utterance into space-separated, truncated tokens.\n",
"\n",
"    The text is lower-cased, punctuation and ASCII digits are deleted, Polish\n",
"    diacritics are replaced by their base letters, and every whitespace-separated\n",
"    word is cut to its first six characters.\n",
"\n",
"    Side effect: each resulting token is tallied in the module-level ``count``\n",
"    Counter.\n",
"\n",
"    Args:\n",
"        line: raw utterance text.\n",
"\n",
"    Returns:\n",
"        The processed tokens joined by single spaces ('' when nothing is left).\n",
"    \"\"\"\n",
"    # Characters deleted outright. They are listed as plain text and handed to\n",
"    # str.maketrans so nothing needs regex escaping: the former alternation\n",
"    # pattern was mis-escaped and only matched the sequences '||<' and '|–',\n",
"    # never a lone '|', '\\' or '<'.\n",
"    removed = \"£§!@#$%^&*()_-+={[}]:;\\\"'|\\\\<,>.?/~`–0123456789\"\n",
"    table = str.maketrans(\"ąćęłńóśźż\", \"acelnoszz\", removed)\n",
"    # split() already collapses runs of whitespace and drops leading/trailing blanks.\n",
"    words = line.lower().translate(table).split()\n",
"    out = [word[:6] for word in words]\n",
"    count.update(out)\n",
"    return \" \".join(out)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 9,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from tensorflow.keras.utils import to_categorical\n",
|
||
"from numpy import argmax\n",
|
||
"\n",
|
"# The position of an act in this list is the integer class id used for encoding.\n",
"acts = ['inform', 'reqmore', 'thankyou', 'ack', 'affirm', 'hello', 'request', 'help', 'null', 'bye']\n",
"to_num = dict(zip(acts, range(len(acts))))"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 10,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"53"
|
||
]
|
||
},
|
||
"execution_count": 10,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
"# Preprocess every utterance and pair it with its one-hot encoded act label.\n",
"# NOTE: this rebinds `prep` from the loader function to the resulting DataFrame;\n",
"# the following cells read the DataFrame under this name.\n",
"values = set()  # reset as before; not read in this cell\n",
"max_len = 0\n",
"rows = []\n",
"for utterance, act in zip(new['Wypowiedź'], new['Act']):\n",
"    text = preprocess(utterance)\n",
"    one_hot = to_categorical(to_num[act], num_classes=len(acts))\n",
"    # Length is measured in tokens: max_len is later used as the padded length of\n",
"    # the token-id sequences, so counting characters would over-pad them.\n",
"    max_len = max(max_len, len(text.split()))\n",
"    rows.append({'Wypowiedź': text, 'Act': np.asarray(one_hot).astype('float32')})\n",
"\n",
"# Build the frame once instead of concatenating a one-row frame per utterance.\n",
"prep = pd.DataFrame(rows, columns=['Wypowiedź', 'Act'])\n",
"max_len"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 11,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Wypowiedź</th>\n",
|
||
" <th>Act</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>0</th>\n",
|
||
" <td>czesc</td>\n",
|
||
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>1</th>\n",
|
||
" <td>chcial odrzuc pr</td>\n",
|
||
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>2</th>\n",
|
||
" <td>capric</td>\n",
|
||
" <td>[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>3</th>\n",
|
||
" <td>oki to bedzie tyle</td>\n",
|
||
" <td>[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>4</th>\n",
|
||
" <td>qgphjd</td>\n",
|
||
" <td>[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>586</th>\n",
|
||
" <td>wyjasn mi komend pokaz mi liste komend</td>\n",
|
||
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>587</th>\n",
|
||
" <td>wyjasn mi komend</td>\n",
|
||
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>588</th>\n",
|
||
" <td>zmieni konfig</td>\n",
|
||
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, ...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>589</th>\n",
|
||
" <td>ggphjd</td>\n",
|
||
" <td>[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>590</th>\n",
|
||
" <td>chce zmieni konfig</td>\n",
|
||
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, ...</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>591 rows × 2 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Wypowiedź \\\n",
|
||
"0 czesc \n",
|
||
"1 chcial odrzuc pr \n",
|
||
"2 capric \n",
|
||
"3 oki to bedzie tyle \n",
|
||
"4 qgphjd \n",
|
||
".. ... \n",
|
||
"586 wyjasn mi komend pokaz mi liste komend \n",
|
||
"587 wyjasn mi komend \n",
|
||
"588 zmieni konfig \n",
|
||
"589 ggphjd \n",
|
||
"590 chce zmieni konfig \n",
|
||
"\n",
|
||
" Act \n",
|
||
"0 [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ... \n",
|
||
"1 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ... \n",
|
||
"2 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
|
||
"3 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
|
||
"4 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
|
||
".. ... \n",
|
||
"586 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ... \n",
|
||
"587 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ... \n",
|
||
"588 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, ... \n",
|
||
"589 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
|
||
"590 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, ... \n",
|
||
"\n",
|
||
"[591 rows x 2 columns]"
|
||
]
|
||
},
|
||
"execution_count": 11,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
"# Shuffle the rows with a fixed seed so the train/test/validation split taken\n",
"# from this order is the same on every run.\n",
"prep = prep.sample(frac=1, random_state=42).reset_index(drop=True)\n",
"prep"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 12,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"('mi', 219)\n",
|
||
"('pokaz', 133)\n",
|
||
"('komend', 124)\n",
|
||
"('projek', 97)\n",
|
||
"('raport', 84)\n",
|
||
"('wyjasn', 68)\n",
|
||
"('chce', 61)\n",
|
||
"('zmieni', 57)\n",
|
||
"('konfig', 45)\n",
|
||
"('jakie', 43)\n",
|
||
"('kontyn', 41)\n",
|
||
"('liste', 41)\n",
|
||
"('sa', 31)\n",
|
||
"('w', 27)\n",
|
||
"('funkcj', 26)\n",
|
||
"('dzien', 25)\n",
|
||
"('dobry', 25)\n",
|
||
"('dostep', 25)\n",
|
||
"('to', 24)\n",
|
||
"('mozesz', 22)\n",
|
||
"('repozy', 21)\n",
|
||
"('nie', 21)\n",
|
||
"('pomoc', 19)\n",
|
||
"('tak', 17)\n",
|
||
"('a', 16)\n",
|
||
"('czym', 16)\n",
|
||
"('moge', 15)\n",
|
||
"('chcial', 14)\n",
|
||
"('jeszcz', 12)\n",
|
||
"('dzieku', 12)\n",
|
||
"('ggphjd', 12)\n",
|
||
"('o', 11)\n",
|
||
"('jak', 10)\n",
|
||
"('witam', 9)\n",
|
||
"('co', 9)\n",
|
||
"('mnie', 9)\n",
|
||
"('repo', 9)\n",
|
||
"('wszyst', 9)\n",
|
||
"('z', 9)\n",
|
||
"('capric', 9)\n",
|
||
"('qgphjd', 9)\n",
|
||
"('lista', 9)\n",
|
||
"('sie', 8)\n",
|
||
"('system', 8)\n",
|
||
"('widam', 8)\n",
|
||
"('inny', 8)\n",
|
||
"('zglos', 8)\n",
|
||
"('blad', 8)\n",
|
||
"('exit', 8)\n",
|
||
"('inform', 7)\n",
|
||
"('adawda', 7)\n",
|
||
"('qwdqwd', 7)\n",
|
||
"('qgphjs', 7)\n",
|
||
"('nic', 7)\n",
|
||
"('tych', 7)\n",
|
||
"('intere', 7)\n",
|
||
"('ok', 6)\n",
|
||
"('mam', 6)\n",
|
||
"('nowe', 6)\n",
|
||
"('podaj', 6)\n",
|
||
"('do', 6)\n",
|
||
"('powied', 6)\n",
|
||
"('czesc', 6)\n",
|
||
"('qgphid', 6)\n",
|
||
"('dialog', 6)\n",
|
||
"('qgphj', 6)\n",
|
||
"('zajeci', 5)\n",
|
||
"('issue', 5)\n",
|
||
"('jest', 5)\n",
|
||
"('pr', 5)\n",
|
||
"('pgphjd', 5)\n",
|
||
"('napraw', 5)\n",
|
||
"('bylem', 5)\n",
|
||
"('botem', 5)\n",
|
||
"('i', 5)\n",
|
||
"('zle', 5)\n",
|
||
"('przepi', 5)\n",
|
||
"('kod', 5)\n",
|
||
"('moje', 4)\n",
|
||
"('ai', 4)\n",
|
||
"('powiad', 4)\n",
|
||
"('ostatn', 4)\n",
|
||
"('uslugi', 4)\n",
|
||
"('oferuj', 4)\n",
|
||
"('zobacz', 4)\n",
|
||
"('widzen', 4)\n",
|
||
"('no', 4)\n",
|
||
"('zrobic', 3)\n",
|
||
"('tyle', 3)\n",
|
||
"('jakies', 3)\n",
|
||
"('tym', 3)\n",
|
||
"('sklep', 3)\n",
|
||
"('pierws', 3)\n",
|
||
"('elo', 3)\n",
|
||
"('help', 3)\n",
|
||
"('status', 3)\n",
|
||
"('podoba', 3)\n",
|
||
"('dobrze', 3)\n",
|
||
"('dla', 2)\n",
|
||
"('nowego', 2)\n",
|
||
"('opowie', 2)\n",
|
||
"('okej', 2)\n",
|
||
"('przeka', 2)\n",
|
||
"('commit', 2)\n",
|
||
"('failuj', 2)\n",
|
||
"('testy', 2)\n",
|
||
"('jakby', 2)\n",
|
||
"('byly', 2)\n",
|
||
"('nich', 2)\n",
|
||
"('powiaz', 2)\n",
|
||
"('oki', 2)\n",
|
||
"('bedzie', 2)\n",
|
||
"('moich', 2)\n",
|
||
"('za', 2)\n",
|
||
"('na', 2)\n",
|
||
"('temat', 2)\n",
|
||
"('pliku', 2)\n",
|
||
"('rozumi', 2)\n",
|
||
"('zatem', 2)\n",
|
||
"('dzieki', 2)\n",
|
||
"('potraf', 2)\n",
|
||
"('moim', 2)\n",
|
||
"('moja', 2)\n",
|
||
"('pizza', 2)\n",
|
||
"('github', 2)\n",
|
||
"('briefi', 2)\n",
|
||
"('poka', 2)\n",
|
||
"('probuj', 2)\n",
|
||
"('tylko', 1)\n",
|
||
"('linka', 1)\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
"num_words = 130\n",
"\n",
"# Print the most frequent truncated tokens, one (token, frequency) pair per line.\n",
"top_tokens = count.most_common(num_words)\n",
"for pair in top_tokens:\n",
"    print(pair)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 13,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"# Boundaries of the split over the shuffled rows: the first 70 % are training\n",
"# data, the next 15 % test data and the remainder validation data.\n",
"total_rows = prep.shape[0]\n",
"train_size = int(total_rows * 0.70)\n",
"validation_size = int(total_rows * 0.85)  # index of the first validation row\n",
"\n",
"sentences, labels = prep.Wypowiedź, prep.Act\n",
"\n",
"train_sentences, train_labels = sentences[:train_size], labels[:train_size]\n",
"test_sentences, test_labels = (\n",
"    sentences[train_size:validation_size],\n",
"    labels[train_size:validation_size],\n",
")\n",
"validation_sentences, validation_labels = sentences[validation_size:], labels[validation_size:]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 14,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"( Wypowiedź \\\n",
|
||
" 0 czesc \n",
|
||
" 1 chcial odrzuc pr \n",
|
||
" 2 capric \n",
|
||
" 3 oki to bedzie tyle \n",
|
||
" 4 qgphjd \n",
|
||
" .. ... \n",
|
||
" 408 o tym samym \n",
|
||
" 409 pgphjd \n",
|
||
" 410 pokaz mi raport projek adawda \n",
|
||
" 411 w czym jeszcz mozesz mi pomoc \n",
|
||
" 412 opowie mi o zajeci ai \n",
|
||
" \n",
|
||
" Act \n",
|
||
" 0 [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ... \n",
|
||
" 1 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ... \n",
|
||
" 2 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
|
||
" 3 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
|
||
" 4 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
|
||
" .. ... \n",
|
||
" 408 [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
|
||
" 409 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
|
||
" 410 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ... \n",
|
||
" 411 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ... \n",
|
||
" 412 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ... \n",
|
||
" \n",
|
||
" [413 rows x 2 columns],\n",
|
||
" Wypowiedź Act\n",
|
||
" 413 pokaz mi lista komend [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...\n",
|
||
" 414 qgphjd [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...\n",
|
||
" 415 pokaz mi liste komend [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...\n",
|
||
" 416 qgphjs [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...\n",
|
||
" 417 jakie sa dostep funkcj [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...\n",
|
||
" .. ... ...\n",
|
||
" 497 kontyn [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...\n",
|
||
" 498 chce kontyn [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...\n",
|
||
" 499 czesc [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...\n",
|
||
" 500 kontyn [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...\n",
|
||
" 501 chce kontyn [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...\n",
|
||
" \n",
|
||
" [89 rows x 2 columns])"
|
||
]
|
||
},
|
||
"execution_count": 14,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
"# Re-join each split's sentences with its labels column-wise, keeping the row index.\n",
"train, test, validation = (\n",
"    pd.concat([split_sentences, split_labels], ignore_index=False, axis=1)\n",
"    for split_sentences, split_labels in (\n",
"        (train_sentences, train_labels),\n",
"        (test_sentences, test_labels),\n",
"        (validation_sentences, validation_labels),\n",
"    )\n",
")\n",
"\n",
"train, test"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 15,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"from keras.preprocessing.text import Tokenizer\n",
|
||
"\n",
|
||
"tokenizer = Tokenizer(num_words=num_words)\n",
|
||
"tokenizer.fit_on_texts(train['Wypowiedź'])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 16,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"word_index = tokenizer.word_index"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 17,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"413 pokaz mi lista komend\n",
|
||
"414 qgphjd\n",
|
||
"415 pokaz mi liste komend\n",
|
||
"416 qgphjs\n",
|
||
"417 jakie sa dostep funkcj\n",
|
||
" ... \n",
|
||
"497 kontyn\n",
|
||
"498 chce kontyn\n",
|
||
"499 czesc\n",
|
||
"500 kontyn\n",
|
||
"501 chce kontyn\n",
|
||
"Name: Wypowiedź, Length: 89, dtype: object"
|
||
]
|
||
},
|
||
"execution_count": 17,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"test['Wypowiedź']"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 18,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"# Replace the text of every split with its sequence of token ids from the\n",
"# tokenizer fitted on the training sentences.\n",
"for split_frame in (train, test, validation):\n",
"    split_frame['Wypowiedź'] = tokenizer.texts_to_sequences(split_frame['Wypowiedź'])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 19,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"[69]"
|
||
]
|
||
},
|
||
"execution_count": 19,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"# train[train['Wypowiedź'].str.len() == 1]\n",
|
||
"train['Wypowiedź'][5]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 20,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"(1, 4)"
|
||
]
|
||
},
|
||
"execution_count": 20,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"len(train['Wypowiedź'][5]), len(train['Wypowiedź'][200])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 21,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# type(train['Wypowiedź'][0]),type(test['Wypowiedź'][588]),type(train['Act'][0]),type(test['Act'][588])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 22,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"from keras.preprocessing.sequence import pad_sequences\n",
"\n",
"# Every id sequence is right-padded with zeros (or right-truncated) so that\n",
"# all rows end up with exactly `max_len` entries.\n",
"pad_options = {\"maxlen\": max_len, \"padding\": \"post\", \"truncating\": \"post\"}\n",
"\n",
"temp_a = pad_sequences(train['Wypowiedź'].tolist(), **pad_options)\n",
"temp_b = pad_sequences(test['Wypowiedź'].tolist(), **pad_options)\n",
"temp_c = pad_sequences(validation['Wypowiedź'].tolist(), **pad_options)\n",
"\n",
"# Write the padded rows back as plain Python lists, one list per cell.\n",
"train['Wypowiedź'] = temp_a.tolist()\n",
"test['Wypowiedź'] = temp_b.tolist()\n",
"validation['Wypowiedź'] = temp_c.tolist()"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 23,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"413 [2, 1, 34, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...\n",
|
||
"414 [40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...\n",
|
||
"415 [2, 1, 12, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...\n",
|
||
"416 [67, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...\n",
|
||
"417 [10, 13, 17, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...\n",
|
||
" ... \n",
|
||
"497 [9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...\n",
|
||
"498 [7, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...\n",
|
||
"499 [58, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...\n",
|
||
"500 [9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...\n",
|
||
"501 [7, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...\n",
|
||
"Name: Wypowiedź, Length: 89, dtype: object"
|
||
]
|
||
},
|
||
"execution_count": 23,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"test['Wypowiedź']"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 24,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"import keras\n",
"\n",
"# Stop training once validation loss has failed to improve for 5 epochs and\n",
"# restore the weights from the best epoch.\n",
"early_stopping = keras.callbacks.EarlyStopping(\n",
"    monitor=\"val_loss\", patience=5, restore_best_weights=True, verbose=0\n",
")\n",
"\n",
"# Divide the learning rate by 10 after 2 epochs without improvement.\n",
"# This watches val_loss, the same quantity early stopping and the tuner\n",
"# objective use; training accuracy keeps rising while the model overfits,\n",
"# so it is a poor trigger for lowering the learning rate.\n",
"reduce_lr_on_plateau = keras.callbacks.ReduceLROnPlateau(\n",
"    monitor=\"val_loss\", factor=0.1, patience=2, verbose=0\n",
")\n",
"\n",
"callbacks_list = [early_stopping, reduce_lr_on_plateau]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 25,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/html": [
|
||
"<div>\n",
|
||
"<style scoped>\n",
|
||
" .dataframe tbody tr th:only-of-type {\n",
|
||
" vertical-align: middle;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe tbody tr th {\n",
|
||
" vertical-align: top;\n",
|
||
" }\n",
|
||
"\n",
|
||
" .dataframe thead th {\n",
|
||
" text-align: right;\n",
|
||
" }\n",
|
||
"</style>\n",
|
||
"<table border=\"1\" class=\"dataframe\">\n",
|
||
" <thead>\n",
|
||
" <tr style=\"text-align: right;\">\n",
|
||
" <th></th>\n",
|
||
" <th>Wypowiedź</th>\n",
|
||
" <th>Act</th>\n",
|
||
" </tr>\n",
|
||
" </thead>\n",
|
||
" <tbody>\n",
|
||
" <tr>\n",
|
||
" <th>413</th>\n",
|
||
" <td>[2, 1, 34, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...</td>\n",
|
||
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>414</th>\n",
|
||
" <td>[40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...</td>\n",
|
||
" <td>[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>415</th>\n",
|
||
" <td>[2, 1, 12, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...</td>\n",
|
||
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>416</th>\n",
|
||
" <td>[67, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...</td>\n",
|
||
" <td>[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>417</th>\n",
|
||
" <td>[10, 13, 17, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...</td>\n",
|
||
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>...</th>\n",
|
||
" <td>...</td>\n",
|
||
" <td>...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>497</th>\n",
|
||
" <td>[9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
|
||
" <td>[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>498</th>\n",
|
||
" <td>[7, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
|
||
" <td>[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>499</th>\n",
|
||
" <td>[58, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...</td>\n",
|
||
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>500</th>\n",
|
||
" <td>[9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
|
||
" <td>[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
|
||
" </tr>\n",
|
||
" <tr>\n",
|
||
" <th>501</th>\n",
|
||
" <td>[7, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
|
||
" <td>[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
|
||
" </tr>\n",
|
||
" </tbody>\n",
|
||
"</table>\n",
|
||
"<p>89 rows × 2 columns</p>\n",
|
||
"</div>"
|
||
],
|
||
"text/plain": [
|
||
" Wypowiedź \\\n",
|
||
"413 [2, 1, 34, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... \n",
|
||
"414 [40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... \n",
|
||
"415 [2, 1, 12, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... \n",
|
||
"416 [67, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... \n",
|
||
"417 [10, 13, 17, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... \n",
|
||
".. ... \n",
|
||
"497 [9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n",
|
||
"498 [7, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n",
|
||
"499 [58, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... \n",
|
||
"500 [9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n",
|
||
"501 [7, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n",
|
||
"\n",
|
||
" Act \n",
|
||
"413 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ... \n",
|
||
"414 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
|
||
"415 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ... \n",
|
||
"416 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
|
||
"417 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ... \n",
|
||
".. ... \n",
|
||
"497 [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ... \n",
|
||
"498 [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ... \n",
|
||
"499 [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ... \n",
|
||
"500 [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ... \n",
|
||
"501 [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ... \n",
|
||
"\n",
|
||
"[89 rows x 2 columns]"
|
||
]
|
||
},
|
||
"execution_count": 25,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"test"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 26,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"def _to_float32(row):\n",
"    \"\"\"Return `row` as a float32 NumPy array.\"\"\"\n",
"    return np.asarray(row).astype('float32')\n",
"\n",
"\n",
"# Convert every cell of the feature and label columns, in all three splits.\n",
"for column in ('Wypowiedź', 'Act'):\n",
"    for frame in (train, test, validation):\n",
"        frame[column] = frame[column].apply(_to_float32)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 27,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# train['Wypowiedź'] = train['Wypowiedź'].apply(lambda row: tf.convert_to_tensor(np.asarray(row).astype('float32')))\n",
|
||
"# test['Wypowiedź'] = test['Wypowiedź'].apply(lambda row : tf.convert_to_tensor(np.asarray(row).astype('float32')))\n",
|
||
"\n",
|
||
"# train['Act'] = train['Act'].apply(lambda row : tf.convert_to_tensor(np.asarray(row).astype('float32')))\n",
|
||
"# test['Act'] = test['Act'].apply(lambda row : tf.convert_to_tensor(np.asarray(row).astype('float32')))\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 28,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"array([60., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
|
||
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
|
||
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
|
||
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
|
||
" 0.], dtype=float32)"
|
||
]
|
||
},
|
||
"execution_count": 28,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
" train['Wypowiedź'][2]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 29,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"(pandas.core.series.Series,\n",
|
||
" pandas.core.series.Series,\n",
|
||
" pandas.core.series.Series,\n",
|
||
" pandas.core.series.Series)"
|
||
]
|
||
},
|
||
"execution_count": 29,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"type(train['Wypowiedź']),type(test['Wypowiedź']),type(train['Act']),type(test['Act'])"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 30,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"def _column_to_tensor(frame, column):\n",
"    \"\"\"Convert the list of per-row values in `frame[column]` to a tf.Tensor.\"\"\"\n",
"    return tf.convert_to_tensor(frame[column].tolist())\n",
"\n",
"\n",
"# x = padded utterance ids, y = Act label vectors, for each split.\n",
"train_x, train_y = (_column_to_tensor(train, name) for name in ('Wypowiedź', 'Act'))\n",
"test_x, test_y = (_column_to_tensor(test, name) for name in ('Wypowiedź', 'Act'))\n",
"validation_x, validation_y = (\n",
"    _column_to_tensor(validation, name) for name in ('Wypowiedź', 'Act')\n",
")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 31,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"89"
|
||
]
|
||
},
|
||
"execution_count": 31,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"len(validation_y)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 32,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"<tf.Tensor: shape=(53,), dtype=float32, numpy=\n",
|
||
"array([58., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
|
||
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
|
||
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
|
||
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
|
||
" 0.], dtype=float32)>"
|
||
]
|
||
},
|
||
"execution_count": 32,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"train_x[0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 33,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# # import keras_tuner as kt\n",
|
||
"# from tensorflow.keras.models import Sequential\n",
|
||
"# from tensorflow.keras.layers import (\n",
|
||
"# Flatten,\n",
|
||
"# Dense,\n",
|
||
"# Embedding,\n",
|
||
"# Conv1D,\n",
|
||
"# GlobalMaxPooling1D,\n",
|
||
"# MaxPooling1D,\n",
|
||
"# )\n",
|
||
"\n",
|
||
"\n",
|
||
"# model = Sequential()\n",
|
||
"# model.add(\n",
|
||
"# Embedding(\n",
|
||
"# num_words,\n",
|
||
"# output_dim=128,\n",
|
||
"# input_length=max_len,\n",
|
||
"# )\n",
|
||
"# )\n",
|
||
"# model.add(\n",
|
||
"# Conv1D(\n",
|
||
"# filters=64,\n",
|
||
"# kernel_size=3,\n",
|
||
"# padding=\"same\",\n",
|
||
"# activation=\"relu\",\n",
|
||
"# strides=1,\n",
|
||
"# )\n",
|
||
"# )\n",
|
||
"# model.add(MaxPooling1D(pool_size=2, padding='same'))\n",
|
||
"# model.add(Flatten())\n",
|
||
"# model.add(\n",
|
||
"# Dense(\n",
|
||
"# units=128,\n",
|
||
"# activation=\"relu\",\n",
|
||
"# )\n",
|
||
"# )\n",
|
||
"# model.add(\n",
|
||
"# Dense(\n",
|
||
"# units=128,\n",
|
||
"# activation=\"relu\",\n",
|
||
"# )\n",
|
||
"# ) \n",
|
||
"# model.add(Dense(10, activation=\"softmax\"))\n",
|
||
"# model.compile(optimizer=\"rmsprop\", loss=\"categorical_crossentropy\", metrics=[\"acc\"])\n",
|
||
"# ########################################units????\n",
|
||
"# model.summary()\n",
|
||
"# # build_model(kt.HyperParameters())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 34,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"(<tf.Tensor: shape=(1,), dtype=int32, numpy=array([53])>,\n",
|
||
" <tf.Tensor: shape=(1,), dtype=int32, numpy=array([10])>,\n",
|
||
" <tf.Tensor: shape=(1,), dtype=int32, numpy=array([53])>,\n",
|
||
" <tf.Tensor: shape=(1,), dtype=int32, numpy=array([10])>)"
|
||
]
|
||
},
|
||
"execution_count": 34,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
" tf.shape(train_x[0]), tf.shape(train_y[0]), tf.shape(test_x[0]), tf.shape(test_y[0]),\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 35,
|
||
"metadata": {
|
||
"scrolled": false
|
||
},
|
||
"outputs": [],
|
||
"source": [
|
||
"# model.fit(train_x, train_y, validation_data=(test_x, test_y), callbacks=callbacks_list, verbose=1, epochs=100)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 36,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"import keras_tuner as kt\n",
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.layers import (\n",
"    Flatten,\n",
"    Dense,\n",
"    Embedding,\n",
"    Conv1D,\n",
"    GlobalMaxPooling1D,\n",
"    MaxPooling1D,\n",
")\n",
"\n",
"\n",
"def build_model(hp):\n",
"    \"\"\"Build and compile the tunable Conv1D text classifier for KerasTuner.\n",
"\n",
"    Args:\n",
"        hp: keras_tuner.HyperParameters; each hp.Int / hp.Boolean call below\n",
"            registers one dimension of the search space.\n",
"\n",
"    Returns:\n",
"        A compiled Sequential model: Embedding -> Conv1D + MaxPooling1D\n",
"        (optionally a second pair) -> Flatten -> two Dense layers (optionally\n",
"        a third) -> 10-way softmax.\n",
"\n",
"    Reads the notebook globals `num_words` and `max_len`.\n",
"    \"\"\"\n",
"    model = Sequential()\n",
"    model.add(\n",
"        Embedding(\n",
"            num_words,\n",
"            output_dim=hp.Int(\"output_dim\", min_value=128, max_value=1024, step=128),\n",
"            input_length=max_len,\n",
"        )\n",
"    )\n",
"    model.add(\n",
"        Conv1D(\n",
"            filters=hp.Int(\"filters0\", min_value=64, max_value=512, step=64),\n",
"            kernel_size=hp.Int(\"kernel_size0\", min_value=1, max_value=3, step=1),\n",
"            padding=\"same\",\n",
"            activation=\"relu\",\n",
"            strides=hp.Int(\"strides0\", min_value=1, max_value=4, step=1),\n",
"        )\n",
"    )\n",
"    model.add(MaxPooling1D(pool_size=2, padding='same'))\n",
"    # Optional second convolution + pooling stage.\n",
"    if hp.Boolean(\"conv1\"):\n",
"        model.add(\n",
"            Conv1D(\n",
"                filters=hp.Int(\"filters1\", min_value=32, max_value=256, step=32),\n",
"                kernel_size=hp.Int(\"kernel_size1\", min_value=1, max_value=3, step=1),\n",
"                padding=\"same\",\n",
"                activation=\"relu\",\n",
"                strides=hp.Int(\"strides1\", min_value=1, max_value=4, step=1),\n",
"            )\n",
"        )\n",
"        model.add(MaxPooling1D(pool_size=2, padding='same'))\n",
"    model.add(Flatten())\n",
"    model.add(\n",
"        Dense(\n",
"            units=hp.Int(\"units0\", min_value=128, max_value=512, step=64),\n",
"            activation=\"relu\",\n",
"        )\n",
"    )\n",
"    model.add(\n",
"        Dense(\n",
"            units=hp.Int(\"units1\", min_value=64, max_value=512, step=64),\n",
"            activation=\"relu\",\n",
"        )\n",
"    )\n",
"    # Optional third hidden dense layer.\n",
"    if hp.Boolean(\"dense2\"):\n",
"        model.add(\n",
"            Dense(\n",
"                units=hp.Int(\"units2\", min_value=64, max_value=256, step=32),\n",
"                activation=\"relu\",\n",
"            )\n",
"        )\n",
"    # 10 output units: one per entry of the one-hot Act label vectors.\n",
"    model.add(Dense(10, activation=\"softmax\"))\n",
"    # A softmax over one-hot targets is a single-label multi-class problem,\n",
"    # so the matching loss is categorical (not binary) cross-entropy.\n",
"    model.compile(optimizer=\"rmsprop\", loss=\"categorical_crossentropy\", metrics=[\"acc\"])\n",
"    return model\n",
"\n",
"\n",
"# Smoke test at top level: the model must build with default hyperparameters.\n",
"build_model(kt.HyperParameters())"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 37,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
"# Hyperband search over the space declared in build_model, ranking trials\n",
"# by validation loss.\n",
"tuner = kt.Hyperband(\n",
"    hypermodel=build_model,\n",
"    objective=\"val_loss\",\n",
"    max_epochs=30,\n",
"    factor=3,\n",
"    hyperband_iterations=3,\n",
")"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 38,
|
||
"metadata": {
|
||
"scrolled": true
|
||
},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"Trial 270 Complete [00h 00m 05s]\n",
|
||
"val_loss: 0.045697689056396484\n",
|
||
"\n",
|
||
"Best val_loss So Far: 0.03326363489031792\n",
|
||
"Total elapsed time: 00h 20m 11s\n",
|
||
"INFO:tensorflow:Oracle triggered exit\n"
|
||
]
|
||
}
|
||
],
|
||
"source": [
|
||
"tuner.search(train_x, train_y, validation_data=(test_x, test_y), callbacks=callbacks_list, verbose=1)\n"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 39,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"<keras.engine.sequential.Sequential at 0x2034189a530>"
|
||
]
|
||
},
|
||
"execution_count": 39,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"best_hps=tuner.get_best_models(num_models=1)\n",
|
||
"# model = tuner.hypermodel.build(best_hps)\n",
|
||
"best_hps[0]"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 42,
|
||
"metadata": {},
|
||
"outputs": [
|
||
{
|
||
"name": "stdout",
|
||
"output_type": "stream",
|
||
"text": [
|
||
"1/1 [==============================] - 0s 30ms/step - loss: 0.2305 - acc: 0.8315\n"
|
||
]
|
||
},
|
||
{
|
||
"data": {
|
||
"text/plain": [
|
||
"[0.23047222197055817, 0.8314606547355652]"
|
||
]
|
||
},
|
||
"execution_count": 42,
|
||
"metadata": {},
|
||
"output_type": "execute_result"
|
||
}
|
||
],
|
||
"source": [
|
||
"best_hps[0].evaluate(validation_x, validation_y, batch_size=128)"
|
||
]
|
||
},
|
||
{
|
||
"cell_type": "code",
|
||
"execution_count": 41,
|
||
"metadata": {},
|
||
"outputs": [],
|
||
"source": [
|
||
"# model.save(classification.h5)"
|
||
]
|
||
}
|
||
],
|
||
"metadata": {
|
||
"kernelspec": {
|
||
"display_name": "Python 3",
|
||
"language": "python",
|
||
"name": "python3"
|
||
},
|
||
"language_info": {
|
||
"codemirror_mode": {
|
||
"name": "ipython",
|
||
"version": 3
|
||
},
|
||
"file_extension": ".py",
|
||
"mimetype": "text/x-python",
|
||
"name": "python",
|
||
"nbconvert_exporter": "python",
|
||
"pygments_lexer": "ipython3",
|
||
"version": "3.7.8"
|
||
}
|
||
},
|
||
"nbformat": 4,
|
||
"nbformat_minor": 5
|
||
}
|