paranormal-or-skeptic/.ipynb_checkpoints/Untitled (2)-Copy4-checkpoint.ipynb

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import glob\n",
"import pandas as pd\n",
"import re\n",
"import tensorflow as tf\n",
"from collections import Counter\n",
"import numpy as np\n",
"count = Counter()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"filenames1 = []\n",
"for filename in glob.glob('Systemy_dialogowe/data/*.tsv'):\n",
" filenames1.append(filename)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def prep(filenames):\n",
" df = pd.DataFrame(columns=['Rola', 'Wypowiedź', 'Act'])\n",
" for filename in filenames:\n",
" temp = pd.read_csv(filename, sep='\\t', names=[\"Rola\", \"Wypowiedź\", \"Act\"])\n",
" df = pd.concat([df, temp], ignore_index=True)\n",
" return df"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Rola</th>\n",
" <th>Wypowiedź</th>\n",
" <th>Act</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>user</td>\n",
" <td>Witam</td>\n",
" <td>hello()</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>user</td>\n",
" <td>Co możesz dla mnie zrobić?</td>\n",
" <td>help()</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>user</td>\n",
" <td>Jakie są moje repozytoria?</td>\n",
" <td>request(repos)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>user</td>\n",
" <td>ok. co nowego w Zajęcia AI?</td>\n",
" <td>affirm() &amp; request(repo = Zajecia AI)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>user</td>\n",
" <td>Tylko tyle?</td>\n",
" <td>reqmore()</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1409</th>\n",
" <td>user</td>\n",
" <td>upewniam się</td>\n",
" <td>null</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1411</th>\n",
" <td>user</td>\n",
" <td>pokaż mi raport</td>\n",
" <td>request(repo)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1414</th>\n",
" <td>user</td>\n",
" <td>zmienić</td>\n",
" <td>null</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1416</th>\n",
" <td>user</td>\n",
" <td>Tak</td>\n",
" <td>ack</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1466</th>\n",
" <td>user</td>\n",
" <td>elo</td>\n",
" <td>hello()</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>585 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" Rola Wypowiedź Act\n",
"1 user Witam hello()\n",
"3 user Co możesz dla mnie zrobić? help()\n",
"5 user Jakie są moje repozytoria? request(repos)\n",
"7 user ok. co nowego w Zajęcia AI? affirm() & request(repo = Zajecia AI)\n",
"9 user Tylko tyle? reqmore()\n",
"... ... ... ...\n",
"1409 user upewniam się null\n",
"1411 user pokaż mi raport request(repo)\n",
"1414 user zmienić null\n",
"1416 user Tak ack\n",
"1466 user elo hello()\n",
"\n",
"[585 rows x 3 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = prep(filenames1)\n",
"df = df.loc[df['Rola'] == 'user']\n",
"df = df.fillna('null')\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['hello()', 'help()', 'request(repos)',\n",
" 'affirm() & request(repo = Zajecia AI)', 'reqmore()',\n",
" 'request(notifications)', 'request(notification = 1)',\n",
" 'request(link)', 'affirm() & reqmore()',\n",
" 'request(repo = Projekt Sklep)', 'request(issues)', 'ack()',\n",
" 'request(commits = mattyl34)', 'request(commits = -5)',\n",
" 'affirm() & request(repo = Gra - kółko i krzyżyk)', 'thankyou()',\n",
" 'inform()', 'helpresponse()', 'request(repo = Zajecia AI)',\n",
" 'request(delete)', 'ack() & inform()',\n",
" 'request(repo = Projekt-sklep)', 'request(files = 1:3)',\n",
" 'request()', 'bye()', 'request(file)',\n",
" 'helpresponse() & request(repo)', 'request(command)',\n",
" 'request(repo = Projekt - Sklep)', 'request(authors)',\n",
" 'request(Bob)', 'request(repo = system)',\n",
" 'request(repo = super_stronka_internetowa)',\n",
" 'request(date, pr = 2)', 'request(repo = nazwaRepozytorium)',\n",
" 'request(repo = zadania)', 'request(author, pr = 1)',\n",
" 'request(deny, pr = 1)', 'request(rollback, commit = last)',\n",
" 'request(repo = zajecia)', 'request(newPR)', 'inform(branches)',\n",
" 'inform(title)', 'request(repo = pizza)', 'request(bilet)',\n",
" 'request(repo)', 'inform(capriciosa)', 'inform(gGphJD)', 'affirm',\n",
" 'help', 'hello', 'inform(qgphjd)', 'null', 'bye',\n",
" 'inform(qwdqwdqaswdaqdfqfwqwfq)', 'inform(qGphJs)',\n",
" 'inform(qgphid)', 'inform(pGphJD)', 'thankyou', 'inform(qGphJ0)',\n",
" 'inform(qGphJ)', 'inform(DJhpGq)', 'inform(phgdj)',\n",
" 'inform(QgPHjd)', 'ack'], dtype=object)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['Act'].unique()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### split by &\n",
"### ignore args and ()\n",
"### {'request', 'inform', 'bye', 'reqmore', 'help', 'ack', 'affirm', 'hello', 'thankyou', 'null'}"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'help', 'reqmore', 'bye', 'thankyou', 'hello', 'affirm', 'ack', 'inform', 'request', 'null'}\n"
]
}
],
"source": [
"new = pd.DataFrame(columns=['Wypowiedź', 'Act'])\n",
"values = set()\n",
"for index, row in df.iterrows():\n",
" act = row[2].split('&')\n",
" act = [re.sub('\\(.*\\)', '', x) for x in act]\n",
" act = [re.sub(' ', '', x) for x in act]\n",
" act = [re.sub('helpresponse', 'help', x) for x in act]\n",
" [values.add(x) for x in act]\n",
" temp = pd.DataFrame({'Wypowiedź':row[1], 'Act': act})\n",
" new = pd.concat([new, temp], ignore_index=True)\n",
"new.head(10)\n",
"print(values)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'request': Wypowiedź Act\n",
" 2 Jakie są moje repozytoria? request\n",
" 4 ok. co nowego w Zajęcia AI? request\n",
" 6 Mam jakieś nowe powiadomienia? request\n",
" 8 Opowiedz mi o powiadomieniu 1 request\n",
" 9 Podaj mi linka do tego powiadomienia request\n",
" .. ... ...\n",
" 571 Próbuję ponownie request\n",
" 573 próbuję request\n",
" 577 pokaż mi raport projektu request\n",
" 584 pokaż mi raport projektu request\n",
" 587 pokaż mi raport request\n",
" \n",
" [130 rows x 2 columns],\n",
" 'inform': Wypowiedź Act\n",
" 26 to wszystko inform\n",
" 33 Oki, to będzie tyle inform\n",
" 76 To nie, to już wszystko inform\n",
" 103 moja gałąź, \"master\" inform\n",
" 104 moja PR inform\n",
" .. ... ...\n",
" 535 gGphJD inform\n",
" 545 qGphJ inform\n",
" 552 DJhpGq inform\n",
" 578 phgdj inform\n",
" 579 QgPHjd inform\n",
" \n",
" [68 rows x 2 columns],\n",
" 'bye': Wypowiedź Act\n",
" 50 Do widzenia bye\n",
" 56 Dziękuję, do widzenia bye\n",
" 77 Papa bye\n",
" 141 exit bye\n",
" 164 exit bye\n",
" 172 Nic z tych komend mnie nie interesuje bye\n",
" 195 exit bye\n",
" 203 Nic z tych komend mnie nie interesuje bye\n",
" 234 exit bye\n",
" 242 Nic z tych komend mnie nie interesuje bye\n",
" 284 exit bye\n",
" 292 Nic z tych komend mnie nie interesuje bye\n",
" 322 Nie chcę bye\n",
" 345 exit bye\n",
" 353 Nic z tych komend mnie nie interesuje bye\n",
" 383 Nie chcę bye\n",
" 393 To wszystko, dziękuje bye\n",
" 416 exit bye\n",
" 424 Nic z tych komend mnie nie interesuje bye\n",
" 454 Nie chcę bye\n",
" 464 To wszystko, dziękuje bye\n",
" 471 Do widzenia bye\n",
" 494 exit bye\n",
" 502 Nic z tych komend mnie nie interesuje bye\n",
" 532 Nie chcę bye\n",
" 542 To wszystko, dziękuje bye\n",
" 549 Do widzenia bye,\n",
" 'reqmore': Wypowiedź Act\n",
" 5 Tylko tyle? reqmore\n",
" 7 Jakie? reqmore\n",
" 11 okej. jakie jeszcze informacje możesz mi przek... reqmore\n",
" 12 O tym samym reqmore\n",
" 15 Co jeszcze możesz dla mnie zrobić? reqmore\n",
" 18 Jakie? reqmore\n",
" 20 A pozostałe dwa? reqmore\n",
" 24 ok; A jakby były to powiedziałbyś mi o nich? reqmore,\n",
" 'help': Wypowiedź Act\n",
" 1 Co możesz dla mnie zrobić? help\n",
" 28 Jakie są dostępne funkcje? help\n",
" 35 Chciałabym się dowiedzieć jakie usługi oferujecie help\n",
" 42 Chciałbym poznać funkcję systemu help\n",
" 58 Jak mi możesz pomóc; Chcę nowe repo help\n",
" .. ... ...\n",
" 567 pokaż listę komend help\n",
" 568 pokaż listę komend help\n",
" 581 pokaż help\n",
" 582 pokaż help\n",
" 583 wyjaśnij mi komendę pokaż mi listę komend help\n",
" \n",
" [140 rows x 2 columns],\n",
" 'ack': Wypowiedź Act\n",
" 16 Tak ack\n",
" 32 Oki, to będzie tyle ack\n",
" 39 tak ack\n",
" 48 Rozumiem ack\n",
" 73 tak ack\n",
" 80 Tak ack\n",
" 85 Tak ack\n",
" 91 tak ack\n",
" 94 Tak, poproszę ack\n",
" 101 tak ack\n",
" 107 tak ack\n",
" 110 tak ack\n",
" 113 tak ack\n",
" 589 Tak ack,\n",
" 'affirm': Wypowiedź Act\n",
" 3 ok. co nowego w Zajęcia AI? affirm\n",
" 10 okej. jakie jeszcze informacje możesz mi przek... affirm\n",
" 21 ok. Są jakieś failujące testy w tym repo? affirm\n",
" 23 ok; A jakby były to powiedziałbyś mi o nich? affirm\n",
" 117 kontynuuj affirm\n",
" 122 kontynuuj affirm\n",
" 137 chcę kontynuować affirm\n",
" 145 kontynuuj affirm\n",
" 160 chcę kontynuować affirm\n",
" 169 kontynuować affirm\n",
" 176 kontynuuj affirm\n",
" 191 chcę kontynuować affirm\n",
" 200 kontynuować affirm\n",
" 211 Chcę kontynuować affirm\n",
" 215 kontynuuj affirm\n",
" 230 chcę kontynuować affirm\n",
" 239 kontynuować affirm\n",
" 250 Chcę kontynuować affirm\n",
" 259 to nie chce zmieniać konfiguracji affirm\n",
" 265 kontynuuj affirm\n",
" 280 chcę kontynuować affirm\n",
" 289 kontynuować affirm\n",
" 300 Chcę kontynuować affirm\n",
" 309 to nie chce zmieniać konfiguracji affirm\n",
" 316 kontynuuj affirm\n",
" 326 kontynuuj affirm\n",
" 341 chcę kontynuować affirm\n",
" 350 kontynuować affirm\n",
" 361 Chcę kontynuować affirm\n",
" 370 to nie chce zmieniać konfiguracji affirm\n",
" 377 kontynuuj affirm\n",
" 388 kontynuuj affirm\n",
" 397 kontynuuj affirm\n",
" 412 chcę kontynuować affirm\n",
" 421 kontynuować affirm\n",
" 432 Chcę kontynuować affirm\n",
" 441 to nie chce zmieniać konfiguracji affirm\n",
" 448 kontynuuj affirm\n",
" 459 kontynuuj affirm\n",
" 475 kontynuuj affirm\n",
" 490 chcę kontynuować affirm\n",
" 499 kontynuować affirm\n",
" 510 Chcę kontynuować affirm\n",
" 519 to nie chce zmieniać konfiguracji affirm\n",
" 526 kontynuuj affirm\n",
" 537 kontynuuj affirm\n",
" 566 No dobra, to kontynuujemy affirm\n",
" 570 kontynuować affirm\n",
" 572 Kontynuować affirm\n",
" 585 kontynuujmy affirm,\n",
" 'hello': Wypowiedź Act\n",
" 0 Witam hello\n",
" 27 Dzień dobry! hello\n",
" 34 Dzień dobry hello\n",
" 41 Cześć hello\n",
" 51 Dzień dobry panie bocie hello\n",
" 57 Elo hello\n",
" 78 Dzień dobry hello\n",
" 82 Witam hello\n",
" 88 Dzień dobry hello\n",
" 92 Dzień dobry hello\n",
" 99 Dzień dobry hello\n",
" 105 Dzień dobry hello\n",
" 111 Dzień dobry hello\n",
" 124 Widam hello\n",
" 147 Widam hello\n",
" 165 Witam hello\n",
" 178 Widam hello\n",
" 196 Witam hello\n",
" 204 Dzień dobry! hello\n",
" 217 Widam hello\n",
" 235 Witam hello\n",
" 243 Dzień dobry! hello\n",
" 252 Dzień dobry hello\n",
" 267 Widam hello\n",
" 285 Witam hello\n",
" 293 Dzień dobry! hello\n",
" 302 Dzień dobry hello\n",
" 312 Dzień dobry hello\n",
" 328 Widam hello\n",
" 346 Witam hello\n",
" 354 Dzień dobry! hello\n",
" 363 Dzień dobry hello\n",
" 373 Dzień dobry hello\n",
" 384 Cześć hello\n",
" 399 Widam hello\n",
" 417 Witam hello\n",
" 425 Dzień dobry! hello\n",
" 434 Dzień dobry hello\n",
" 444 Dzień dobry hello\n",
" 455 Cześć hello\n",
" 465 Cześć hello\n",
" 477 Widam hello\n",
" 495 Witam hello\n",
" 503 Dzień dobry! hello\n",
" 512 Dzień dobry hello\n",
" 522 Dzień dobry hello\n",
" 533 Cześć hello\n",
" 543 Cześć hello\n",
" 550 Elo hello\n",
" 576 Dzień dobry hello\n",
" 590 elo hello,\n",
" 'thankyou': Wypowiedź Act\n",
" 25 dziękuję thankyou\n",
" 40 dziękuję za informację thankyou\n",
" 49 To wszystko, dziękuje thankyou\n",
" 74 Dzięki thankyou\n",
" 81 Nie thankyou\n",
" 87 Rozumiem, to wszystko thankyou\n",
" 98 Nie thankyou\n",
" 108 Nie thankyou\n",
" 260 Dziękuje thankyou\n",
" 310 Dziękuje thankyou\n",
" 371 Dziękuje thankyou\n",
" 389 podoba mi się raport thankyou\n",
" 392 Dobrze thankyou\n",
" 442 Dziękuje thankyou\n",
" 460 podoba mi się raport thankyou\n",
" 463 Dobrze thankyou\n",
" 520 Dziękuje thankyou\n",
" 538 podoba mi się raport thankyou\n",
" 541 Dobrze thankyou\n",
" 575 Dzięki za pomoc thankyou,\n",
" 'null': Wypowiedź Act\n",
" 128 chcę zmienić projekt null\n",
" 130 A jak mogę zmienić konfigurację? null\n",
" 131 CHCĘ INNY PROJEKT null\n",
" 132 zgłoś błąd null\n",
" 135 Chcę zmienić konfigurację null\n",
" .. ... ...\n",
" 562 Chcę zmienić null\n",
" 574 zmienić null\n",
" 580 zmienic null\n",
" 586 upewniam się null\n",
" 588 zmienić null\n",
" \n",
" [83 rows x 2 columns]}"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"unique = ['request', 'inform', 'bye', 'reqmore', 'help', 'ack', 'affirm', 'hello', 'thankyou', 'null']\n",
"sorted_values = {}\n",
"for item in unique:\n",
" temp = new.loc[new['Act'] == item]\n",
"# print(new.loc[new['Act'] == item])\n",
" sorted_values[item] = temp\n",
" temp.to_csv(f'data_sorted//{item}', sep='\\t', index=False)\n",
"sorted_values"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def preprocess(line):\n",
" txt = line\n",
" txt = re.sub(\n",
" \"(£|§|!|@|#|\\$|%|\\^|&|\\*|\\(|\\)|_|-|\\+|=|\\{|\\[|\\}|\\]|:|;|\\\"|'|\\|\\\\|\\<|,|\\>|\\.|\\?|/|~|`|\\|||)\",\n",
" \"\",\n",
" txt,\n",
" )\n",
" txt = txt.lower()\n",
" txt = re.sub(\"[0-9]\", \"\", txt)\n",
" txt = re.sub(\"[ \\t]+\", \" \", txt)\n",
" txt = re.sub(\" +$\", \"\", txt)\n",
" txt = re.sub(\"ą\", \"a\", txt)\n",
" txt = re.sub(\"ć\", \"c\", txt)\n",
" txt = re.sub(\"ę\", \"e\", txt)\n",
" txt = re.sub(\"ł\", \"l\", txt)\n",
" txt = re.sub(\"ń\", \"n\", txt)\n",
" txt = re.sub(\"ó\", \"o\", txt)\n",
" txt = re.sub(\"ś\", \"s\", txt)\n",
" txt = re.sub(\"ź\", \"z\", txt)\n",
" txt = re.sub(\"ż\", \"z\", txt)\n",
" words = txt.split()\n",
" words = [w[:6] if len(w) > 6 else w for w in words]\n",
" out = []\n",
" for word in words:\n",
"# if word not in stopwords:\n",
" out.append(word)\n",
" for stem in out:\n",
" count[stem] += 1\n",
" text = \" \".join(out)\n",
"# print(text)\n",
" return text\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"from tensorflow.keras.utils import to_categorical\n",
"from numpy import argmax\n",
"\n",
"acts = ['inform', 'reqmore', 'thankyou', 'ack', 'affirm', 'hello', 'request', 'help', 'null', 'bye']\n",
"to_num = {act: idx for idx, act in enumerate(acts)}"
]
},
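{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Quick sanity check of the label encoding (illustration only, not part of the pipeline):\n",
"# 'hello' sits at index 5 in `acts`, so its one-hot vector has a 1.0 at position 5.\n",
"to_categorical(to_num['hello'], num_classes=len(acts))"
]
},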
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"53"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"prep = pd.DataFrame(columns=['Wypowiedź', 'Act'])\n",
"values = set()\n",
"max_len = 0\n",
"for index, row in new.iterrows():\n",
" temp = preprocess(row[0])\n",
" one_hot_encode = to_categorical(to_num[row[1]], num_classes=len(acts))\n",
"# one_hot_encode = np.asarray(one_hot_encode).astype('float32')\n",
" max_len = max(max_len, len(temp))\n",
" frame = pd.DataFrame({'Wypowiedź': temp, 'Act': [np.asarray(one_hot_encode).astype('float32')]})\n",
" prep = pd.concat([prep, frame], ignore_index=True)\n",
"# print(prep)\n",
"max_len"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Wypowiedź</th>\n",
" <th>Act</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>czesc</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>chcial odrzuc pr</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>capric</td>\n",
" <td>[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>oki to bedzie tyle</td>\n",
" <td>[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>qgphjd</td>\n",
" <td>[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>586</th>\n",
" <td>wyjasn mi komend pokaz mi liste komend</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>587</th>\n",
" <td>wyjasn mi komend</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>588</th>\n",
" <td>zmieni konfig</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>589</th>\n",
" <td>ggphjd</td>\n",
" <td>[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>590</th>\n",
" <td>chce zmieni konfig</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, ...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>591 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" Wypowiedź \\\n",
"0 czesc \n",
"1 chcial odrzuc pr \n",
"2 capric \n",
"3 oki to bedzie tyle \n",
"4 qgphjd \n",
".. ... \n",
"586 wyjasn mi komend pokaz mi liste komend \n",
"587 wyjasn mi komend \n",
"588 zmieni konfig \n",
"589 ggphjd \n",
"590 chce zmieni konfig \n",
"\n",
" Act \n",
"0 [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ... \n",
"1 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ... \n",
"2 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
"3 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
"4 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
".. ... \n",
"586 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ... \n",
"587 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ... \n",
"588 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, ... \n",
"589 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
"590 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, ... \n",
"\n",
"[591 rows x 2 columns]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"prep = prep.sample(frac=1).reset_index(drop=True)\n",
"prep"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"('mi', 219)\n",
"('pokaz', 133)\n",
"('komend', 124)\n",
"('projek', 97)\n",
"('raport', 84)\n",
"('wyjasn', 68)\n",
"('chce', 61)\n",
"('zmieni', 57)\n",
"('konfig', 45)\n",
"('jakie', 43)\n",
"('kontyn', 41)\n",
"('liste', 41)\n",
"('sa', 31)\n",
"('w', 27)\n",
"('funkcj', 26)\n",
"('dzien', 25)\n",
"('dobry', 25)\n",
"('dostep', 25)\n",
"('to', 24)\n",
"('mozesz', 22)\n",
"('repozy', 21)\n",
"('nie', 21)\n",
"('pomoc', 19)\n",
"('tak', 17)\n",
"('a', 16)\n",
"('czym', 16)\n",
"('moge', 15)\n",
"('chcial', 14)\n",
"('jeszcz', 12)\n",
"('dzieku', 12)\n",
"('ggphjd', 12)\n",
"('o', 11)\n",
"('jak', 10)\n",
"('witam', 9)\n",
"('co', 9)\n",
"('mnie', 9)\n",
"('repo', 9)\n",
"('wszyst', 9)\n",
"('z', 9)\n",
"('capric', 9)\n",
"('qgphjd', 9)\n",
"('lista', 9)\n",
"('sie', 8)\n",
"('system', 8)\n",
"('widam', 8)\n",
"('inny', 8)\n",
"('zglos', 8)\n",
"('blad', 8)\n",
"('exit', 8)\n",
"('inform', 7)\n",
"('adawda', 7)\n",
"('qwdqwd', 7)\n",
"('qgphjs', 7)\n",
"('nic', 7)\n",
"('tych', 7)\n",
"('intere', 7)\n",
"('ok', 6)\n",
"('mam', 6)\n",
"('nowe', 6)\n",
"('podaj', 6)\n",
"('do', 6)\n",
"('powied', 6)\n",
"('czesc', 6)\n",
"('qgphid', 6)\n",
"('dialog', 6)\n",
"('qgphj', 6)\n",
"('zajeci', 5)\n",
"('issue', 5)\n",
"('jest', 5)\n",
"('pr', 5)\n",
"('pgphjd', 5)\n",
"('napraw', 5)\n",
"('bylem', 5)\n",
"('botem', 5)\n",
"('i', 5)\n",
"('zle', 5)\n",
"('przepi', 5)\n",
"('kod', 5)\n",
"('moje', 4)\n",
"('ai', 4)\n",
"('powiad', 4)\n",
"('ostatn', 4)\n",
"('uslugi', 4)\n",
"('oferuj', 4)\n",
"('zobacz', 4)\n",
"('widzen', 4)\n",
"('no', 4)\n",
"('zrobic', 3)\n",
"('tyle', 3)\n",
"('jakies', 3)\n",
"('tym', 3)\n",
"('sklep', 3)\n",
"('pierws', 3)\n",
"('elo', 3)\n",
"('help', 3)\n",
"('status', 3)\n",
"('podoba', 3)\n",
"('dobrze', 3)\n",
"('dla', 2)\n",
"('nowego', 2)\n",
"('opowie', 2)\n",
"('okej', 2)\n",
"('przeka', 2)\n",
"('commit', 2)\n",
"('failuj', 2)\n",
"('testy', 2)\n",
"('jakby', 2)\n",
"('byly', 2)\n",
"('nich', 2)\n",
"('powiaz', 2)\n",
"('oki', 2)\n",
"('bedzie', 2)\n",
"('moich', 2)\n",
"('za', 2)\n",
"('na', 2)\n",
"('temat', 2)\n",
"('pliku', 2)\n",
"('rozumi', 2)\n",
"('zatem', 2)\n",
"('dzieki', 2)\n",
"('potraf', 2)\n",
"('moim', 2)\n",
"('moja', 2)\n",
"('pizza', 2)\n",
"('github', 2)\n",
"('briefi', 2)\n",
"('poka', 2)\n",
"('probuj', 2)\n",
"('tylko', 1)\n",
"('linka', 1)\n"
]
}
],
"source": [
"num_words = 130\n",
"\n",
"for item in count.most_common(num_words):\n",
" print(item)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"train_size = int(prep.shape[0] * 0.70)\n",
"\n",
"validation_size = int(prep.shape[0] * 0.85)\n",
"\n",
"train_sentences = prep.Wypowiedź[:train_size]\n",
"train_labels = prep.Act[:train_size]\n",
"\n",
"test_sentences = prep.Wypowiedź[train_size:validation_size]\n",
"test_labels = prep.Act[train_size:validation_size]\n",
"\n",
"validation_sentences = prep.Wypowiedź[validation_size:]\n",
"validation_labels = prep.Act[validation_size:]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"( Wypowiedź \\\n",
" 0 czesc \n",
" 1 chcial odrzuc pr \n",
" 2 capric \n",
" 3 oki to bedzie tyle \n",
" 4 qgphjd \n",
" .. ... \n",
" 408 o tym samym \n",
" 409 pgphjd \n",
" 410 pokaz mi raport projek adawda \n",
" 411 w czym jeszcz mozesz mi pomoc \n",
" 412 opowie mi o zajeci ai \n",
" \n",
" Act \n",
" 0 [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ... \n",
" 1 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ... \n",
" 2 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
" 3 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
" 4 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
" .. ... \n",
" 408 [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
" 409 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
" 410 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ... \n",
" 411 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ... \n",
" 412 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ... \n",
" \n",
" [413 rows x 2 columns],\n",
" Wypowiedź Act\n",
" 413 pokaz mi lista komend [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...\n",
" 414 qgphjd [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...\n",
" 415 pokaz mi liste komend [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...\n",
" 416 qgphjs [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...\n",
" 417 jakie sa dostep funkcj [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...\n",
" .. ... ...\n",
" 497 kontyn [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...\n",
" 498 chce kontyn [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...\n",
" 499 czesc [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...\n",
" 500 kontyn [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...\n",
" 501 chce kontyn [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...\n",
" \n",
" [89 rows x 2 columns])"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train = pd.concat([train_sentences, train_labels], ignore_index=False, axis=1)\n",
"test = pd.concat([test_sentences, test_labels], ignore_index=False, axis=1)\n",
"validation = pd.concat([validation_sentences, validation_labels], ignore_index=False, axis=1)\n",
"\n",
"train, test\n",
"# pd.Series(array) "
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"from keras.preprocessing.text import Tokenizer\n",
"\n",
"tokenizer = Tokenizer(num_words=num_words)\n",
"tokenizer.fit_on_texts(train['Wypowiedź'])"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"word_index = tokenizer.word_index"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"413 pokaz mi lista komend\n",
"414 qgphjd\n",
"415 pokaz mi liste komend\n",
"416 qgphjs\n",
"417 jakie sa dostep funkcj\n",
" ... \n",
"497 kontyn\n",
"498 chce kontyn\n",
"499 czesc\n",
"500 kontyn\n",
"501 chce kontyn\n",
"Name: Wypowiedź, Length: 89, dtype: object"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test['Wypowiedź']"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# train['Wypowiedź'] = train['Wypowiedź'].apply(func = lambda row : tokenizer.texts_to_matrix(tokenizer.texts_to_sequences(row))) \n",
"# test['Wypowiedź'] = test['Wypowiedź'].apply(func = lambda row : tokenizer.texts_to_matrix(tokenizer.texts_to_sequences(row))) \n",
"\n",
"\n",
"# train['Wypowiedź'] = train['Wypowiedź'].apply(tokenizer.texts_to_matrix(train['Wypowiedź']))#.apply(func = lambda row : tokenizer.texts_to_matrix(tokenizer.texts_to_sequences(row))) \n",
"# test['Wypowiedź'] = test['Wypowiedź'].apply(func = lambda row : tokenizer.texts_to_matrix(tokenizer.texts_to_sequences(row))) \n",
"train['Wypowiedź'] = (tokenizer.texts_to_sequences(train['Wypowiedź']))\n",
"test['Wypowiedź'] = (tokenizer.texts_to_sequences(test['Wypowiedź']))\n",
"validation['Wypowiedź'] = (tokenizer.texts_to_sequences(validation['Wypowiedź']))"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[69]"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# train[train['Wypowiedź'].str.len() == 1]\n",
"train['Wypowiedź'][5]"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1, 4)"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(train['Wypowiedź'][5]), len(train['Wypowiedź'][200])"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"# type(train['Wypowiedź'][0]),type(test['Wypowiedź'][588]),type(train['Act'][0]),type(test['Act'][588])"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"from keras.preprocessing.sequence import pad_sequences\n",
"\n",
"temp_a = pad_sequences(\n",
" train['Wypowiedź'].tolist(), maxlen=max_len, padding=\"post\", truncating=\"post\"\n",
")\n",
"temp_b = pad_sequences(\n",
" test['Wypowiedź'].tolist(), maxlen=max_len, padding=\"post\", truncating=\"post\"\n",
")\n",
"temp_c = pad_sequences(\n",
" validation['Wypowiedź'].tolist(), maxlen=max_len, padding=\"post\", truncating=\"post\"\n",
")\n",
"train['Wypowiedź'] = temp_a.tolist()\n",
"test['Wypowiedź'] = temp_b.tolist()\n",
"validation['Wypowiedź'] = temp_c.tolist()\n",
"\n",
"# train=train.reshape(1,train.shape[0])\n",
"# mel=mel.reshape(1,mel.shape[0])\n",
"\n",
"# train['Wypowiedź'] = train['Wypowiedź'].apply(lambda row : pad_sequences(\n",
"# row, maxlen=max_len, padding=\"post\", truncating=\"post\"\n",
"# ))\n",
"# test['Wypowiedź'] = test['Wypowiedź'].apply(lambda row : pad_sequences(\n",
"# row, maxlen=max_len, padding=\"post\", truncating=\"post\"\n",
"# )) "
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"413 [2, 1, 34, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...\n",
"414 [40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...\n",
"415 [2, 1, 12, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...\n",
"416 [67, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...\n",
"417 [10, 13, 17, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...\n",
" ... \n",
"497 [9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...\n",
"498 [7, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...\n",
"499 [58, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...\n",
"500 [9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...\n",
"501 [7, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...\n",
"Name: Wypowiedź, Length: 89, dtype: object"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test['Wypowiedź']"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"import keras\n",
"early_stopping = keras.callbacks.EarlyStopping(\n",
" monitor=\"val_loss\", patience=5, restore_best_weights=True, verbose=0\n",
")\n",
"\n",
"# checkpoint_callback = keras.callbacks.ModelCheckpoint(\n",
"# filepath='.', monitor='val_loss', verbose=0, save_weights_only=True,\n",
"# save_freq='epoch', mode='auto', save_best_only=True)\n",
"\n",
"reduce_lr_on_plateau = keras.callbacks.ReduceLROnPlateau(\n",
" monitor=\"acc\", factor=0.1, patience=2, verbose=0\n",
")\n",
"\n",
"callbacks_list = [early_stopping, reduce_lr_on_plateau]"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Wypowiedź</th>\n",
" <th>Act</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>413</th>\n",
" <td>[2, 1, 34, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>414</th>\n",
" <td>[40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...</td>\n",
" <td>[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>415</th>\n",
" <td>[2, 1, 12, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>416</th>\n",
" <td>[67, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...</td>\n",
" <td>[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>417</th>\n",
" <td>[10, 13, 17, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>497</th>\n",
" <td>[9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>498</th>\n",
" <td>[7, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>499</th>\n",
" <td>[58, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>500</th>\n",
" <td>[9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>501</th>\n",
" <td>[7, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>89 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" Wypowiedź \\\n",
"413 [2, 1, 34, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... \n",
"414 [40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... \n",
"415 [2, 1, 12, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... \n",
"416 [67, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... \n",
"417 [10, 13, 17, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... \n",
".. ... \n",
"497 [9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n",
"498 [7, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n",
"499 [58, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... \n",
"500 [9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n",
"501 [7, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n",
"\n",
" Act \n",
"413 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ... \n",
"414 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
"415 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ... \n",
"416 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
"417 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ... \n",
".. ... \n",
"497 [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ... \n",
"498 [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ... \n",
"499 [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ... \n",
"500 [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ... \n",
"501 [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ... \n",
"\n",
"[89 rows x 2 columns]"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"# train['Wypowiedź'] = train['Wypowiedź'].tolist()\n",
"# test['Wypowiedź'] = test['Wypowiedź'].tolist()\n",
"\n",
"train['Wypowiedź'] = train['Wypowiedź'].apply(lambda row: np.asarray(row).astype('float32'))\n",
"test['Wypowiedź'] = test['Wypowiedź'].apply(lambda row : np.asarray(row).astype('float32'))\n",
"validation['Wypowiedź'] = validation['Wypowiedź'].apply(lambda row : np.asarray(row).astype('float32'))\n",
"\n",
"train['Act'] = train['Act'].apply(lambda row : np.asarray(row).astype('float32'))\n",
"test['Act'] = test['Act'].apply(lambda row : np.asarray(row).astype('float32'))\n",
"validation['Act'] = validation['Act'].apply(lambda row : np.asarray(row).astype('float32'))"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"# train['Wypowiedź'] = train['Wypowiedź'].apply(lambda row: tf.convert_to_tensor(np.asarray(row).astype('float32')))\n",
"# test['Wypowiedź'] = test['Wypowiedź'].apply(lambda row : tf.convert_to_tensor(np.asarray(row).astype('float32')))\n",
"\n",
"# train['Act'] = train['Act'].apply(lambda row : tf.convert_to_tensor(np.asarray(row).astype('float32')))\n",
"# test['Act'] = test['Act'].apply(lambda row : tf.convert_to_tensor(np.asarray(row).astype('float32')))\n"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([60., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0.], dtype=float32)"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
" train['Wypowiedź'][2]"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(pandas.core.series.Series,\n",
" pandas.core.series.Series,\n",
" pandas.core.series.Series,\n",
" pandas.core.series.Series)"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(train['Wypowiedź']),type(test['Wypowiedź']),type(train['Act']),type(test['Act'])"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"train_x = tf.convert_to_tensor(train['Wypowiedź'].tolist())\n",
"train_y = tf.convert_to_tensor(train['Act'].tolist())\n",
"test_x = tf.convert_to_tensor(test['Wypowiedź'].tolist())\n",
"test_y = tf.convert_to_tensor(test['Act'].tolist())\n",
"\n",
"validation_x = tf.convert_to_tensor(validation['Wypowiedź'].tolist())\n",
"validation_y = tf.convert_to_tensor(validation['Act'].tolist())"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"89"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(validation_y)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<tf.Tensor: shape=(53,), dtype=float32, numpy=\n",
"array([58., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0.], dtype=float32)>"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_x[0]"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"# # import keras_tuner as kt\n",
"# from tensorflow.keras.models import Sequential\n",
"# from tensorflow.keras.layers import (\n",
"# Flatten,\n",
"# Dense,\n",
"# Embedding,\n",
"# Conv1D,\n",
"# GlobalMaxPooling1D,\n",
"# MaxPooling1D,\n",
"# )\n",
"\n",
"\n",
"# model = Sequential()\n",
"# model.add(\n",
"# Embedding(\n",
"# num_words,\n",
"# output_dim=128,\n",
"# input_length=max_len,\n",
"# )\n",
"# )\n",
"# model.add(\n",
"# Conv1D(\n",
"# filters=64,\n",
"# kernel_size=3,\n",
"# padding=\"same\",\n",
"# activation=\"relu\",\n",
"# strides=1,\n",
"# )\n",
"# )\n",
"# model.add(MaxPooling1D(pool_size=2, padding='same'))\n",
"# model.add(Flatten())\n",
"# model.add(\n",
"# Dense(\n",
"# units=128,\n",
"# activation=\"relu\",\n",
"# )\n",
"# )\n",
"# model.add(\n",
"# Dense(\n",
"# units=128,\n",
"# activation=\"relu\",\n",
"# )\n",
"# ) \n",
"# model.add(Dense(10, activation=\"softmax\"))\n",
"# model.compile(optimizer=\"rmsprop\", loss=\"categorical_crossentropy\", metrics=[\"acc\"])\n",
"# ########################################units????\n",
"# model.summary()\n",
"# # build_model(kt.HyperParameters())"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(<tf.Tensor: shape=(1,), dtype=int32, numpy=array([53])>,\n",
" <tf.Tensor: shape=(1,), dtype=int32, numpy=array([10])>,\n",
" <tf.Tensor: shape=(1,), dtype=int32, numpy=array([53])>,\n",
" <tf.Tensor: shape=(1,), dtype=int32, numpy=array([10])>)"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
" tf.shape(train_x[0]), tf.shape(train_y[0]), tf.shape(test_x[0]), tf.shape(test_y[0]),\n"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"# model.fit(train_x, train_y, validation_data=(test_x, test_y), callbacks=callbacks_list, verbose=1, epochs=100)\n"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"import keras_tuner as kt\n",
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.layers import (\n",
" Flatten,\n",
" Dense,\n",
" Embedding,\n",
" Conv1D,\n",
" GlobalMaxPooling1D,\n",
" MaxPooling1D,\n",
")\n",
"\n",
"\n",
"def build_model(hp):\n",
"\n",
" model = Sequential()\n",
" model.add(\n",
" Embedding(\n",
" num_words,\n",
" output_dim=hp.Int(\"output_dim\", min_value=128, max_value=1024, step=128),\n",
" input_length=max_len,\n",
" )\n",
" )\n",
" model.add(\n",
" Conv1D(\n",
" filters=hp.Int(\"filters0\", min_value=64, max_value=512, step=64),\n",
" kernel_size=hp.Int(\"kernel_size0\", min_value=1, max_value=3, step=1),\n",
" padding=\"same\",\n",
" activation=\"relu\",\n",
" strides=hp.Int(\"strides0\", min_value=1, max_value=4, step=1),\n",
" )\n",
" )\n",
" model.add(MaxPooling1D(pool_size=2, padding='same'))\n",
" if hp.Boolean(\"conv1\"):\n",
" model.add(\n",
" Conv1D(\n",
" filters=hp.Int(\"filters1\", min_value=32, max_value=256, step=32),\n",
" kernel_size=hp.Int(\"kernel_size1\", min_value=1, max_value=3, step=1),\n",
" padding=\"same\",\n",
" activation=\"relu\",\n",
" strides=hp.Int(\"strides1\", min_value=1, max_value=4, step=1),\n",
" )\n",
" )\n",
" model.add(MaxPooling1D(pool_size=2, padding='same'))\n",
" model.add(Flatten())\n",
" model.add(\n",
" Dense(\n",
" units=hp.Int(\"units0\", min_value=128, max_value=512, step=64),\n",
" activation=\"relu\",\n",
" )\n",
" )\n",
"# if hp.Boolean(\"dense1\"):\n",
" model.add(\n",
" Dense(\n",
" units=hp.Int(\"units1\", min_value=64, max_value=512, step=64),\n",
" activation=\"relu\",\n",
" )\n",
" )\n",
" if hp.Boolean(\"dense2\"):\n",
" model.add(\n",
" Dense(\n",
" units=hp.Int(\"units2\", min_value=64, max_value=256, step=32),\n",
" activation=\"relu\",\n",
" )\n",
" ) \n",
" model.add(Dense(10, activation=\"softmax\"))\n",
" model.compile(optimizer=\"rmsprop\", loss=\"binary_crossentropy\", metrics=[\"acc\"])\n",
" return model\n",
"########################################units????\n",
"\n",
"\n",
"# model.add(GlobalMaxPooling1D())\n",
"# model.compile(optimizer=\"rmsprop\", loss=\"binary_crossentropy\", metrics=[\"acc\"])\n",
"# model.summary()\n",
" build_model(kt.HyperParameters())"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"tuner = kt.Hyperband(\n",
" build_model,\n",
" \"val_loss\",\n",
" 30,\n",
" factor=3,\n",
" hyperband_iterations=3,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Trial 270 Complete [00h 00m 05s]\n",
"val_loss: 0.045697689056396484\n",
"\n",
"Best val_loss So Far: 0.03326363489031792\n",
"Total elapsed time: 00h 20m 11s\n",
"INFO:tensorflow:Oracle triggered exit\n"
]
}
],
"source": [
"tuner.search(train_x, train_y, validation_data=(test_x, test_y), callbacks=callbacks_list, verbose=1)\n"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<keras.engine.sequential.Sequential at 0x2034189a530>"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"best_hps=tuner.get_best_models(num_models=1)\n",
"# model = tuner.hypermodel.build(best_hps)\n",
"best_hps[0]"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1/1 [==============================] - 0s 30ms/step - loss: 0.2305 - acc: 0.8315\n"
]
},
{
"data": {
"text/plain": [
"[0.23047222197055817, 0.8314606547355652]"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"best_hps[0].evaluate(validation_x, validation_y, batch_size=128)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"# model.save(classification.h5)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}