paranormal-or-skeptic/.ipynb_checkpoints/Untitled (2)-Copy4-checkpoint.ipynb

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import glob\n",
"import pandas as pd\n",
"import re\n",
"import tensorflow as tf\n",
"from collections import Counter\n",
"import numpy as np\n",
"count = Counter()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"filenames1 = []\n",
"for filename in glob.glob('Systemy_dialogowe/data/*.tsv'):\n",
" filenames1.append(filename)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"def prep(filenames):\n",
" df = pd.DataFrame(columns=['Rola', 'Wypowiedź', 'Act'])\n",
" for filename in filenames:\n",
" temp = pd.read_csv(filename, sep='\\t', names=[\"Rola\", \"Wypowiedź\", \"Act\"])\n",
" df = pd.concat([df, temp], ignore_index=True)\n",
" return df"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Rola</th>\n",
" <th>Wypowiedź</th>\n",
" <th>Act</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>user</td>\n",
" <td>Witam</td>\n",
" <td>hello()</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>user</td>\n",
" <td>Co możesz dla mnie zrobić?</td>\n",
" <td>help()</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>user</td>\n",
" <td>Jakie są moje repozytoria?</td>\n",
" <td>request(repos)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>user</td>\n",
" <td>ok. co nowego w Zajęcia AI?</td>\n",
" <td>affirm() &amp; request(repo = Zajecia AI)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>user</td>\n",
" <td>Tylko tyle?</td>\n",
" <td>reqmore()</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1409</th>\n",
" <td>user</td>\n",
" <td>upewniam się</td>\n",
" <td>null</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1411</th>\n",
" <td>user</td>\n",
" <td>pokaż mi raport</td>\n",
" <td>request(repo)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1414</th>\n",
" <td>user</td>\n",
" <td>zmienić</td>\n",
" <td>null</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1416</th>\n",
" <td>user</td>\n",
" <td>Tak</td>\n",
" <td>ack</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1466</th>\n",
" <td>user</td>\n",
" <td>elo</td>\n",
" <td>hello()</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>585 rows × 3 columns</p>\n",
"</div>"
],
"text/plain": [
" Rola Wypowiedź Act\n",
"1 user Witam hello()\n",
"3 user Co możesz dla mnie zrobić? help()\n",
"5 user Jakie są moje repozytoria? request(repos)\n",
"7 user ok. co nowego w Zajęcia AI? affirm() & request(repo = Zajecia AI)\n",
"9 user Tylko tyle? reqmore()\n",
"... ... ... ...\n",
"1409 user upewniam się null\n",
"1411 user pokaż mi raport request(repo)\n",
"1414 user zmienić null\n",
"1416 user Tak ack\n",
"1466 user elo hello()\n",
"\n",
"[585 rows x 3 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = prep(filenames1)\n",
"df = df.loc[df['Rola'] == 'user']\n",
"df = df.fillna('null')\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['hello()', 'help()', 'request(repos)',\n",
" 'affirm() & request(repo = Zajecia AI)', 'reqmore()',\n",
" 'request(notifications)', 'request(notification = 1)',\n",
" 'request(link)', 'affirm() & reqmore()',\n",
" 'request(repo = Projekt Sklep)', 'request(issues)', 'ack()',\n",
" 'request(commits = mattyl34)', 'request(commits = -5)',\n",
" 'affirm() & request(repo = Gra - kółko i krzyżyk)', 'thankyou()',\n",
" 'inform()', 'helpresponse()', 'request(repo = Zajecia AI)',\n",
" 'request(delete)', 'ack() & inform()',\n",
" 'request(repo = Projekt-sklep)', 'request(files = 1:3)',\n",
" 'request()', 'bye()', 'request(file)',\n",
" 'helpresponse() & request(repo)', 'request(command)',\n",
" 'request(repo = Projekt - Sklep)', 'request(authors)',\n",
" 'request(Bob)', 'request(repo = system)',\n",
" 'request(repo = super_stronka_internetowa)',\n",
" 'request(date, pr = 2)', 'request(repo = nazwaRepozytorium)',\n",
" 'request(repo = zadania)', 'request(author, pr = 1)',\n",
" 'request(deny, pr = 1)', 'request(rollback, commit = last)',\n",
" 'request(repo = zajecia)', 'request(newPR)', 'inform(branches)',\n",
" 'inform(title)', 'request(repo = pizza)', 'request(bilet)',\n",
" 'request(repo)', 'inform(capriciosa)', 'inform(gGphJD)', 'affirm',\n",
" 'help', 'hello', 'inform(qgphjd)', 'null', 'bye',\n",
" 'inform(qwdqwdqaswdaqdfqfwqwfq)', 'inform(qGphJs)',\n",
" 'inform(qgphid)', 'inform(pGphJD)', 'thankyou', 'inform(qGphJ0)',\n",
" 'inform(qGphJ)', 'inform(DJhpGq)', 'inform(phgdj)',\n",
" 'inform(QgPHjd)', 'ack'], dtype=object)"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['Act'].unique()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### split by &\n",
"### ignore args and ()\n",
"### {'request', 'inform', 'bye', 'reqmore', 'help', 'ack', 'affirm', 'hello', 'thankyou', 'null'}"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'help', 'reqmore', 'bye', 'thankyou', 'hello', 'affirm', 'ack', 'inform', 'request', 'null'}\n"
]
}
],
"source": [
"new = pd.DataFrame(columns=['Wypowiedź', 'Act'])\n",
"values = set()\n",
"for index, row in df.iterrows():\n",
" act = row[2].split('&')\n",
" act = [re.sub('\\(.*\\)', '', x) for x in act]\n",
" act = [re.sub(' ', '', x) for x in act]\n",
" act = [re.sub('helpresponse', 'help', x) for x in act]\n",
" [values.add(x) for x in act]\n",
" temp = pd.DataFrame({'Wypowiedź':row[1], 'Act': act})\n",
" new = pd.concat([new, temp], ignore_index=True)\n",
"new.head(10)\n",
"print(values)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'request': Wypowiedź Act\n",
" 2 Jakie są moje repozytoria? request\n",
" 4 ok. co nowego w Zajęcia AI? request\n",
" 6 Mam jakieś nowe powiadomienia? request\n",
" 8 Opowiedz mi o powiadomieniu 1 request\n",
" 9 Podaj mi linka do tego powiadomienia request\n",
" .. ... ...\n",
" 571 Próbuję ponownie request\n",
" 573 próbuję request\n",
" 577 pokaż mi raport projektu request\n",
" 584 pokaż mi raport projektu request\n",
" 587 pokaż mi raport request\n",
" \n",
" [130 rows x 2 columns],\n",
" 'inform': Wypowiedź Act\n",
" 26 to wszystko inform\n",
" 33 Oki, to będzie tyle inform\n",
" 76 To nie, to już wszystko inform\n",
" 103 moja gałąź, \"master\" inform\n",
" 104 moja PR inform\n",
" .. ... ...\n",
" 535 gGphJD inform\n",
" 545 qGphJ inform\n",
" 552 DJhpGq inform\n",
" 578 phgdj inform\n",
" 579 QgPHjd inform\n",
" \n",
" [68 rows x 2 columns],\n",
" 'bye': Wypowiedź Act\n",
" 50 Do widzenia bye\n",
" 56 Dziękuję, do widzenia bye\n",
" 77 Papa bye\n",
" 141 exit bye\n",
" 164 exit bye\n",
" 172 Nic z tych komend mnie nie interesuje bye\n",
" 195 exit bye\n",
" 203 Nic z tych komend mnie nie interesuje bye\n",
" 234 exit bye\n",
" 242 Nic z tych komend mnie nie interesuje bye\n",
" 284 exit bye\n",
" 292 Nic z tych komend mnie nie interesuje bye\n",
" 322 Nie chcę bye\n",
" 345 exit bye\n",
" 353 Nic z tych komend mnie nie interesuje bye\n",
" 383 Nie chcę bye\n",
" 393 To wszystko, dziękuje bye\n",
" 416 exit bye\n",
" 424 Nic z tych komend mnie nie interesuje bye\n",
" 454 Nie chcę bye\n",
" 464 To wszystko, dziękuje bye\n",
" 471 Do widzenia bye\n",
" 494 exit bye\n",
" 502 Nic z tych komend mnie nie interesuje bye\n",
" 532 Nie chcę bye\n",
" 542 To wszystko, dziękuje bye\n",
" 549 Do widzenia bye,\n",
" 'reqmore': Wypowiedź Act\n",
" 5 Tylko tyle? reqmore\n",
" 7 Jakie? reqmore\n",
" 11 okej. jakie jeszcze informacje możesz mi przek... reqmore\n",
" 12 O tym samym reqmore\n",
" 15 Co jeszcze możesz dla mnie zrobić? reqmore\n",
" 18 Jakie? reqmore\n",
" 20 A pozostałe dwa? reqmore\n",
" 24 ok; A jakby były to powiedziałbyś mi o nich? reqmore,\n",
" 'help': Wypowiedź Act\n",
" 1 Co możesz dla mnie zrobić? help\n",
" 28 Jakie są dostępne funkcje? help\n",
" 35 Chciałabym się dowiedzieć jakie usługi oferujecie help\n",
" 42 Chciałbym poznać funkcję systemu help\n",
" 58 Jak mi możesz pomóc; Chcę nowe repo help\n",
" .. ... ...\n",
" 567 pokaż listę komend help\n",
" 568 pokaż listę komend help\n",
" 581 pokaż help\n",
" 582 pokaż help\n",
" 583 wyjaśnij mi komendę pokaż mi listę komend help\n",
" \n",
" [140 rows x 2 columns],\n",
" 'ack': Wypowiedź Act\n",
" 16 Tak ack\n",
" 32 Oki, to będzie tyle ack\n",
" 39 tak ack\n",
" 48 Rozumiem ack\n",
" 73 tak ack\n",
" 80 Tak ack\n",
" 85 Tak ack\n",
" 91 tak ack\n",
" 94 Tak, poproszę ack\n",
" 101 tak ack\n",
" 107 tak ack\n",
" 110 tak ack\n",
" 113 tak ack\n",
" 589 Tak ack,\n",
" 'affirm': Wypowiedź Act\n",
" 3 ok. co nowego w Zajęcia AI? affirm\n",
" 10 okej. jakie jeszcze informacje możesz mi przek... affirm\n",
" 21 ok. Są jakieś failujące testy w tym repo? affirm\n",
" 23 ok; A jakby były to powiedziałbyś mi o nich? affirm\n",
" 117 kontynuuj affirm\n",
" 122 kontynuuj affirm\n",
" 137 chcę kontynuować affirm\n",
" 145 kontynuuj affirm\n",
" 160 chcę kontynuować affirm\n",
" 169 kontynuować affirm\n",
" 176 kontynuuj affirm\n",
" 191 chcę kontynuować affirm\n",
" 200 kontynuować affirm\n",
" 211 Chcę kontynuować affirm\n",
" 215 kontynuuj affirm\n",
" 230 chcę kontynuować affirm\n",
" 239 kontynuować affirm\n",
" 250 Chcę kontynuować affirm\n",
" 259 to nie chce zmieniać konfiguracji affirm\n",
" 265 kontynuuj affirm\n",
" 280 chcę kontynuować affirm\n",
" 289 kontynuować affirm\n",
" 300 Chcę kontynuować affirm\n",
" 309 to nie chce zmieniać konfiguracji affirm\n",
" 316 kontynuuj affirm\n",
" 326 kontynuuj affirm\n",
" 341 chcę kontynuować affirm\n",
" 350 kontynuować affirm\n",
" 361 Chcę kontynuować affirm\n",
" 370 to nie chce zmieniać konfiguracji affirm\n",
" 377 kontynuuj affirm\n",
" 388 kontynuuj affirm\n",
" 397 kontynuuj affirm\n",
" 412 chcę kontynuować affirm\n",
" 421 kontynuować affirm\n",
" 432 Chcę kontynuować affirm\n",
" 441 to nie chce zmieniać konfiguracji affirm\n",
" 448 kontynuuj affirm\n",
" 459 kontynuuj affirm\n",
" 475 kontynuuj affirm\n",
" 490 chcę kontynuować affirm\n",
" 499 kontynuować affirm\n",
" 510 Chcę kontynuować affirm\n",
" 519 to nie chce zmieniać konfiguracji affirm\n",
" 526 kontynuuj affirm\n",
" 537 kontynuuj affirm\n",
" 566 No dobra, to kontynuujemy affirm\n",
" 570 kontynuować affirm\n",
" 572 Kontynuować affirm\n",
" 585 kontynuujmy affirm,\n",
" 'hello': Wypowiedź Act\n",
" 0 Witam hello\n",
" 27 Dzień dobry! hello\n",
" 34 Dzień dobry hello\n",
" 41 Cześć hello\n",
" 51 Dzień dobry panie bocie hello\n",
" 57 Elo hello\n",
" 78 Dzień dobry hello\n",
" 82 Witam hello\n",
" 88 Dzień dobry hello\n",
" 92 Dzień dobry hello\n",
" 99 Dzień dobry hello\n",
" 105 Dzień dobry hello\n",
" 111 Dzień dobry hello\n",
" 124 Widam hello\n",
" 147 Widam hello\n",
" 165 Witam hello\n",
" 178 Widam hello\n",
" 196 Witam hello\n",
" 204 Dzień dobry! hello\n",
" 217 Widam hello\n",
" 235 Witam hello\n",
" 243 Dzień dobry! hello\n",
" 252 Dzień dobry hello\n",
" 267 Widam hello\n",
" 285 Witam hello\n",
" 293 Dzień dobry! hello\n",
" 302 Dzień dobry hello\n",
" 312 Dzień dobry hello\n",
" 328 Widam hello\n",
" 346 Witam hello\n",
" 354 Dzień dobry! hello\n",
" 363 Dzień dobry hello\n",
" 373 Dzień dobry hello\n",
" 384 Cześć hello\n",
" 399 Widam hello\n",
" 417 Witam hello\n",
" 425 Dzień dobry! hello\n",
" 434 Dzień dobry hello\n",
" 444 Dzień dobry hello\n",
" 455 Cześć hello\n",
" 465 Cześć hello\n",
" 477 Widam hello\n",
" 495 Witam hello\n",
" 503 Dzień dobry! hello\n",
" 512 Dzień dobry hello\n",
" 522 Dzień dobry hello\n",
" 533 Cześć hello\n",
" 543 Cześć hello\n",
" 550 Elo hello\n",
" 576 Dzień dobry hello\n",
" 590 elo hello,\n",
" 'thankyou': Wypowiedź Act\n",
" 25 dziękuję thankyou\n",
" 40 dziękuję za informację thankyou\n",
" 49 To wszystko, dziękuje thankyou\n",
" 74 Dzięki thankyou\n",
" 81 Nie thankyou\n",
" 87 Rozumiem, to wszystko thankyou\n",
" 98 Nie thankyou\n",
" 108 Nie thankyou\n",
" 260 Dziękuje thankyou\n",
" 310 Dziękuje thankyou\n",
" 371 Dziękuje thankyou\n",
" 389 podoba mi się raport thankyou\n",
" 392 Dobrze thankyou\n",
" 442 Dziękuje thankyou\n",
" 460 podoba mi się raport thankyou\n",
" 463 Dobrze thankyou\n",
" 520 Dziękuje thankyou\n",
" 538 podoba mi się raport thankyou\n",
" 541 Dobrze thankyou\n",
" 575 Dzięki za pomoc thankyou,\n",
" 'null': Wypowiedź Act\n",
" 128 chcę zmienić projekt null\n",
" 130 A jak mogę zmienić konfigurację? null\n",
" 131 CHCĘ INNY PROJEKT null\n",
" 132 zgłoś błąd null\n",
" 135 Chcę zmienić konfigurację null\n",
" .. ... ...\n",
" 562 Chcę zmienić null\n",
" 574 zmienić null\n",
" 580 zmienic null\n",
" 586 upewniam się null\n",
" 588 zmienić null\n",
" \n",
" [83 rows x 2 columns]}"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"unique = ['request', 'inform', 'bye', 'reqmore', 'help', 'ack', 'affirm', 'hello', 'thankyou', 'null']\n",
"sorted_values = {}\n",
"for item in unique:\n",
" temp = new.loc[new['Act'] == item]\n",
"# print(new.loc[new['Act'] == item])\n",
" sorted_values[item] = temp\n",
" temp.to_csv(f'data_sorted//{item}', sep='\\t', index=False)\n",
"sorted_values"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"def preprocess(line):\n",
" txt = line\n",
" txt = re.sub(\n",
" \"(£|§|!|@|#|\\$|%|\\^|&|\\*|\\(|\\)|_|-|\\+|=|\\{|\\[|\\}|\\]|:|;|\\\"|'|\\|\\\\|\\<|,|\\>|\\.|\\?|/|~|`|\\|||)\",\n",
" \"\",\n",
" txt,\n",
" )\n",
" txt = txt.lower()\n",
" txt = re.sub(\"[0-9]\", \"\", txt)\n",
" txt = re.sub(\"[ \\t]+\", \" \", txt)\n",
" txt = re.sub(\" +$\", \"\", txt)\n",
" txt = re.sub(\"ą\", \"a\", txt)\n",
" txt = re.sub(\"ć\", \"c\", txt)\n",
" txt = re.sub(\"ę\", \"e\", txt)\n",
" txt = re.sub(\"ł\", \"l\", txt)\n",
" txt = re.sub(\"ń\", \"n\", txt)\n",
" txt = re.sub(\"ó\", \"o\", txt)\n",
" txt = re.sub(\"ś\", \"s\", txt)\n",
" txt = re.sub(\"ź\", \"z\", txt)\n",
" txt = re.sub(\"ż\", \"z\", txt)\n",
" words = txt.split()\n",
" words = [w[:6] if len(w) > 6 else w for w in words]\n",
" out = []\n",
" for word in words:\n",
"# if word not in stopwords:\n",
" out.append(word)\n",
" for stem in out:\n",
" count[stem] += 1\n",
" text = \" \".join(out)\n",
"# print(text)\n",
" return text\n"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"from tensorflow.keras.utils import to_categorical\n",
"from numpy import argmax\n",
"\n",
"acts = ['inform', 'reqmore', 'thankyou', 'ack', 'affirm', 'hello', 'request', 'help', 'null', 'bye']\n",
"to_num = {act: idx for idx, act in enumerate(acts)}"
]
},
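{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Quick sanity check of the label encoding (illustration only, not part of the pipeline):\n",
"# 'hello' sits at index 5 in `acts`, so its one-hot vector has a 1.0 at position 5.\n",
"to_categorical(to_num['hello'], num_classes=len(acts))"
]
},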
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"53"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"prep = pd.DataFrame(columns=['Wypowiedź', 'Act'])\n",
"values = set()\n",
"max_len = 0\n",
"for index, row in new.iterrows():\n",
" temp = preprocess(row[0])\n",
" one_hot_encode = to_categorical(to_num[row[1]], num_classes=len(acts))\n",
"# one_hot_encode = np.asarray(one_hot_encode).astype('float32')\n",
" max_len = max(max_len, len(temp))\n",
" frame = pd.DataFrame({'Wypowiedź': temp, 'Act': [np.asarray(one_hot_encode).astype('float32')]})\n",
" prep = pd.concat([prep, frame], ignore_index=True)\n",
"# print(prep)\n",
"max_len"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Wypowiedź</th>\n",
" <th>Act</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>czesc</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>chcial odrzuc pr</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>capric</td>\n",
" <td>[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>oki to bedzie tyle</td>\n",
" <td>[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>qgphjd</td>\n",
" <td>[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>586</th>\n",
" <td>wyjasn mi komend pokaz mi liste komend</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>587</th>\n",
" <td>wyjasn mi komend</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>588</th>\n",
" <td>zmieni konfig</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>589</th>\n",
" <td>ggphjd</td>\n",
" <td>[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>590</th>\n",
" <td>chce zmieni konfig</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, ...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>591 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" Wypowiedź \\\n",
"0 czesc \n",
"1 chcial odrzuc pr \n",
"2 capric \n",
"3 oki to bedzie tyle \n",
"4 qgphjd \n",
".. ... \n",
"586 wyjasn mi komend pokaz mi liste komend \n",
"587 wyjasn mi komend \n",
"588 zmieni konfig \n",
"589 ggphjd \n",
"590 chce zmieni konfig \n",
"\n",
" Act \n",
"0 [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ... \n",
"1 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ... \n",
"2 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
"3 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
"4 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
".. ... \n",
"586 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ... \n",
"587 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ... \n",
"588 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, ... \n",
"589 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
"590 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, ... \n",
"\n",
"[591 rows x 2 columns]"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"prep = prep.sample(frac=1).reset_index(drop=True)\n",
"prep"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"('mi', 219)\n",
"('pokaz', 133)\n",
"('komend', 124)\n",
"('projek', 97)\n",
"('raport', 84)\n",
"('wyjasn', 68)\n",
"('chce', 61)\n",
"('zmieni', 57)\n",
"('konfig', 45)\n",
"('jakie', 43)\n",
"('kontyn', 41)\n",
"('liste', 41)\n",
"('sa', 31)\n",
"('w', 27)\n",
"('funkcj', 26)\n",
"('dzien', 25)\n",
"('dobry', 25)\n",
"('dostep', 25)\n",
"('to', 24)\n",
"('mozesz', 22)\n",
"('repozy', 21)\n",
"('nie', 21)\n",
"('pomoc', 19)\n",
"('tak', 17)\n",
"('a', 16)\n",
"('czym', 16)\n",
"('moge', 15)\n",
"('chcial', 14)\n",
"('jeszcz', 12)\n",
"('dzieku', 12)\n",
"('ggphjd', 12)\n",
"('o', 11)\n",
"('jak', 10)\n",
"('witam', 9)\n",
"('co', 9)\n",
"('mnie', 9)\n",
"('repo', 9)\n",
"('wszyst', 9)\n",
"('z', 9)\n",
"('capric', 9)\n",
"('qgphjd', 9)\n",
"('lista', 9)\n",
"('sie', 8)\n",
"('system', 8)\n",
"('widam', 8)\n",
"('inny', 8)\n",
"('zglos', 8)\n",
"('blad', 8)\n",
"('exit', 8)\n",
"('inform', 7)\n",
"('adawda', 7)\n",
"('qwdqwd', 7)\n",
"('qgphjs', 7)\n",
"('nic', 7)\n",
"('tych', 7)\n",
"('intere', 7)\n",
"('ok', 6)\n",
"('mam', 6)\n",
"('nowe', 6)\n",
"('podaj', 6)\n",
"('do', 6)\n",
"('powied', 6)\n",
"('czesc', 6)\n",
"('qgphid', 6)\n",
"('dialog', 6)\n",
"('qgphj', 6)\n",
"('zajeci', 5)\n",
"('issue', 5)\n",
"('jest', 5)\n",
"('pr', 5)\n",
"('pgphjd', 5)\n",
"('napraw', 5)\n",
"('bylem', 5)\n",
"('botem', 5)\n",
"('i', 5)\n",
"('zle', 5)\n",
"('przepi', 5)\n",
"('kod', 5)\n",
"('moje', 4)\n",
"('ai', 4)\n",
"('powiad', 4)\n",
"('ostatn', 4)\n",
"('uslugi', 4)\n",
"('oferuj', 4)\n",
"('zobacz', 4)\n",
"('widzen', 4)\n",
"('no', 4)\n",
"('zrobic', 3)\n",
"('tyle', 3)\n",
"('jakies', 3)\n",
"('tym', 3)\n",
"('sklep', 3)\n",
"('pierws', 3)\n",
"('elo', 3)\n",
"('help', 3)\n",
"('status', 3)\n",
"('podoba', 3)\n",
"('dobrze', 3)\n",
"('dla', 2)\n",
"('nowego', 2)\n",
"('opowie', 2)\n",
"('okej', 2)\n",
"('przeka', 2)\n",
"('commit', 2)\n",
"('failuj', 2)\n",
"('testy', 2)\n",
"('jakby', 2)\n",
"('byly', 2)\n",
"('nich', 2)\n",
"('powiaz', 2)\n",
"('oki', 2)\n",
"('bedzie', 2)\n",
"('moich', 2)\n",
"('za', 2)\n",
"('na', 2)\n",
"('temat', 2)\n",
"('pliku', 2)\n",
"('rozumi', 2)\n",
"('zatem', 2)\n",
"('dzieki', 2)\n",
"('potraf', 2)\n",
"('moim', 2)\n",
"('moja', 2)\n",
"('pizza', 2)\n",
"('github', 2)\n",
"('briefi', 2)\n",
"('poka', 2)\n",
"('probuj', 2)\n",
"('tylko', 1)\n",
"('linka', 1)\n"
]
}
],
"source": [
"num_words = 130\n",
"\n",
"for item in count.most_common(num_words):\n",
" print(item)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"train_size = int(prep.shape[0] * 0.70)\n",
"\n",
"validation_size = int(prep.shape[0] * 0.85)\n",
"\n",
"train_sentences = prep.Wypowiedź[:train_size]\n",
"train_labels = prep.Act[:train_size]\n",
"\n",
"test_sentences = prep.Wypowiedź[train_size:validation_size]\n",
"test_labels = prep.Act[train_size:validation_size]\n",
"\n",
"validation_sentences = prep.Wypowiedź[validation_size:]\n",
"validation_labels = prep.Act[validation_size:]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"( Wypowiedź \\\n",
" 0 czesc \n",
" 1 chcial odrzuc pr \n",
" 2 capric \n",
" 3 oki to bedzie tyle \n",
" 4 qgphjd \n",
" .. ... \n",
" 408 o tym samym \n",
" 409 pgphjd \n",
" 410 pokaz mi raport projek adawda \n",
" 411 w czym jeszcz mozesz mi pomoc \n",
" 412 opowie mi o zajeci ai \n",
" \n",
" Act \n",
" 0 [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ... \n",
" 1 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ... \n",
" 2 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
" 3 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
" 4 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
" .. ... \n",
" 408 [0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
" 409 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
" 410 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ... \n",
" 411 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ... \n",
" 412 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, ... \n",
" \n",
" [413 rows x 2 columns],\n",
" Wypowiedź Act\n",
" 413 pokaz mi lista komend [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...\n",
" 414 qgphjd [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...\n",
" 415 pokaz mi liste komend [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...\n",
" 416 qgphjs [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...\n",
" 417 jakie sa dostep funkcj [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...\n",
" .. ... ...\n",
" 497 kontyn [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...\n",
" 498 chce kontyn [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...\n",
" 499 czesc [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...\n",
" 500 kontyn [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...\n",
" 501 chce kontyn [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...\n",
" \n",
" [89 rows x 2 columns])"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train = pd.concat([train_sentences, train_labels], ignore_index=False, axis=1)\n",
"test = pd.concat([test_sentences, test_labels], ignore_index=False, axis=1)\n",
"validation = pd.concat([validation_sentences, validation_labels], ignore_index=False, axis=1)\n",
"\n",
"train, test\n",
"# pd.Series(array) "
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"from keras.preprocessing.text import Tokenizer\n",
"\n",
"tokenizer = Tokenizer(num_words=num_words)\n",
"tokenizer.fit_on_texts(train['Wypowiedź'])"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"word_index = tokenizer.word_index"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"413 pokaz mi lista komend\n",
"414 qgphjd\n",
"415 pokaz mi liste komend\n",
"416 qgphjs\n",
"417 jakie sa dostep funkcj\n",
" ... \n",
"497 kontyn\n",
"498 chce kontyn\n",
"499 czesc\n",
"500 kontyn\n",
"501 chce kontyn\n",
"Name: Wypowiedź, Length: 89, dtype: object"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test['Wypowiedź']"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"# train['Wypowiedź'] = train['Wypowiedź'].apply(func = lambda row : tokenizer.texts_to_matrix(tokenizer.texts_to_sequences(row))) \n",
"# test['Wypowiedź'] = test['Wypowiedź'].apply(func = lambda row : tokenizer.texts_to_matrix(tokenizer.texts_to_sequences(row))) \n",
"\n",
"\n",
"# train['Wypowiedź'] = train['Wypowiedź'].apply(tokenizer.texts_to_matrix(train['Wypowiedź']))#.apply(func = lambda row : tokenizer.texts_to_matrix(tokenizer.texts_to_sequences(row))) \n",
"# test['Wypowiedź'] = test['Wypowiedź'].apply(func = lambda row : tokenizer.texts_to_matrix(tokenizer.texts_to_sequences(row))) \n",
"train['Wypowiedź'] = (tokenizer.texts_to_sequences(train['Wypowiedź']))\n",
"test['Wypowiedź'] = (tokenizer.texts_to_sequences(test['Wypowiedź']))\n",
"validation['Wypowiedź'] = (tokenizer.texts_to_sequences(validation['Wypowiedź']))"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[69]"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# train[train['Wypowiedź'].str.len() == 1]\n",
"train['Wypowiedź'][5]"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(1, 4)"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(train['Wypowiedź'][5]), len(train['Wypowiedź'][200])"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [],
"source": [
"# type(train['Wypowiedź'][0]),type(test['Wypowiedź'][588]),type(train['Act'][0]),type(test['Act'][588])"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
"from keras.preprocessing.sequence import pad_sequences\n",
"\n",
"temp_a = pad_sequences(\n",
" train['Wypowiedź'].tolist(), maxlen=max_len, padding=\"post\", truncating=\"post\"\n",
")\n",
"temp_b = pad_sequences(\n",
" test['Wypowiedź'].tolist(), maxlen=max_len, padding=\"post\", truncating=\"post\"\n",
")\n",
"temp_c = pad_sequences(\n",
" validation['Wypowiedź'].tolist(), maxlen=max_len, padding=\"post\", truncating=\"post\"\n",
")\n",
"train['Wypowiedź'] = temp_a.tolist()\n",
"test['Wypowiedź'] = temp_b.tolist()\n",
"validation['Wypowiedź'] = temp_c.tolist()\n",
"\n",
"# train=train.reshape(1,train.shape[0])\n",
"# mel=mel.reshape(1,mel.shape[0])\n",
"\n",
"# train['Wypowiedź'] = train['Wypowiedź'].apply(lambda row : pad_sequences(\n",
"# row, maxlen=max_len, padding=\"post\", truncating=\"post\"\n",
"# ))\n",
"# test['Wypowiedź'] = test['Wypowiedź'].apply(lambda row : pad_sequences(\n",
"# row, maxlen=max_len, padding=\"post\", truncating=\"post\"\n",
"# )) "
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"413 [2, 1, 34, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...\n",
"414 [40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...\n",
"415 [2, 1, 12, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...\n",
"416 [67, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...\n",
"417 [10, 13, 17, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...\n",
" ... \n",
"497 [9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...\n",
"498 [7, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...\n",
"499 [58, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...\n",
"500 [9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...\n",
"501 [7, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...\n",
"Name: Wypowiedź, Length: 89, dtype: object"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test['Wypowiedź']"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"import keras\n",
"early_stopping = keras.callbacks.EarlyStopping(\n",
" monitor=\"val_loss\", patience=5, restore_best_weights=True, verbose=0\n",
")\n",
"\n",
"# checkpoint_callback = keras.callbacks.ModelCheckpoint(\n",
"# filepath='.', monitor='val_loss', verbose=0, save_weights_only=True,\n",
"# save_freq='epoch', mode='auto', save_best_only=True)\n",
"\n",
"reduce_lr_on_plateau = keras.callbacks.ReduceLROnPlateau(\n",
" monitor=\"acc\", factor=0.1, patience=2, verbose=0\n",
")\n",
"\n",
"callbacks_list = [early_stopping, reduce_lr_on_plateau]"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Wypowiedź</th>\n",
" <th>Act</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>413</th>\n",
" <td>[2, 1, 34, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>414</th>\n",
" <td>[40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...</td>\n",
" <td>[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>415</th>\n",
" <td>[2, 1, 12, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>416</th>\n",
" <td>[67, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...</td>\n",
" <td>[1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>417</th>\n",
" <td>[10, 13, 17, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>497</th>\n",
" <td>[9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>498</th>\n",
" <td>[7, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>499</th>\n",
" <td>[58, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>500</th>\n",
" <td>[9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>501</th>\n",
" <td>[7, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...</td>\n",
" <td>[0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>89 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" Wypowiedź \\\n",
"413 [2, 1, 34, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... \n",
"414 [40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... \n",
"415 [2, 1, 12, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... \n",
"416 [67, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... \n",
"417 [10, 13, 17, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... \n",
".. ... \n",
"497 [9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n",
"498 [7, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n",
"499 [58, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,... \n",
"500 [9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n",
"501 [7, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ... \n",
"\n",
" Act \n",
"413 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ... \n",
"414 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
"415 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ... \n",
"416 [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ... \n",
"417 [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, ... \n",
".. ... \n",
"497 [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ... \n",
"498 [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ... \n",
"499 [0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, ... \n",
"500 [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ... \n",
"501 [0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, ... \n",
"\n",
"[89 rows x 2 columns]"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"test"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"# train['Wypowiedź'] = train['Wypowiedź'].tolist()\n",
"# test['Wypowiedź'] = test['Wypowiedź'].tolist()\n",
"\n",
"train['Wypowiedź'] = train['Wypowiedź'].apply(lambda row: np.asarray(row).astype('float32'))\n",
"test['Wypowiedź'] = test['Wypowiedź'].apply(lambda row : np.asarray(row).astype('float32'))\n",
"validation['Wypowiedź'] = validation['Wypowiedź'].apply(lambda row : np.asarray(row).astype('float32'))\n",
"\n",
"train['Act'] = train['Act'].apply(lambda row : np.asarray(row).astype('float32'))\n",
"test['Act'] = test['Act'].apply(lambda row : np.asarray(row).astype('float32'))\n",
"validation['Act'] = validation['Act'].apply(lambda row : np.asarray(row).astype('float32'))"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"# train['Wypowiedź'] = train['Wypowiedź'].apply(lambda row: tf.convert_to_tensor(np.asarray(row).astype('float32')))\n",
"# test['Wypowiedź'] = test['Wypowiedź'].apply(lambda row : tf.convert_to_tensor(np.asarray(row).astype('float32')))\n",
"\n",
"# train['Act'] = train['Act'].apply(lambda row : tf.convert_to_tensor(np.asarray(row).astype('float32')))\n",
"# test['Act'] = test['Act'].apply(lambda row : tf.convert_to_tensor(np.asarray(row).astype('float32')))\n"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([60., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0.], dtype=float32)"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
" train['Wypowiedź'][2]"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(pandas.core.series.Series,\n",
" pandas.core.series.Series,\n",
" pandas.core.series.Series,\n",
" pandas.core.series.Series)"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"type(train['Wypowiedź']),type(test['Wypowiedź']),type(train['Act']),type(test['Act'])"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [],
"source": [
"train_x = tf.convert_to_tensor(train['Wypowiedź'].tolist())\n",
"train_y = tf.convert_to_tensor(train['Act'].tolist())\n",
"test_x = tf.convert_to_tensor(test['Wypowiedź'].tolist())\n",
"test_y = tf.convert_to_tensor(test['Act'].tolist())\n",
"\n",
"validation_x = tf.convert_to_tensor(validation['Wypowiedź'].tolist())\n",
"validation_y = tf.convert_to_tensor(validation['Act'].tolist())"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"89"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(validation_y)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<tf.Tensor: shape=(53,), dtype=float32, numpy=\n",
"array([58., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,\n",
" 0.], dtype=float32)>"
]
},
"execution_count": 32,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_x[0]"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [],
"source": [
"# # import keras_tuner as kt\n",
"# from tensorflow.keras.models import Sequential\n",
"# from tensorflow.keras.layers import (\n",
"# Flatten,\n",
"# Dense,\n",
"# Embedding,\n",
"# Conv1D,\n",
"# GlobalMaxPooling1D,\n",
"# MaxPooling1D,\n",
"# )\n",
"\n",
"\n",
"# model = Sequential()\n",
"# model.add(\n",
"# Embedding(\n",
"# num_words,\n",
"# output_dim=128,\n",
"# input_length=max_len,\n",
"# )\n",
"# )\n",
"# model.add(\n",
"# Conv1D(\n",
"# filters=64,\n",
"# kernel_size=3,\n",
"# padding=\"same\",\n",
"# activation=\"relu\",\n",
"# strides=1,\n",
"# )\n",
"# )\n",
"# model.add(MaxPooling1D(pool_size=2, padding='same'))\n",
"# model.add(Flatten())\n",
"# model.add(\n",
"# Dense(\n",
"# units=128,\n",
"# activation=\"relu\",\n",
"# )\n",
"# )\n",
"# model.add(\n",
"# Dense(\n",
"# units=128,\n",
"# activation=\"relu\",\n",
"# )\n",
"# ) \n",
"# model.add(Dense(10, activation=\"softmax\"))\n",
"# model.compile(optimizer=\"rmsprop\", loss=\"categorical_crossentropy\", metrics=[\"acc\"])\n",
"# ########################################units????\n",
"# model.summary()\n",
"# # build_model(kt.HyperParameters())"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(<tf.Tensor: shape=(1,), dtype=int32, numpy=array([53])>,\n",
" <tf.Tensor: shape=(1,), dtype=int32, numpy=array([10])>,\n",
" <tf.Tensor: shape=(1,), dtype=int32, numpy=array([53])>,\n",
" <tf.Tensor: shape=(1,), dtype=int32, numpy=array([10])>)"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
" tf.shape(train_x[0]), tf.shape(train_y[0]), tf.shape(test_x[0]), tf.shape(test_y[0]),\n"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"scrolled": false
},
"outputs": [],
"source": [
"# model.fit(train_x, train_y, validation_data=(test_x, test_y), callbacks=callbacks_list, verbose=1, epochs=100)\n"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [],
"source": [
"import keras_tuner as kt\n",
"from tensorflow.keras.models import Sequential\n",
"from tensorflow.keras.layers import (\n",
" Flatten,\n",
" Dense,\n",
" Embedding,\n",
" Conv1D,\n",
" GlobalMaxPooling1D,\n",
" MaxPooling1D,\n",
")\n",
"\n",
"\n",
"def build_model(hp):\n",
"\n",
" model = Sequential()\n",
" model.add(\n",
" Embedding(\n",
" num_words,\n",
" output_dim=hp.Int(\"output_dim\", min_value=128, max_value=1024, step=128),\n",
" input_length=max_len,\n",
" )\n",
" )\n",
" model.add(\n",
" Conv1D(\n",
" filters=hp.Int(\"filters0\", min_value=64, max_value=512, step=64),\n",
" kernel_size=hp.Int(\"kernel_size0\", min_value=1, max_value=3, step=1),\n",
" padding=\"same\",\n",
" activation=\"relu\",\n",
" strides=hp.Int(\"strides0\", min_value=1, max_value=4, step=1),\n",
" )\n",
" )\n",
" model.add(MaxPooling1D(pool_size=2, padding='same'))\n",
" if hp.Boolean(\"conv1\"):\n",
" model.add(\n",
" Conv1D(\n",
" filters=hp.Int(\"filters1\", min_value=32, max_value=256, step=32),\n",
" kernel_size=hp.Int(\"kernel_size1\", min_value=1, max_value=3, step=1),\n",
" padding=\"same\",\n",
" activation=\"relu\",\n",
" strides=hp.Int(\"strides1\", min_value=1, max_value=4, step=1),\n",
" )\n",
" )\n",
" model.add(MaxPooling1D(pool_size=2, padding='same'))\n",
" model.add(Flatten())\n",
" model.add(\n",
" Dense(\n",
" units=hp.Int(\"units0\", min_value=128, max_value=512, step=64),\n",
" activation=\"relu\",\n",
" )\n",
" )\n",
"# if hp.Boolean(\"dense1\"):\n",
" model.add(\n",
" Dense(\n",
" units=hp.Int(\"units1\", min_value=64, max_value=512, step=64),\n",
" activation=\"relu\",\n",
" )\n",
" )\n",
" if hp.Boolean(\"dense2\"):\n",
" model.add(\n",
" Dense(\n",
" units=hp.Int(\"units2\", min_value=64, max_value=256, step=32),\n",
" activation=\"relu\",\n",
" )\n",
" ) \n",
" model.add(Dense(10, activation=\"softmax\"))\n",
" model.compile(optimizer=\"rmsprop\", loss=\"binary_crossentropy\", metrics=[\"acc\"])\n",
" return model\n",
"########################################units????\n",
"\n",
"\n",
"# model.add(GlobalMaxPooling1D())\n",
"# model.compile(optimizer=\"rmsprop\", loss=\"binary_crossentropy\", metrics=[\"acc\"])\n",
"# model.summary()\n",
" build_model(kt.HyperParameters())"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"tuner = kt.Hyperband(\n",
" build_model,\n",
" \"val_loss\",\n",
" 30,\n",
" factor=3,\n",
" hyperband_iterations=3,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Trial 270 Complete [00h 00m 05s]\n",
"val_loss: 0.045697689056396484\n",
"\n",
"Best val_loss So Far: 0.03326363489031792\n",
"Total elapsed time: 00h 20m 11s\n",
"INFO:tensorflow:Oracle triggered exit\n"
]
}
],
"source": [
"tuner.search(train_x, train_y, validation_data=(test_x, test_y), callbacks=callbacks_list, verbose=1)\n"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<keras.engine.sequential.Sequential at 0x2034189a530>"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"best_hps=tuner.get_best_models(num_models=1)\n",
"# model = tuner.hypermodel.build(best_hps)\n",
"best_hps[0]"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1/1 [==============================] - 0s 30ms/step - loss: 0.2305 - acc: 0.8315\n"
]
},
{
"data": {
"text/plain": [
"[0.23047222197055817, 0.8314606547355652]"
]
},
"execution_count": 42,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"best_hps[0].evaluate(validation_x, validation_y, batch_size=128)"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [],
"source": [
"# model.save(classification.h5)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.8"
}
},
"nbformat": 4,
"nbformat_minor": 5
}