commit 37352ddc0d03a8364371813d3a79b7d7757fd64b Author: s464968 Date: Fri Feb 3 15:39:51 2023 +0100 finished project diff --git a/Jadalnosc_grzybow.pdf b/Jadalnosc_grzybow.pdf new file mode 100644 index 0000000..596fd41 Binary files /dev/null and b/Jadalnosc_grzybow.pdf differ diff --git a/README.md b/README.md new file mode 100644 index 0000000..e69de29 diff --git a/Wzór raportu.docx b/Wzór raportu.docx new file mode 100644 index 0000000..a580b94 Binary files /dev/null and b/Wzór raportu.docx differ diff --git a/Wzór raportu.pdf b/Wzór raportu.pdf new file mode 100644 index 0000000..1fe0f8e Binary files /dev/null and b/Wzór raportu.pdf differ diff --git a/dane_z_moodle.txt b/dane_z_moodle.txt new file mode 100644 index 0000000..781e9c7 --- /dev/null +++ b/dane_z_moodle.txt @@ -0,0 +1,21 @@ +Projekt realizowany jest indywidualnie. + +Projekt polega na zastosowaniu poznanych metod uczenia maszynowego do znalezienia rozwiązania wybranego problemu. + +Oprócz zbudowania odpowiedniego modelu, oceniane będzie odpowiednie przygotowanie danych uczących oraz ewaluacja stworzonego modelu. + +Dodatkowo, żeby otrzymać maksymalną liczbę punktów, przynajmniej jeden z modeli powinien być siecią neuronową. + +Pliki potrzebne do uruchomienia projektu (kod i dane) należy dołączyć na Moodle'u lub umieścić w repozytorium (wówczas wystarczy podać jego adres). + +Do oceny projektu proszę przygotować również krótki raport, zawierający: + + cel projektu / definicję problemu (jakie zagadnienie Państwo rozwiązywali) + opis użytych danych (skąd pochodzą dane, ile przykładów zawierają zbiory uczący i testowy) + opis wykorzystanych metod (jakich modeli Państwo użyli) + tabelkę z wynikami ewaluacji + ewentualne wnioski + +Wzór raportu znajduje się w pliku Wzór raportu.docx + +Maksymalna liczba punktów: 34 \ No newline at end of file diff --git a/mushrooms b/mushrooms new file mode 160000 index 0000000..88ec0ad --- /dev/null +++ b/mushrooms @@ -0,0 +1 @@ +Subproject commit 88ec0ad5fffcac6a6909099199c3ef659977e721 diff --git a/mushrooms.ipynb b/mushrooms.ipynb new file mode 100644 index 0000000..27d5d54 --- /dev/null +++ b/mushrooms.ipynb @@ -0,0 +1,1585 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 127, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('mushrooms/train/train.tsv',sep='\\t', header=None)\n", + "X_ver = pd.read_csv('mushrooms/dev-0/in.tsv',sep='\\t', header=None)\n", + "y_ver = pd.read_csv('mushrooms/dev-0/expected.tsv',sep='\\t', header=None)" + ] + }, + { + "cell_type": "code", + "execution_count": 129, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 0 1 2 3 4 5 6 7 8 9 ... 13 14 15 16 17 18 19 20 21 22\n", + "0 p x s n t p f c n k ... s w w p w o p k s u\n", + "1 e x s y t a f c b k ... s w w p w o p n n g\n", + "2 p x y w t p f c n n ... s w w p w o p k s u\n", + "3 e x s g f n f w b k ... s w w p w o e n a g\n", + "4 e x y y t a f c b n ... s w w p w o p k n g\n", + "\n", + "[5 rows x 23 columns]\n", + "0 0\n", + "1 0\n", + "2 0\n", + "3 0\n", + "4 0\n", + "5 0\n", + "6 0\n", + "7 0\n", + "8 0\n", + "9 0\n", + "10 0\n", + "11 0\n", + "12 0\n", + "13 0\n", + "14 0\n", + "15 0\n", + "16 0\n", + "17 0\n", + "18 0\n", + "19 0\n", + "20 0\n", + "21 0\n", + "22 0\n", + "dtype: int64\n", + "(6465, 23)\n" + ] + } + ], + "source": [ + "print(df.head())\n", + "print(df.isna().sum())\n", + "print(df.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 130, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 2\n", + "1 6\n", + "2 4\n", + "3 10\n", + "4 2\n", + "5 9\n", + "6 2\n", + "7 2\n", + "8 2\n", + "9 12\n", + "10 2\n", + "11 5\n", + "12 4\n", + "13 4\n", + "14 9\n", + "15 9\n", + "16 1\n", + "17 4\n", + "18 3\n", + "19 5\n", + "20 9\n", + "21 6\n", + "22 7\n", + "dtype: int64" + ] + }, + "execution_count": 130, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 131, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 6\n", + "1 3\n", + "2 10\n", + "3 2\n", + "4 9\n", + "5 2\n", + "6 2\n", + "7 2\n", + "8 12\n", + "9 2\n", + "10 5\n", + "11 4\n", + "12 4\n", + "13 9\n", + "14 9\n", + "15 1\n", + "16 4\n", + "17 3\n", + "18 5\n", + "19 9\n", + "20 6\n", + "21 7\n", + "dtype: int64" + ] + }, + "execution_count": 131, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_ver.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 132, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0_b0_c0_f0_k0_s0_x1_s1_y1_f1_g...20_s20_v20_y21_d21_g21_l21_m21_p21_u21_w
01000001000...0000001000
10000010100...0100100000
21000000100...0000001000
31000001000...1000001000
40000010100...0000001000
..................................................................
7870000011000...0101000000
7880010001000...0100010000
7890001001000...0100010000
7900001000100...0100010000
7910000011000...0100010000
\n", + "

792 rows × 117 columns

\n", + "
" + ], + "text/plain": [ + " 0_b 0_c 0_f 0_k 0_s 0_x 1_s 1_y 1_f 1_g ... 20_s 20_v 20_y \\\n", + "0 1 0 0 0 0 0 1 0 0 0 ... 0 0 0 \n", + "1 0 0 0 0 0 1 0 1 0 0 ... 0 1 0 \n", + "2 1 0 0 0 0 0 0 1 0 0 ... 0 0 0 \n", + "3 1 0 0 0 0 0 1 0 0 0 ... 1 0 0 \n", + "4 0 0 0 0 0 1 0 1 0 0 ... 0 0 0 \n", + ".. ... ... ... ... ... ... ... ... ... ... ... ... ... ... \n", + "787 0 0 0 0 0 1 1 0 0 0 ... 0 1 0 \n", + "788 0 0 1 0 0 0 1 0 0 0 ... 0 1 0 \n", + "789 0 0 0 1 0 0 1 0 0 0 ... 0 1 0 \n", + "790 0 0 0 1 0 0 0 1 0 0 ... 0 1 0 \n", + "791 0 0 0 0 0 1 1 0 0 0 ... 0 1 0 \n", + "\n", + " 21_d 21_g 21_l 21_m 21_p 21_u 21_w \n", + "0 0 0 0 1 0 0 0 \n", + "1 0 1 0 0 0 0 0 \n", + "2 0 0 0 1 0 0 0 \n", + "3 0 0 0 1 0 0 0 \n", + "4 0 0 0 1 0 0 0 \n", + ".. ... ... ... ... ... ... ... \n", + "787 1 0 0 0 0 0 0 \n", + "788 0 0 1 0 0 0 0 \n", + "789 0 0 1 0 0 0 0 \n", + "790 0 0 1 0 0 0 0 \n", + "791 0 0 1 0 0 0 0 \n", + "\n", + "[792 rows x 117 columns]" + ] + }, + "execution_count": 132, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "y_ver = pd.get_dummies(y_ver,columns=[0],drop_first=True)\n", + "df2_unique = df[2].unique()\n", + "X_ver[1] = pd.Categorical(X_ver[1],categories=df2_unique)\n", + "X_ver = pd.get_dummies(X_ver)\n", + "X_ver" + ] + }, + { + "cell_type": "code", + "execution_count": 133, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
0_p1_b1_c1_f1_k1_s1_x2_f2_g2_s...21_s21_v21_y22_d22_g22_l22_m22_p22_u22_w
01000001001...1000000010
10000001001...0000100000
21000001000...1000000010
30000001001...0000100000
40000001000...0000100000
..................................................................
64601000100000...0101000000
64610000100001...0000010000
64620001000001...0000010000
64631000100000...0100010000
64640000001001...0000010000
\n", + "

6465 rows × 118 columns

\n", + "
" + ], + "text/plain": [ + " 0_p 1_b 1_c 1_f 1_k 1_s 1_x 2_f 2_g 2_s ... 21_s 21_v 21_y \\\n", + "0 1 0 0 0 0 0 1 0 0 1 ... 1 0 0 \n", + "1 0 0 0 0 0 0 1 0 0 1 ... 0 0 0 \n", + "2 1 0 0 0 0 0 1 0 0 0 ... 1 0 0 \n", + "3 0 0 0 0 0 0 1 0 0 1 ... 0 0 0 \n", + "4 0 0 0 0 0 0 1 0 0 0 ... 0 0 0 \n", + "... ... ... ... ... ... ... ... ... ... ... ... ... ... ... \n", + "6460 1 0 0 0 1 0 0 0 0 0 ... 0 1 0 \n", + "6461 0 0 0 0 1 0 0 0 0 1 ... 0 0 0 \n", + "6462 0 0 0 1 0 0 0 0 0 1 ... 0 0 0 \n", + "6463 1 0 0 0 1 0 0 0 0 0 ... 0 1 0 \n", + "6464 0 0 0 0 0 0 1 0 0 1 ... 0 0 0 \n", + "\n", + " 22_d 22_g 22_l 22_m 22_p 22_u 22_w \n", + "0 0 0 0 0 0 1 0 \n", + "1 0 1 0 0 0 0 0 \n", + "2 0 0 0 0 0 1 0 \n", + "3 0 1 0 0 0 0 0 \n", + "4 0 1 0 0 0 0 0 \n", + "... ... ... ... ... ... ... ... \n", + "6460 1 0 0 0 0 0 0 \n", + "6461 0 0 1 0 0 0 0 \n", + "6462 0 0 1 0 0 0 0 \n", + "6463 0 0 1 0 0 0 0 \n", + "6464 0 0 1 0 0 0 0 \n", + "\n", + "[6465 rows x 118 columns]" + ] + }, + "execution_count": 133, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.get_dummies(df,columns=[0],drop_first=True)\n", + "df = pd.get_dummies(df)\n", + "df" + ] + }, + { + "cell_type": "code", + "execution_count": 134, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "(4848, 117)\n", + "(1617, 117)\n" + ] + } + ], + "source": [ + "from sklearn.model_selection import train_test_split\n", + "\n", + "X = df.loc[:, df.columns != '0_p']\n", + "y = df['0_p']\n", + "X_train,X_test,y_train,y_test = train_test_split(X,y)\n", + "print(X_train.shape)\n", + "print(X_test.shape)" + ] + }, + { + "cell_type": "code", + "execution_count": 135, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
1_b1_c1_f1_k1_s1_x2_f2_g2_s2_y...21_s21_v21_y22_d22_g22_l22_m22_p22_u22_w
01000001000...0000001000
10000010100...0100100000
21000000100...0000001000
31000001000...1000001000
40000010100...0000001000
..................................................................
7870000011000...0101000000
7880010001000...0100010000
7890001001000...0100010000
7900001000100...0100010000
7910000011000...0100010000
\n", + "

792 rows × 117 columns

\n", + "
" + ], + "text/plain": [ + " 1_b 1_c 1_f 1_k 1_s 1_x 2_f 2_g 2_s 2_y ... 21_s 21_v 21_y \\\n", + "0 1 0 0 0 0 0 1 0 0 0 ... 0 0 0 \n", + "1 0 0 0 0 0 1 0 1 0 0 ... 0 1 0 \n", + "2 1 0 0 0 0 0 0 1 0 0 ... 0 0 0 \n", + "3 1 0 0 0 0 0 1 0 0 0 ... 1 0 0 \n", + "4 0 0 0 0 0 1 0 1 0 0 ... 0 0 0 \n", + ".. ... ... ... ... ... ... ... ... ... ... ... ... ... ... \n", + "787 0 0 0 0 0 1 1 0 0 0 ... 0 1 0 \n", + "788 0 0 1 0 0 0 1 0 0 0 ... 0 1 0 \n", + "789 0 0 0 1 0 0 1 0 0 0 ... 0 1 0 \n", + "790 0 0 0 1 0 0 0 1 0 0 ... 0 1 0 \n", + "791 0 0 0 0 0 1 1 0 0 0 ... 0 1 0 \n", + "\n", + " 22_d 22_g 22_l 22_m 22_p 22_u 22_w \n", + "0 0 0 0 1 0 0 0 \n", + "1 0 1 0 0 0 0 0 \n", + "2 0 0 0 1 0 0 0 \n", + "3 0 0 0 1 0 0 0 \n", + "4 0 0 0 1 0 0 0 \n", + ".. ... ... ... ... ... ... ... \n", + "787 1 0 0 0 0 0 0 \n", + "788 0 0 1 0 0 0 0 \n", + "789 0 0 1 0 0 0 0 \n", + "790 0 0 1 0 0 0 0 \n", + "791 0 0 1 0 0 0 0 \n", + "\n", + "[792 rows x 117 columns]" + ] + }, + "execution_count": 135, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "X_ver.columns = X_test.columns\n", + "X_ver" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.00\n", + "1.00\n" + ] + } + ], + "source": [ + "from sklearn.linear_model import LogisticRegression\n", + "from sklearn.preprocessing import PolynomialFeatures\n", + "\n", + "poly = PolynomialFeatures(degree=2,include_bias=False)\n", + "X_poly = poly.fit_transform(X_train)\n", + "lr = LogisticRegression(C=10).fit(X_poly,y_train)\n", + "print('{:.2f}'.format(lr.score(X_poly,y_train)))\n", + "print('{:.2f}'.format(lr.score(poly.fit_transform(X_test),y_test)))" + ] + }, + { + "cell_type": "code", + "execution_count": 137, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.96\n", + "0.96\n" + ] + } + ], + "source": [ + "from sklearn.naive_bayes import GaussianNB\n", + "\n", + "gnb = GaussianNB()\n", + "gnb.fit(X_train,y_train)\n", + "\n", + "print('{:.2f}'.format(gnb.score(X_train,y_train)))\n", + "print('{:.2f}'.format(gnb.score(X_test,y_test)))" + ] + }, + { + "cell_type": "code", + "execution_count": 138, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.00\n", + "1.00\n" + ] + } + ], + "source": [ + "from sklearn.svm import SVC\n", + "\n", + "svc = SVC(kernel='rbf',C=10, gamma=0.1).fit(X_train,y_train)\n", + "\n", + "print('{:.2f}'.format(svc.score(X_train,y_train)))\n", + "print('{:.2f}'.format(svc.score(X_test,y_test)))" + ] + }, + { + "cell_type": "code", + "execution_count": 139, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/tonywesoly/anaconda3/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:228: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.\n", + " mode, _ = stats.mode(_y[neigh_ind, k], axis=1)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.00\n", + "1.00\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/tonywesoly/anaconda3/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:228: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.\n", + " mode, _ = stats.mode(_y[neigh_ind, k], axis=1)\n" + ] + } + ], + "source": [ + "from sklearn.neighbors import KNeighborsClassifier\n", + "\n", + "knn = KNeighborsClassifier(n_neighbors=3).fit(X_train,y_train)\n", + "print('{:.2f}'.format(knn.score(X_train,y_train)))\n", + "print('{:.2f}'.format(knn.score(X_test,y_test)))" + ] + }, + { + "cell_type": "code", + "execution_count": 140, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "1.00\n", + "1.00\n" + ] + } + ], + "source": [ + "from sklearn.neural_network import MLPClassifier\n", + "\n", + "mlp = MLPClassifier(activation='relu', hidden_layer_sizes=[10],solver='lbfgs').fit(X_train,y_train)\n", + "\n", + "print('{:.2f}'.format(mlp.score(X_train,y_train)))\n", + "print('{:.2f}'.format(mlp.score(X_test,y_test)))" + ] + }, + { + "cell_type": "code", + "execution_count": 141, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Bayes raport:\n", + "Accuracy score 0.91\n", + " precision recall f1-score support\n", + "\n", + " jadalne 1.00 0.82 0.90 406\n", + " trujące 0.84 1.00 0.91 386\n", + "\n", + " accuracy 0.91 792\n", + " macro avg 0.92 0.91 0.90 792\n", + "weighted avg 0.92 0.91 0.90 792\n", + "\n" + ] + } + ], + "source": [ + "from sklearn.metrics import classification_report,accuracy_score\n", + "\n", + "pred_bayes = gnb.predict(X_ver)\n", + "print('Bayes raport:')\n", + "print('Accuracy score {:.2f}'.format(accuracy_score(y_ver,pred_bayes)))\n", + "print(classification_report(y_ver,pred_bayes,target_names=['jadalne','trujące']))" + ] + }, + { + "cell_type": "code", + "execution_count": 142, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Logistic Regression raport:\n", + "Accuracy score 1.00\n", + " precision recall f1-score support\n", + "\n", + " jadalne 1.00 1.00 1.00 406\n", + " trujące 1.00 1.00 1.00 386\n", + "\n", + " accuracy 1.00 792\n", + " macro avg 1.00 1.00 1.00 792\n", + "weighted avg 1.00 1.00 1.00 792\n", + "\n" + ] + } + ], + "source": [ + "pred_log = lr.predict(poly.fit_transform(X_ver))\n", + "print('Logistic Regression raport:')\n", + "print('Accuracy score {:.2f}'.format(accuracy_score(y_ver,pred_log)))\n", + "print(classification_report(y_ver,pred_log,target_names=['jadalne','trujące']))" + ] + }, + { + "cell_type": "code", + "execution_count": 143, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Support Vector Machines raport:\n", + "Accuracy score 1.00\n", + " precision recall f1-score support\n", + "\n", + " jadalne 1.00 1.00 1.00 406\n", + " trujące 1.00 1.00 1.00 386\n", + "\n", + " accuracy 1.00 792\n", + " macro avg 1.00 1.00 1.00 792\n", + "weighted avg 1.00 1.00 1.00 792\n", + "\n" + ] + } + ], + "source": [ + "pred_svc = svc.predict(X_ver)\n", + "print('Support Vector Machines raport:')\n", + "print('Accuracy score {:.2f}'.format(accuracy_score(y_ver,pred_svc)))\n", + "print(classification_report(y_ver,pred_svc,target_names=['jadalne','trujące']))" + ] + }, + { + "cell_type": "code", + "execution_count": 144, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "K-nearest neighbors raport:\n", + "Accuracy score 1.00\n", + " precision recall f1-score support\n", + "\n", + " jadalne 1.00 1.00 1.00 406\n", + " trujące 1.00 1.00 1.00 386\n", + "\n", + " accuracy 1.00 792\n", + " macro avg 1.00 1.00 1.00 792\n", + "weighted avg 1.00 1.00 1.00 792\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/tonywesoly/anaconda3/lib/python3.9/site-packages/sklearn/neighbors/_classification.py:228: FutureWarning: Unlike other reduction functions (e.g. `skew`, `kurtosis`), the default behavior of `mode` typically preserves the axis it acts along. In SciPy 1.11.0, this behavior will change: the default value of `keepdims` will become False, the `axis` over which the statistic is taken will be eliminated, and the value None will no longer be accepted. Set `keepdims` to True or False to avoid this warning.\n", + " mode, _ = stats.mode(_y[neigh_ind, k], axis=1)\n" + ] + } + ], + "source": [ + "pred_knn = knn.predict(X_ver)\n", + "print('K-nearest neighbors raport:')\n", + "print('Accuracy score {:.2f}'.format(accuracy_score(y_ver,pred_knn)))\n", + "print(classification_report(y_ver,pred_knn,target_names=['jadalne','trujące']))" + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Neural network raport:\n", + "Accuracy score 1.00\n", + " precision recall f1-score support\n", + "\n", + " jadalne 1.00 1.00 1.00 406\n", + " trujące 1.00 1.00 1.00 386\n", + "\n", + " accuracy 1.00 792\n", + " macro avg 1.00 1.00 1.00 792\n", + "weighted avg 1.00 1.00 1.00 792\n", + "\n" + ] + } + ], + "source": [ + "pred_mlp = mlp.predict(X_ver)\n", + "print('Neural network raport:')\n", + "print('Accuracy score {:.2f}'.format(accuracy_score(y_ver,pred_mlp)))\n", + "print(classification_report(y_ver,pred_mlp,target_names=['jadalne','trujące']))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "base", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "orig_nbformat": 4, + "vscode": { + "interpreter": { + "hash": "fbbbb91f3443f337fad6219902aa19c75c8f48b69079f7de3a01210f85667a20" + } + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}