import pandas as pd

# The TSV files have no header row; column 0 of train is the e/p (edible/poisonous) label
df = pd.read_csv('mushrooms/train/train.tsv', sep='\t', header=None)
X_ver = pd.read_csv('mushrooms/dev-0/in.tsv', sep='\t', header=None)
y_ver = pd.read_csv('mushrooms/dev-0/expected.tsv', sep='\t', header=None)
print(df.head())
print(df.isna().sum())
print(df.shape)
   0  1  2  3  4  5  6  7  8  9  ... 13 14 15 16 17 18 19 20 21 22
0  p  x  s  n  t  p  f  c  n  k  ...  s  w  w  p  w  o  p  k  s  u
1  e  x  s  y  t  a  f  c  b  k  ...  s  w  w  p  w  o  p  n  n  g
2  p  x  y  w  t  p  f  c  n  n  ...  s  w  w  p  w  o  p  k  s  u
3  e  x  s  g  f  n  f  w  b  k  ...  s  w  w  p  w  o  e  n  a  g
4  e  x  y  y  t  a  f  c  b  n  ...  s  w  w  p  w  o  p  k  n  g

[5 rows x 23 columns]

isna().sum() is 0 for every column and df.shape is (6465, 23): no missing values anywhere in the training set.
df.nunique()
column:   0  1  2   3  4  5  6  7  8   9  10  11  12  13  14  15  16  17  18  19  20  21  22
nunique:  2  6  4  10  2  9  2  2  2  12   2   5   4   4   9   9   1   4   3   5   9   6   7
X_ver.nunique()
column:   0  1   2  3  4  5  6  7   8  9  10  11  12  13  14  15  16  17  18  19  20  21
nunique:  6  3  10  2  9  2  2  2  12  2   5   4   4   9   9   1   4   3   5   9   6   7
# Binarize the label: after drop_first the single remaining column is 1 for 'p' (poisonous)
y_ver = pd.get_dummies(y_ver, columns=[0], drop_first=True)
# The dev file has no label column, so dev column 1 corresponds to train column 2.
# It contains only 3 of the 4 categories seen in training (see nunique above), so
# pin the full training category set before one-hot encoding to keep columns aligned.
df2_unique = df[2].unique()
X_ver[1] = pd.Categorical(X_ver[1], categories=df2_unique)
X_ver = pd.get_dummies(X_ver)
X_ver
X_ver
0_b | 0_c | 0_f | 0_k | 0_s | 0_x | 1_s | 1_y | 1_f | 1_g | ... | 20_s | 20_v | 20_y | 21_d | 21_g | 21_l | 21_m | 21_p | 21_u | 21_w | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | ... | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | ... | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
4 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
787 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | ... | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
788 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | ... | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
789 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | ... | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
790 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | ... | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
791 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | ... | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
792 rows × 117 columns
# Encode the label (column 0) as a single 0/1 column, then one-hot encode the remaining features
df = pd.get_dummies(df, columns=[0], drop_first=True)
df = pd.get_dummies(df)
df
0_p | 1_b | 1_c | 1_f | 1_k | 1_s | 1_x | 2_f | 2_g | 2_s | ... | 21_s | 21_v | 21_y | 22_d | 22_g | 22_l | 22_m | 22_p | 22_u | 22_w | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | ... | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | ... | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
2 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | ... | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
3 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | ... | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
4 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
6460 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
6461 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
6462 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
6463 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | ... | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
6464 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
6465 rows × 118 columns
from sklearn.model_selection import train_test_split

# '0_p' is the binarized label (1 = poisonous); everything else is a feature
X = df.loc[:, df.columns != '0_p']
y = df['0_p']
X_train, X_test, y_train, y_test = train_test_split(X, y)
print(X_train.shape)
print(X_test.shape)
(4848, 117)
(1617, 117)
# The dev dummies line up positionally with the training features (thanks to the
# Categorical fix above), so rename them to the training column names
X_ver.columns = X_test.columns
X_ver
1_b | 1_c | 1_f | 1_k | 1_s | 1_x | 2_f | 2_g | 2_s | 2_y | ... | 21_s | 21_v | 21_y | 22_d | 22_g | 22_l | 22_m | 22_p | 22_u | 22_w | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | ... | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
2 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
3 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | ... | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
4 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
787 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | ... | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
788 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | ... | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
789 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | ... | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
790 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | ... | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
791 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | ... | 0 | 1 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
792 rows × 117 columns
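The positional rename works here only because the category sets were forced to match. A more defensive pattern is to reindex the dev dummies against the training feature columns; a minimal sketch under the same file layout (dev_raw and X_ver_safe are hypothetical names):

# Shift the dev feature indices by one so they match the training numbering
# (the dev file lacks label column 0), then align on the training columns:
# dummies missing in dev are filled with 0, spurious ones are dropped.
dev_raw = pd.read_csv('mushrooms/dev-0/in.tsv', sep='\t', header=None)
dev_raw.columns = dev_raw.columns + 1
X_ver_safe = pd.get_dummies(dev_raw).reindex(columns=X.columns, fill_value=0)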
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import PolynomialFeatures

# Expand the one-hot features with pairwise interaction terms
poly = PolynomialFeatures(degree=2, include_bias=False)
X_poly = poly.fit_transform(X_train)
lr = LogisticRegression(C=10).fit(X_poly, y_train)
print('{:.2f}'.format(lr.score(X_poly, y_train)))
# transform, not fit_transform: the expansion is defined by the training fit
print('{:.2f}'.format(lr.score(poly.transform(X_test), y_test)))
1.00
1.00
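The manual transform bookkeeping can be avoided by bundling the expansion and the classifier, so the feature step is always fitted on the training data only. A minimal sketch using sklearn's Pipeline (pipe is a hypothetical name):

from sklearn.pipeline import make_pipeline

# The pipeline fits PolynomialFeatures during fit() and reuses it in score()
pipe = make_pipeline(PolynomialFeatures(degree=2, include_bias=False),
                     LogisticRegression(C=10))
pipe.fit(X_train, y_train)
print('{:.2f}'.format(pipe.score(X_test, y_test)))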
from sklearn.naive_bayes import GaussianNB

# Gaussian naive Bayes, applied directly to the 0/1 dummy features
gnb = GaussianNB()
gnb.fit(X_train, y_train)
print('{:.2f}'.format(gnb.score(X_train,y_train)))
print('{:.2f}'.format(gnb.score(X_test,y_test)))
0.95
0.97
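GaussianNB models each feature with a continuous Gaussian likelihood, which is a poor match for 0/1 dummies; BernoulliNB is the naive Bayes variant designed for binary features and may close part of the gap. A hedged sketch (bnb is a hypothetical name):

from sklearn.naive_bayes import BernoulliNB

# Bernoulli naive Bayes treats each one-hot column as a biased coin flip
bnb = BernoulliNB().fit(X_train, y_train)
print('{:.2f}'.format(bnb.score(X_test, y_test)))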
from sklearn.svm import SVC

# RBF-kernel SVM with hand-picked C and gamma
svc = SVC(kernel='rbf', C=10, gamma=0.1).fit(X_train, y_train)
print('{:.2f}'.format(svc.score(X_train,y_train)))
print('{:.2f}'.format(svc.score(X_test,y_test)))
1.00
1.00
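C=10 and gamma=0.1 were chosen by hand; a small cross-validated grid search would confirm the choice. A minimal sketch with an illustrative grid (grid is a hypothetical name):

from sklearn.model_selection import GridSearchCV

# 5-fold CV over a small grid around the hand-picked values
grid = GridSearchCV(SVC(kernel='rbf'),
                    {'C': [1, 10, 100], 'gamma': [0.01, 0.1, 1]}, cv=5)
grid.fit(X_train, y_train)
print(grid.best_params_, '{:.2f}'.format(grid.best_score_))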
from sklearn.neighbors import KNeighborsClassifier

# 3-nearest-neighbour classifier on the binary features
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)
print('{:.2f}'.format(knn.score(X_train,y_train)))
print('{:.2f}'.format(knn.score(X_test,y_test)))
1.00
1.00
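n_neighbors=3 is arbitrary; cross-validation over a few neighbourhood sizes is a cheap sanity check. A sketch:

from sklearn.model_selection import cross_val_score

# Mean 5-fold CV accuracy for a few values of k
for k in (1, 3, 5, 7):
    scores = cross_val_score(KNeighborsClassifier(n_neighbors=k),
                             X_train, y_train, cv=5)
    print(k, '{:.2f}'.format(scores.mean()))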
from sklearn.neural_network import MLPClassifier

# A single hidden layer of 10 ReLU units, trained with the L-BFGS solver
mlp = MLPClassifier(activation='relu', hidden_layer_sizes=[10],
                    solver='lbfgs').fit(X_train, y_train)
print('{:.2f}'.format(mlp.score(X_train,y_train)))
print('{:.2f}'.format(mlp.score(X_test,y_test)))
1.00
1.00
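lbfgs starts from a random initialization, so the score can vary run to run; pinning random_state (and raising max_iter if a convergence warning appears) makes it reproducible. A sketch (mlp_seeded is a hypothetical name):

# Same architecture with a fixed seed and a larger iteration budget
mlp_seeded = MLPClassifier(activation='relu', hidden_layer_sizes=[10],
                           solver='lbfgs', max_iter=1000, random_state=0)
mlp_seeded.fit(X_train, y_train)
print('{:.2f}'.format(mlp_seeded.score(X_test, y_test)))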
from sklearn.metrics import classification_report,accuracy_score
pred_bayes = gnb.predict(X_ver)
print('Bayes report:')
print('Accuracy score {:.2f}'.format(accuracy_score(y_ver, pred_bayes)))
print(classification_report(y_ver, pred_bayes, target_names=['edible', 'poisonous']))
Bayes report:
Accuracy score 0.91
              precision    recall  f1-score   support

      edible       1.00      0.82      0.90       406
   poisonous       0.84      1.00      0.91       386

    accuracy                           0.91       792
   macro avg       0.92      0.91      0.91       792
weighted avg       0.92      0.91      0.91       792
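The 0.82 recall on 'edible' means naive Bayes pushes some edible mushrooms into the poisonous class, which is the safe direction of error for this task; a confusion matrix makes the distribution explicit. A sketch:

from sklearn.metrics import confusion_matrix

# Rows are true classes (edible, poisonous), columns are predictions
print(confusion_matrix(y_ver, pred_bayes))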
# transform, not fit_transform: reuse the expansion fitted on the training set
pred_log = lr.predict(poly.transform(X_ver))
print('Logistic Regression report:')
print('Accuracy score {:.2f}'.format(accuracy_score(y_ver, pred_log)))
print(classification_report(y_ver, pred_log, target_names=['edible', 'poisonous']))
Logistic Regression report:
Accuracy score 1.00
              precision    recall  f1-score   support

      edible       1.00      1.00      1.00       406
   poisonous       1.00      1.00      1.00       386

    accuracy                           1.00       792
   macro avg       1.00      1.00      1.00       792
weighted avg       1.00      1.00      1.00       792
pred_svc = svc.predict(X_ver)
print('Support Vector Machines report:')
print('Accuracy score {:.2f}'.format(accuracy_score(y_ver, pred_svc)))
print(classification_report(y_ver, pred_svc, target_names=['edible', 'poisonous']))
Support Vector Machines report:
Accuracy score 1.00
              precision    recall  f1-score   support

      edible       1.00      1.00      1.00       406
   poisonous       1.00      1.00      1.00       386

    accuracy                           1.00       792
   macro avg       1.00      1.00      1.00       792
weighted avg       1.00      1.00      1.00       792
pred_knn = knn.predict(X_ver)
print('K-nearest neighbors report:')
print('Accuracy score {:.2f}'.format(accuracy_score(y_ver, pred_knn)))
print(classification_report(y_ver, pred_knn, target_names=['edible', 'poisonous']))
K-nearest neighbors report:
Accuracy score 1.00
              precision    recall  f1-score   support

      edible       1.00      1.00      1.00       406
   poisonous       1.00      1.00      1.00       386

    accuracy                           1.00       792
   macro avg       1.00      1.00      1.00       792
weighted avg       1.00      1.00      1.00       792
pred_mlp = mlp.predict(X_ver)
print('Neural network report:')
print('Accuracy score {:.2f}'.format(accuracy_score(y_ver, pred_mlp)))
print(classification_report(y_ver, pred_mlp, target_names=['edible', 'poisonous']))
Neural network report:
Accuracy score 1.00
              precision    recall  f1-score   support

      edible       1.00      1.00      1.00       406
   poisonous       1.00      1.00      1.00       386

    accuracy                           1.00       792
   macro avg       1.00      1.00      1.00       792
weighted avg       1.00      1.00      1.00       792
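For a compact comparison, the fitted models can be scored on the dev set in one loop; a sketch reusing the objects above (the polynomial expansion is applied only for the logistic model):

# Dev-set accuracy for every fitted model in one pass
for name, model in [('Bayes', gnb), ('SVC', svc), ('kNN', knn), ('MLP', mlp)]:
    print('{:<7}{:.2f}'.format(name, accuracy_score(y_ver, model.predict(X_ver))))
print('{:<7}{:.2f}'.format('LogReg', accuracy_score(y_ver, lr.predict(poly.transform(X_ver)))))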