2022-05-17 17:30:50 +02:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "TQqrOdkY6nsy"
},
"source": [
"# **Klasyfikacja za pomocą naiwnej metody bayesowskiej z rozkładem normalnym**"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "SSaJsYOhz8h8"
},
"source": [
"![rozklady.jpg](
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "AlcfRFCPSXIj"
},
"source": [
"# **Twierdzenie Bayesa**\n",
"![bayes.svg](
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "rcpTnWjOh5dq"
},
"source": [
"P(A) -- oznacza prawdopodobieństwo a-priori wystąpienia klasy A (tj. prawdopodobieństwo, że dowolny przykład należy do klasy A)\n",
"\n",
"P(B|A) -- oznacza prawdopodobieństwo a-posteriori, że B należy do \n",
"klasy A\n",
"\n",
"P(B) -- znacza prawdopodobieństwo a-priori wystąpienia przykładu B "
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "Yabcm4Rei2ue"
},
"source": [
"![GaussianNB.png](
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "dsf6FnlgjiOL"
},
"source": [
"# Funkcja gęstości prawdopodobieństwa rozkładu normalnego \n",
"![gestosc.svg](
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "v0oeHebytjNp"
},
"outputs": [],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import scipy.stats as stats\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from sklearn.model_selection import train_test_split\n",
"sns.set(style=\"whitegrid\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"id": "fOYTA3VVtjNw"
},
"outputs": [],
"source": [
"class NaiveBayesClassifier():\n",
" def calc_prior(self, features, target):\n",
" '''\n",
" Wyliczenie prawdopodobieństwa a priori\n",
" '''\n",
" self.prior = (features.groupby(target).apply(lambda x: len(x)) / self.rows).to_numpy()\n",
"\n",
" return self.prior\n",
" \n",
" def calc_statistics(self, features, target):\n",
" '''\n",
" Wyliczenie średnich i wariancji dla danych\n",
" ''' \n",
" self.mean = features.groupby(target).apply(np.mean).to_numpy()\n",
" self.var = features.groupby(target).apply(np.var).to_numpy()\n",
" \n",
" return self.mean, self.var\n",
" \n",
" def gaussian_density(self, class_idx, x): \n",
" '''\n",
" Wyliczenie prawdopodobieństwa z rozkładu normalnego \n",
" (1/√2pi*σ ) * exp((-1/2)*((x-μ)^2)/(2*σ²))\n",
" μ -średnia\n",
" σ² - wariancja\n",
" σ - odchylenie standardowe\n",
" '''\n",
" mean = self.mean[class_idx]\n",
" var = self.var[class_idx]\n",
2022-05-17 18:19:47 +02:00
" \n",
" numerator = np.exp((-1/2)*((x-mean)**2) / (2 * var)) # Licznik wzoru na gęstość rozkładu normalnego \n",
" denominator = np.sqrt(2 * np.pi * var) # Mianownik wzoru na gęstość rozkładu normalnego \n",
2022-05-17 17:30:50 +02:00
" prob = numerator / denominator\n",
2022-05-17 18:19:47 +02:00
" \n",
2022-05-17 17:30:50 +02:00
" return prob\n",
" \n",
2022-05-17 18:19:47 +02:00
" def classify(self, x):\n",
2022-05-17 17:30:50 +02:00
" '''\n",
" Wyliczenie prawdopodobieństwa a posteriori i zwrócenie klasy, dla której prawdopodobieństwo jest najwyższe\n",
" '''\n",
" posteriors = []\n",
" posteriors_no_log = []\n",
"\n",
" # calculate posterior probability for each class\n",
" for i in range(self.count):\n",
" prior = np.log(self.prior[i]) # Do predykcji używane jest prawodopodobieństwo logarytmiczne\n",
" prior_no_log = self.prior[i] # Zwykłe prawdopodobieństwo liczymy, żeby zwrócić je z predykcjami\n",
"\n",
" conditional = np.sum(np.log(self.gaussian_density(i, x))) \n",
" conditional_no_log = np.prod(self.gaussian_density(i, x))\n",
"\n",
" posterior = prior + conditional\n",
" posterior_no_log = prior_no_log * conditional_no_log\n",
"\n",
" posteriors.append(posterior)\n",
" posteriors_no_log.append(posterior_no_log)\n",
"\n",
" # Zwracamy klasę o największym prawdopodobieństwie\n",
" return self.classes[np.argmax(posteriors)], np.max(posteriors_no_log)\n",
"\n",
" def fit(self, features, target):\n",
" '''\n",
" Główna metoda trenująca model\n",
" '''\n",
" self.classes = np.unique(target)\n",
" self.count = len(self.classes)\n",
" self.feature_nums = features.shape[1]\n",
" self.rows = features.shape[0]\n",
" \n",
" self.calc_statistics(features, target)\n",
" self.calc_prior(features, target)\n",
" \n",
" def predict(self, features):\n",
" '''\n",
" Predykcja wartości dla każdego wiersza\n",
" '''\n",
2022-05-17 18:19:47 +02:00
" preds = [self.classify(f) for f in features.to_numpy()]\n",
2022-05-17 17:30:50 +02:00
" return preds\n",
"\n",
" def accuracy(self, y_test, y_pred):\n",
" '''\n",
" Wyliczenie accuracy modelu\n",
" '''\n",
" accuracy = np.sum(y_test == y_pred) / len(y_test)\n",
" return accuracy\n",
"\n",
" def visualize(self, y_true, y_pred, target):\n",
" '''\n",
" Narysowanie wykresu porównującego rozkład klas prawdziwych i przewidzianych\n",
" '''\n",
" tr = pd.DataFrame(data=y_true, columns=[target])\n",
" pr = pd.DataFrame(data=y_pred, columns=[target])\n",
" \n",
" \n",
" fig, ax = plt.subplots(1, 2, sharex='col', sharey='row', figsize=(15,6))\n",
" \n",
2022-05-17 18:19:47 +02:00
" sns.countplot(x=target, data=tr, ax=ax[0], alpha=0.7, hue=target, dodge=False)\n",
" sns.countplot(x=target, data=pr, ax=ax[1], alpha=0.7, hue=target, dodge=False)\n",
2022-05-17 17:30:50 +02:00
" \n",
" ax[0].tick_params(labelsize=12)\n",
" ax[1].tick_params(labelsize=12)\n",
" ax[0].set_title(\"Prawdziwe wartości\", fontsize=18)\n",
2022-05-17 18:19:47 +02:00
" ax[1].set_title(\"Predykcje\", fontsize=18)\n",
2022-05-17 17:30:50 +02:00
" plt.show()\n"
]
},
2022-05-17 18:54:16 +02:00
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Pitność wody"
]
},
2022-05-17 17:30:50 +02:00
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 382
},
"id": "5-riUAGntjN2",
"outputId": "f87f047d-bc71-41ef-a43a-17b6f7cf84c3"
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(2948, 9) (2948,)\n",
"(328, 9) (328,)\n"
]
}
],
"source": [
"# Preprocessing danych\n",
"\n",
"# Uzupełnienie pustych wartości w kolumnach\n",
"def fill_nan(df):\n",
" for index, column in enumerate(df.columns[:9]):\n",
" df[column] = df[column].fillna(df.groupby('Potability')[column].transform('mean'))\n",
" return df\n",
"\n",
"# Wczytywanie danych\n",
"df = pd.read_csv(\"water_potability.csv\")\n",
"\n",
"df = fill_nan(df)\n",
"\n",
"# Zrandomizowanie kolejności danych w datasecie\n",
"df = df.sample(frac=1, random_state=10).reset_index(drop=True)\n",
"\n",
"# Podział na atrybuty i przewidywane wartości\n",
"X, y = df.iloc[:, :-1], df.iloc[:, -1]\n",
"\n",
"# Normalizacja i skalowanie danych\n",
"from sklearn.preprocessing import StandardScaler\n",
"sc = StandardScaler()\n",
"X = sc.fit_transform(X.to_numpy())\n",
"X = pd.DataFrame(X, columns=df.columns.values.tolist()[:-1])\n",
"\n",
"# Podział na dane trenujące i testowe, z uwzględnieniem równego rozłożenia danych\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, stratify=y, random_state=1)\n",
"\n",
"print(X_train.shape, y_train.shape)\n",
"print(X_test.shape, y_test.shape)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"id": "O82SGzK6tjN5"
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ph</th>\n",
" <th>Hardness</th>\n",
" <th>Solids</th>\n",
" <th>Chloramines</th>\n",
" <th>Sulfate</th>\n",
" <th>Conductivity</th>\n",
" <th>Organic_carbon</th>\n",
" <th>Trihalomethanes</th>\n",
" <th>Turbidity</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1022</th>\n",
" <td>0.003078</td>\n",
" <td>0.688791</td>\n",
" <td>0.846257</td>\n",
" <td>1.428934</td>\n",
" <td>-0.858263</td>\n",
" <td>0.002792</td>\n",
" <td>0.913790</td>\n",
" <td>0.232417</td>\n",
" <td>2.319505</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3191</th>\n",
" <td>-0.587365</td>\n",
" <td>0.223203</td>\n",
" <td>-0.731867</td>\n",
" <td>0.397503</td>\n",
" <td>0.759893</td>\n",
" <td>0.330607</td>\n",
" <td>0.094379</td>\n",
" <td>0.282563</td>\n",
" <td>0.235024</td>\n",
" </tr>\n",
" <tr>\n",
" <th>13</th>\n",
" <td>0.003078</td>\n",
" <td>-0.241037</td>\n",
" <td>0.773051</td>\n",
" <td>0.580019</td>\n",
" <td>1.334369</td>\n",
" <td>-0.049130</td>\n",
" <td>-1.121422</td>\n",
" <td>-0.200432</td>\n",
" <td>-0.946356</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2068</th>\n",
" <td>-2.176058</td>\n",
" <td>1.443006</td>\n",
" <td>-1.626771</td>\n",
" <td>-4.164610</td>\n",
" <td>-0.033706</td>\n",
" <td>-1.050763</td>\n",
" <td>-0.391328</td>\n",
" <td>-0.398649</td>\n",
" <td>-0.298341</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1484</th>\n",
" <td>0.213047</td>\n",
" <td>0.403036</td>\n",
" <td>-0.464729</td>\n",
" <td>0.070417</td>\n",
" <td>0.021560</td>\n",
" <td>-0.952776</td>\n",
" <td>-0.213330</td>\n",
" <td>0.111419</td>\n",
" <td>-0.235893</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>691</th>\n",
" <td>0.003078</td>\n",
" <td>1.199106</td>\n",
" <td>-0.003483</td>\n",
" <td>-0.670308</td>\n",
" <td>-0.069513</td>\n",
" <td>0.185754</td>\n",
" <td>-0.466010</td>\n",
" <td>0.031975</td>\n",
" <td>0.676276</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1283</th>\n",
" <td>-2.034004</td>\n",
" <td>-1.508135</td>\n",
" <td>0.255310</td>\n",
" <td>0.083839</td>\n",
" <td>-1.413707</td>\n",
" <td>0.694074</td>\n",
" <td>-1.110579</td>\n",
" <td>0.232996</td>\n",
" <td>2.544703</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2818</th>\n",
" <td>-0.702987</td>\n",
" <td>-0.575677</td>\n",
" <td>0.755056</td>\n",
" <td>0.664695</td>\n",
" <td>0.021560</td>\n",
" <td>-0.489334</td>\n",
" <td>0.371852</td>\n",
" <td>-2.272990</td>\n",
" <td>-1.764684</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1330</th>\n",
" <td>1.525943</td>\n",
" <td>0.497074</td>\n",
" <td>-0.714355</td>\n",
" <td>-1.024237</td>\n",
" <td>-1.022037</td>\n",
" <td>-0.327074</td>\n",
" <td>-1.107341</td>\n",
" <td>0.517432</td>\n",
" <td>-1.230528</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1926</th>\n",
" <td>-0.043558</td>\n",
" <td>-0.882359</td>\n",
" <td>-0.456141</td>\n",
" <td>-0.770271</td>\n",
" <td>0.795189</td>\n",
" <td>0.560306</td>\n",
" <td>-1.086081</td>\n",
" <td>-1.356820</td>\n",
" <td>0.172521</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2948 rows × 9 columns</p>\n",
"</div>"
],
"text/plain": [
" ph Hardness Solids Chloramines Sulfate Conductivity \\\n",
"1022 0.003078 0.688791 0.846257 1.428934 -0.858263 0.002792 \n",
"3191 -0.587365 0.223203 -0.731867 0.397503 0.759893 0.330607 \n",
"13 0.003078 -0.241037 0.773051 0.580019 1.334369 -0.049130 \n",
"2068 -2.176058 1.443006 -1.626771 -4.164610 -0.033706 -1.050763 \n",
"1484 0.213047 0.403036 -0.464729 0.070417 0.021560 -0.952776 \n",
"... ... ... ... ... ... ... \n",
"691 0.003078 1.199106 -0.003483 -0.670308 -0.069513 0.185754 \n",
"1283 -2.034004 -1.508135 0.255310 0.083839 -1.413707 0.694074 \n",
"2818 -0.702987 -0.575677 0.755056 0.664695 0.021560 -0.489334 \n",
"1330 1.525943 0.497074 -0.714355 -1.024237 -1.022037 -0.327074 \n",
"1926 -0.043558 -0.882359 -0.456141 -0.770271 0.795189 0.560306 \n",
"\n",
" Organic_carbon Trihalomethanes Turbidity \n",
"1022 0.913790 0.232417 2.319505 \n",
"3191 0.094379 0.282563 0.235024 \n",
"13 -1.121422 -0.200432 -0.946356 \n",
"2068 -0.391328 -0.398649 -0.298341 \n",
"1484 -0.213330 0.111419 -0.235893 \n",
"... ... ... ... \n",
"691 -0.466010 0.031975 0.676276 \n",
"1283 -1.110579 0.232996 2.544703 \n",
"2818 0.371852 -2.272990 -1.764684 \n",
"1330 -1.107341 0.517432 -1.230528 \n",
"1926 -1.086081 -1.356820 0.172521 \n",
"\n",
"[2948 rows x 9 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"id": "a3jkTMFLtjN6"
},
"outputs": [],
"source": [
"# Trenowanie modelu klasyfikatora\n",
"x = NaiveBayesClassifier()\n",
"x.fit(X_train, y_train)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"id": "CoC22aNgtjN9"
},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Predykcja wartości dla danych testowych\n",
"predictions = x.predict(X_test)\n",
"\n",
"# Prawdopodobieństwa kolejnych predykcji\n",
"probabilities = [p[1] for p in predictions]\n",
"\n",
"# Przewidziana wartość\n",
"predictions = [p[0] for p in predictions]\n",
"predictions[0]"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"id": "JR06zodmtjN9"
},
"outputs": [
{
"data": {
"text/plain": [
"0.6280487804878049"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Wyliczenie accuracy modelu\n",
"x.accuracy(y_test, predictions)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"id": "1jW0QPootjN_"
},
"outputs": [
{
"data": {
"text/plain": [
"0.14084507042253522"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.metrics import f1_score\n",
"\n",
"f1_score(y_test, predictions)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"id": "vEVogTmAtjOA"
},
"outputs": [
{
"data": {
"text/plain": [
"0 0.609756\n",
"1 0.390244\n",
"Name: Potability, dtype: float64"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y_test.value_counts(normalize=True)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"id": "jCVOdBZytjOB"
},
"outputs": [
{
"data": {
2022-05-17 18:19:47 +02:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA4oAAAGQCAYAAADoX54sAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzs3XtYlHX+//EXIKcBRPFcahggWHhAAfFE4SE1c9XMsjXbykpTUXNd11Or61qutqlFhu3qama5pkWeSlNzPSJmlq4pqZSiZpEiGIwNp/n94Zf5eQcq6MBweD6uy+uCz316z5jz7jX3575vJ6vVahUAAAAAAP/H2dEFAAAAAAAqFoIiAAAAAMCAoAgAAAAAMCAoAgAAAAAMCIoAAAAAAAOCIoBq5b333tM777yjgoICR5cCAABQYREUATuYNGmSgoODS71dUlKSgoOD9dFHH5VBVfitpUuX6h//+Idat24tZ+fSf/x17dpVQ4cOLYPKAAD29NFHHyk4OFhJSUm3tL29P+/j4uIUHByss2fP2m2fQFmr4egCUH0lJSXpySefNIyZTCY1a9ZM/fr10xNPPCEXFxcHVQdHOnv2rBISEtS9e3e1aNHCLvtMTEzUG2+8oUWLFqlNmzZ22ScA4P+jrwNVC0ERDvfQQw8pOjpaVqtVaWlpSkhI0CuvvKKTJ0/qb3/7m6PLK1MRERE6fPiwatTgn+K1zp07pzfffFN33nmn3YLid999p0WLFql9+/a3vI9NmzbZpRYAqMqqc1+/nhdeeEHPP/+83NzcHF0KUGL83ykc7p577lG/fv1sv//+979X7969tXr1ao0dO1Z169Ytdrvc3FwVFBTI3d29vEq1O2dn50pdv71lZWXJ29u7TPY9ZMiQ294HDR4Abu5W+npV6Ok3UqNGDb4URqXDNYqocLy9vRUWFiar1aozZ85I+v9z+0+cOKHZs2crOjparVq10tdffy1J+uSTTzRixAjdf//9Cg0NVfv27TVy5EglJycb9j1p0iS1bNlSFovFNnbw4EEFBwcrMjLScIOTHTt2KDg4WJ988oltzGKxaM6cOercubNatWqlRx55RLt37y7yGgqvjbjen8JrJn57jaLFYlGrVq00adIkw/5eeuklBQcH6+WXXzaMjxs3Tm3btlVeXp5t7JdfftGrr76qHj16KDQ0VFFRURo/frztvbyR4q7pyM3NVVhYmIKDg3Xs2DHbeFZWlu69917NmDHDNrZ7926NGzdO3bp1U6tWrRQeHq5nnnlG+/fvL3KsoUOHqmvXrjpz5ozGjBmjyMhItWvXTh999JFt6tLkyZNt79m114qYzWa99tpr6t69u0JDQ9WpUydNnDhR586dMxzDarVq2bJl6tu3r8LCwtS2bVv17NlTU6ZMUW5urmHdo0ePasyYMerYsaNCQ0N13333afz48UpNTbWtwzWKAFB6v+3rN+vpkrR3714988wzCg8PV8uWLdW3b1+tXLmy2P2vXr1avXr1UmhoqHr06KF33nlHVqvVsM7SpUsVHBysvXv3Ftk+JydHkZGR+sMf/nDD13HmzBn17NlTnTt3Nvz/RVZWlubPn6/evXurZcuWat++vR5//HFt3LjRts71rlG8nZ4NlDW+2kCFY7Vadfr0aUlS7dq1DcsmTJggDw8PPfPMM5KkevXqSZJWrFghX19fPfroo6pXr55SU1P1wQcf6PHHH1dCQoL8/f0lSVFRUUpISNDBgwfVoUMHSdK+ffvk7OyszMxMHT16VKGhobZxJycnRUVF2Y4/fvx4bd26VTExMerSpYtSU1MVGxurxo0bG+qMiIjQ3Llzi7yu+fPnKy0trcjrKuTu7q42bdpo3759hvHCGq8dt1qt2r9/vyIiImzfUv7yyy8aPHiwfvjhBw0cOFBBQUH6+eef9f7772vQoEH68MMPdeedd173vS98TxITE21TNA8dOiSz2SxnZ2clJibapoIeOHBAeXl5hvcnISFBmZmZ6t+/vxo2bKiffvpJq1ev1lNPPaXly5crPDzccLzs7Gw98cQTatu2rcaNG6f09HRFRERoxIgRWrRokR577DG1a9dOkmzfQOfl5WnYsGE6ePCgevbsqaefflqnT5/WypUrtWfPHn344Ydq2LChJOmtt97SG2+8oZiYGA0ePFguLi46e/asPv/8c+Xk5MjV1VWStH37dsXGxspkMumRRx7RXXfdpZ9//lm7d+/W8ePH1bRp0+u+ZwCAG7teX79eT1+1apWmT5+uNm3aaMSIEfL09NTevXs1Y8YMpaam6s9//rNtH8uWLdPs2bMVEhKi8ePH68qVK1qyZInq1KljqKF///6aN2+e1qxZo44dOxqWbdmyRZmZmXrkkUeu+xq++eYbPf/886pZs6ZWrVpl66WXL1/W73//e504cUI9e/bU448/roKCAh09elTbt29Xnz59rrvP2+3ZQFkjKMLhrly5ovT0dElSWlqaVqxYoeTkZLVp08YW8ArVrFlTS5cuLTJ9Y/HixTKZTIax/v37q1+/flq2bJntrNe14fDan2NiYpSUlKTExERbUExKSlJQUJD8/PwkXT1btnXrVg0YMEB///vfbceJiIjQqFGjDMdu0qSJmjRpYhibO3eufvzxR02ePFnNmze/7vsRFRWlpKQknTp1Sv7+/jp//rxSU1P1u9/9TuvWrdOFCxdUt25dHT9+XBcvXjQEtddff11nzpzRBx98oJCQENv4gAED1LdvX8XFxRlq/61GjRqpadOmhkC6b98+1a5dW6GhoUpMTLQ19MIgHRkZaVv3b3/7W5G/h8GDB6tPnz56++23iwTFjIwMjRgxQi+++KJhvGPHjrabzlw7fUm6etbz4MGDGjZsmCZOnGjYZvjw4Xrttdf06quvSpK2bt2qgIAALVq0yLCPCRMm2H6+cuWKJk+eLB8fH3388cdq0KCBbdno0aN5jAYAlFJJ+3pxPT0tLU2zZs1Snz599Nprr9nGhwwZolmzZmnZsmV6/PHH1bRpU12+fFkLFixQQECA/vOf/8jT01OSNHDgQPXu3dtQU+3atfXAAw/os88+U0ZGhmrVqmVbtmbNGvn6+uqBBx4o9vXs2bNHsbGxat68ueLj4w1hd968eTpx4oRmzpypxx57zLDdzfrH7fZsoKwx9RQOFxcXpw4dOqhDhw7q16+fPvzwQ3Xt2lULFy4ssu4f/vCHYuf4F4YTq9WqrKwspaenq3bt2mrWrJkOHz5sW69Bgwby9/e3BSGLxaKvv/5anTp1UkREhG388uXLOnbsmCGEbd26VZI0bNgww7G7d++uZs2a3fA1rl69WkuWLNHgwYP11FNP3XDdwmMW1pKYmCgXFxfFxsbKycnJNl44PbRwfavVqvXr1ysiIkL169dXenq67Y+np6fatGlT7DTZ4o5/5MgRZWdn247Tvn17dezYUQcOHLBN2SycNlsYpCUZQmJ2drYuXbokZ2dntW7d2vD3cK3fvp83s2XLFjk7O2v48OGG8fvvv18tWrTQtm3bbM3Z29tbP/30kw4cOHDd/e3evVuXLl3S008/bQiJhW7lMRoAUJ2VtK8X19M3b96snJwcPfLII4Y+lp6erq5du6qgoECJiYmSrn5+X7lyRUOGDLGFRElq2LCh+vbtW6SuRx99VDk5OVq/fr1t7OzZs0pMTFTfvn2LvT5y7dq1Gj58uNq3b69ly5YZQmJBQYE++eQTBQQE6NFHHy2y7Y36h716NlCWOKMIh3vsscfUq1cvOTk5ydPTU/7+/oZv+q712zOMhY4eParXX39d+/fvl9lsNiz77bTQ9u3b68MPP1RWVpb+97//yWKxKCoqSrm5uVqwYIFycnK0f/9+FRQUGILimTNn5OzsXGwNAQEB+v7774utLTExUTNmzFDnzp310ksv3eCduKpVq1YymUzat2+fBg8erH379ik0NFRNmzZV8+bNtW/fPj300EPat2+fatWqZfsWMj09XRkZGdq9e7ftbOlvlST0REVF6YMPPtCBAwfUvn17ff3115oyZYpat26tOXPm6PDhwwoMDFRycnK
2022-05-17 17:30:50 +02:00
"text/plain": [
"<Figure size 1080x432 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"x.visualize(y_test, predictions, 'Potability')"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"id": "aw8Tefprhjnn"
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/lib/python3/dist-packages/matplotlib/cbook/__init__.py:1377: FutureWarning: Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version. Convert to a numpy array before indexing instead.\n",
" x[:, None]\n",
"/usr/lib/python3/dist-packages/matplotlib/axes/_base.py:237: FutureWarning: Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version. Convert to a numpy array before indexing instead.\n",
" x = x[:, np.newaxis]\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXMAAAELCAYAAAAry2Y+AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3XtcFPX+P/DX7rLLVYUQFNPykhiKpqJHMz3eUxTFjMRU8FJ6vKX57WYPTa3ItHrksbxlP29odkozvJKWmpdKT5KZpB4M8QYIKKjIIiy78/uDdmNhL7MXdpfp9Xw8fMjufOYz75nPe947O7s7IxMEQQAREdVpcncHQEREjmMxJyKSABZzIiIJYDEnIpIAFnMiIglgMScikgAWcyJyqoSEBGzbts3weNmyZejWrRueeOIJN0YlfSzmHuTkyZP45z//WeP56jsHkSucOnUKo0ePRlRUFP7xj39g9OjR+O2332zqIzc3Fxs2bMC+ffvwww8/WG0/d+5cLFu2zN6Q/9a83B0AEXmee/fuYerUqVi0aBGio6Oh0Whw6tQpqFQqm/rJzs5GYGAggoODaylS0uORuRv069cPn3zyCYYMGYKuXbvi9ddfR1lZmbvDIjLIysoCAMTExEChUMDHxwc9e/bEo48+io8//hgvv/yyoe3169fRpk0bVFRUGPXx448/YtKkScjPz0enTp0wd+5cAMCsWbPwxBNPICoqCmPHjsXFixcBAF988QV2796NdevWoVOnTpg6dSoAIC8vDy+88AK6d++Ofv36ITk52RWboM5hMXcTfdJ+++23yMrKwqpVq9wdEpFBixYtoFAo8Nprr+HIkSO4c+eOzX306NEDn376KUJDQ3H69GksWbIEAPDPf/4T+/fvx08//YS2bdsaXhji4+MxbNgwPPfcczh9+jTWrFkDnU6HadOmoU2bNjh69Cg2bdqETZs24dixY05dXylgMXeTsWPHIiwsDIGBgZg2bRr27t0LAMjPz0eXLl2M/qWlpbk5Wvq7CQgIwNatWyGTyfDGG2/g8ccfx9SpU3Hz5k2H+46Li0NAQABUKhVeeOEFXLhwAcXFxSbbnj17FoWFhZg5cyZUKhWaNWuGUaNGYd++fQ7HITU8Z+4mYWFhhr+bNGmC/Px8AEBoaCiOHj1q1DYhIcGlsREBQKtWrQxH05mZmXjllVewePFitGjRwu4+tVotli1bhm+++QaFhYWQyyuPJ4uKilCvXr0a7bOzsw0HOFX7qPqYKrGYu0lubq7h75ycHISGhroxGiLLWrVqhZEjR+KLL75A27Ztcf/+fcM0W47Wd+/ejYMHD2LDhg1o2rQpiouL0bVrV+gv3iqTyYzah4WFoWnTpjhw4IBzVkTCeJrFTbZu3YobN27g9u3bhg9DiTxFZmYm1q9fjxs3bgCoPPjYs2cPHnvsMURERODnn39GTk4OiouL8cknn4jut6SkBCqVCkFBQSgtLcWHH35oND04OBjXr183PO7QoQMCAgKwdu1a3L9/H1qtFhkZGTZ/RfLvgMXcTWJiYjBp0iQMGDAAzZo1w7Rp09wdEpFBQEAAzpw5g2eeeQYdO3bEqFGjEB4ejrlz5+KJJ57AkCFDMHz4cIwcORJ9+/YV3e+IESPQpEkT9OrVC0OHDkXHjh2NpsfFxeGPP/5Aly5dMH36dCgUCqxevRoXLlxA//790b17d8yfPx/37t1z9irXeTLenML1+vXrh6SkJPTo0cPdoRCRRPDInIhIAljMiYgkgKdZiIgkgEfmREQSYPV75kuXLsX+/fuRnZ2N3bt3Izw8XHTnOp0OJSUlUCqVNb4/SuQoQRCg0Wjg7+9v+PGJLezNbeY11SZ789pqMe/fvz8SExMxduxYm4MqKSlBRkaGzfMR2SI8PNzkrwetsTe3mdfkCrbmtdVi7sjPZpVKpSEoWy+dWVV6ejoiIyPtnt+UE+m5SN57HuUV2hrTVF4K9OjQGD/+dsNouv75Y7/mQKur9lGDDJDLZNBVf/5PEQ8H4aVx4rflifRcrN/9u8n+5HIZJg1rh+6RYSbnq75eKi8FEodGmGxvjrlt7qz+naG8vBwZGRmGPLOVvbltKq/NbRdzeeTI9rKUu2K5a8zMqY19vDpn5a6lsTZZG6pQyGXo1bGJxZywN69r9ef8+regKpUK3t7eDvXl6PzVbUrNQMGdcjNTtdj1wzUThdTc89b9dO6mTeuwKTUDhcUVFqf3jmpu8vma66U1294SU/E6s39ncfWpDlN5bW67mMsjR7aX5dwVy71jZoqz9/HqnJW7to11TWJzwta8dsm1WdLT0x3uw9lXDiwoKrU43dyg2FPI9WxZB2vxFRSVmuzP3Hzm2ltS2/3XdVXz2tx2MZcvjmwva7lhSz+eNGa1HYuzctfWsRbbztHxcEkxj4yMdOhVNy0tDVFRUU6MCAhJvWVxp5DLTZ8yMfe8GLasg7X4QoJ8TfZnbj5z7c0xt82d1b8zlJWVOeVAwV5V89rcdjGXL45sL2u5IbofN4yZObWxj1fnrNy1dazFttPHYW9e/22/mpgYHQFvpcLkNG+lAoO7PVRjuv55L0XNtz8yWeX5MHMee8S222YlRkeY7U8hlyExOsLsfKbiNtfeVrXdf11lbruYyyNHtpel3BXr7zhmzspdS2NtqjZU5aWQ1UpOACKOzJOSknDgwAHcvHkTEydORGBgoOFGCnVZn6hmAIDk1PMoKCo1vFqGBPkiMToCfaKaIaJFMJJTz+NmUSkaVnt+bcpZFKs1AIB6fkpMGdHeqL+qHnskGEnTetoVX9XlVF2Wfrql9aoetzPUdv+u5MzctrRdzOWRvaovK8Cv8oOye2oNGgb54uFgGa7cEoymFas1JnP878RZuatv//9SzuCuWmuxNlRVdd91dk4AtfwLUP3bBU88zeIqdTX2uhC3s/KrrixXrLowdtUx5r/Ym19/29MsRERSwmJORCQBLOZERBLAYk5EJAEs5kREEsBiTkQkASzmREQSwGJORCQBLOZERBLAYk5EJAEs5kREEsBiTkQkASzmREQSwGJORCQBLOZERBLAYk5EJAEs5kREEsBiTkQkASzmREQSwGJORCQBLOZERBLAYk5EJAEs5kREEsBiTkQkASzmREQSwGJORCQBLOZERBLAYk5EJAEs5kREEsBiTkQkASzmREQSwGJORCQBLOZERBLAYk5EJAEs5kREEsBiTkQkASzmREQSwGJORCQBLOZERBLAYk5EJAEs5kREEsBiTkQkASzmREQSwGJORCQBLOZERBLAYk5EJAEs5kREEsBiTkQkASzmREQSIKqYZ2VlIT4+HoMGDUJ8fDwuX75cy2ERuQZzm6TCS0yjhQsXYsyYMYiNjcXOnTuxYMECJCcn273Q79OuITn1PG4WlSLATwkAuKfWoGGQLxKjI9AnqpnJedamnEWxWmP0vLdSDpVSUeN5j7L1ursjMAgJ8oW2QovC4nKj5+v5KY3GoJ6VfqqOoblxE9PG0WU4ytm57anM7T8AIJMBguCGoKrzoP1Er56fElNGtAcAJKeeR0FRKeRyGXQ6AQ38FHge15yek/ayemR+69YtnDt3DjExMQCAmJgYnDt3DoWFhXYt8Pu0a1ix7QwKikohAChWa1Cs1kAAUFBUihXbzuD7tGtG8/yWVYLlX5w2mYhlGp1nF3IPU1BUWqOQA6gxBr9llZjto/oYmho3MW0scXR+MZyd257q+7Rr+Pd/TO8/gIcUcg9VrNbgw62/YPkXp1FQVAoA0OkqN9gdtdbpOekIq8U8NzcXjRo1gkKhAAAoFAqEhoYiNzfXrgUmp55HmUZrdnqZRovk1PNGzx08cxcVWmacq5RptDh45q7Z6abGsPq4iWljiaPzi+Hs3PZUyannodVx/7GXAJitP87OSUeIOs3iqPT0dMPf+lc3SwqKSpGWlmZ4fEdtvvjXdRp1IbIOLUHrIe9CJle4OxyDO2qt0RhUZW4Mq46bmDaWODq/K1TNa09TdRuJ2eccdf3kOtRr8hgaNOtS68syJ2PPq2je91Wo/Bu6dLmekpNWi3l
"text/plain": [
"<Figure size 432x288 with 6 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"ph_val = X_test[\"ph\"]\n",
"sulfate_val = X_test[\"Sulfate\"]\n",
"hard_val = X_test[\"Hardness\"]\n",
"carb_val = X_test[\"Organic_carbon\"]\n",
"turb_val = X_test[\"Turbidity\"]\n",
"ch_val = X_test[\"Chloramines\"]\n",
"\n",
"\n",
"figure, axes = plt.subplots(nrows=3, ncols=2)\n",
"\n",
"axes[0, 0].plot(ph_val, predictions, 'bo')\n",
"axes[0, 0].set_title(\"pH\")\n",
"\n",
"axes[0, 1].plot(sulfate_val, predictions, 'bo')\n",
"axes[0, 1].set_title(\"Sulfate\")\n",
"\n",
"axes[1, 0].plot(hard_val, predictions, 'bo')\n",
"axes[1, 0].set_title(\"Hardness\")\n",
"\n",
"axes[1, 1].plot(carb_val, predictions, 'bo')\n",
"axes[1, 1].set_title(\"Organic carbon\")\n",
"\n",
"axes[2, 0].plot(turb_val, predictions, 'bo')\n",
"axes[2, 0].set_title(\"Turbidity\")\n",
"\n",
"axes[2, 1].plot(ch_val, predictions, 'bo')\n",
"axes[2, 1].set_title(\"Chloramines\")\n",
"\n",
"plt.show()"
]
2022-05-17 18:37:29 +02:00
},
{
"cell_type": "code",
2022-05-17 18:54:16 +02:00
"execution_count": 12,
2022-05-17 18:37:29 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2022-05-17 18:54:16 +02:00
"<matplotlib.collections.PathCollection at 0x7fbbadd08048>"
2022-05-17 18:37:29 +02:00
]
},
2022-05-17 18:54:16 +02:00
"execution_count": 12,
2022-05-17 18:37:29 +02:00
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAEBCAYAAAB7Wx7VAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzsnWdgFFUXhp/ZnoQQIIRepUmR3gSlowgoTarSLIAiTQX96CAqgoiKINKkCCgIKEV6L9JCDb2FkoQAIYVsts98PxYiIbubTYEEcp8/ypQ7Zza779w59xRJURQFgUAgEDzzqDLbAIFAIBA8GYTgCwQCQTZBCL5AIBBkE4TgCwQCQTZBCL5AIBBkE4TgCwQCQTZBCL5AIBBkE4TgCwQCQTZBCL5AIBBkE4TgCwQCQTZBCL5AIBBkEzSZbYAsyxiNRrRaLZIkZbY5AoFA8FSgKAo2mw0/Pz9UKu/m7pku+EajkfPnz2e2GQKBQPBUUrZsWfz9/b06NtMFX6vVAk6jdTpdptoSEhJCpUqVMtUGT2R1+yDr2yjsSz9Z3cbsYp/VauX8+fOJGuoNmS74D9w4Op0OvV6fydaQJWzwRFa3D7K+jcK+9JPVbcxO9qXGFS4WbQUCgSCbIARfIBAIsglC8AUCgSCbkOk+fIFAIMhKKIoM5tUoxgUg3wFtBSS/D5B0VTPbtHQjBF8gEAjuoygKSswgsO4CxeTcaLmFYvkXJecEVL5vZK6B6US4dAQCgeAB1r1geUjsAVAAM9wbhZJk+9OHEHyBQCC4j2L6E3An6iqw7H6S5mQ4QvAFAoHgAfI9DzsVUIxPzJTHgRB8gUAgeID+ZZB8XO9THKCt8WTtyWCE4AsEAsF9JJ8OgA/JpVEP+oZImmKZYFXGIQRfIBAI7iOp/JECl4GmEqAHyd/5X5/XkXJ9l9nmpRsRlikQCAQPIWmKIeX9E8URBnIUqEsgqXJmtlkZQobP8H/66SfKlSsnSh4LBIKnGkldGElb+ZkRe8hgwT916hTHjh2jUKFCGTmsQCAQCDKADBN8q9XK+PHjGTNmjOhcJRAIBFkQSVEUJSMGmjx5MoUKFeKtt96iSZMmzJw5k7Jly6Z4nsViISQkJCNMEAgEgmxHpUqVvK6vnyGLtkePHuXkyZN8+umnaR4jNUY/LoKDg6lRI+vG2WZ1+yDr2yjsSz9Z3cbsYl9aJssZ4tI5dOgQly9fpmnTpjRp0oSbN2/y7rvvsmfPnowYXiAQCAQZQIbM8Pv06UOfPn0S/50al45AIBAIngwi8UogEAiyCY8l8Wrbtm2PY1iBQCAQpAMxwxcIBIJsghB8gUAgyCYIwRcIBIJsghB8gUAgyCYIwRcIBIJsghB8gUAgyCYIwRcIBIJsghB8gUAgyCYIwRcIBIJsghB8gUAgyCYIwRcIBIJsghB8gUAgyCY8luJpAoFAkF4UxQGWbSgJf4ISD/omSL5vIqkCMtu0pxYh+AKBIMuhKHaU6PfBehRIcG60nUQxzoLA5UiaYplq39OKcOkIBIIsh5KwDKxHSBR7AMygxKLEDs0ss556hOALBIKsR8IiwORihwy20yiOyCdt0TOBEHyBQJD1UGLc75O0IEc/OVueIYTgCwSCrIemgvt9ih3UwoefFjJs0fbDDz/kxo0bqFQqfH19GTVqFOXLl8+o4QUCQTZCytEf5e4hwPzIHgP4dkZS+WaGWU89GSb433zzDf7+/gBs2bKF4cOHs2rVqowaXiAQZCMkXXWUgC8hbjQgAQooNjC0RPL/LLPNe2rJMMF/IPYA8fHxSJKUUUNnaWRZxnTPhMHPgFqjzmxzBIJnBpXP6yiGV8CyDzCDthqSukBmm/VUk6Fx+CNGjGDv3r0oisKcOXMycugshyzL/D7xL/6cshpTvBm1Rk2z7g3pM7k7vv4+mW2eQPBMIEl6MDTObDOeGSRFUZSMHvSvv/5i3bp1zJ49O8VjLRYLISEhGW3CY+fPL/7h+KYz2My2xG0anZp8JfPSf35P1BqxHi4QCB4/lSpVQq/Xe3XsY8m0bdu2LaNHjyY6OprcuXN7dU5qjH5cBAcHU6NGjRSPi7gSyYlHxB7AbnUQHRaLLUKhdtuUx3lc9mUmWd1GYV/6yeo2Zhf70jJZzhDBNxqNxMXFUbBgQQC2bdtGQEAAuXLlyojhsxyHNx5HUrleozDFm9m98gD129Z+wlYJsiIOh4PDG48TvPk4eh8djTrXp1SVEpltliCbkiGCbzKZGDRoECaTCZVKRUBAADNnznxmF27Vas/umpT2C7IH8TFGPmk0hojLkZjizajUKlb98A9N3nqZIb/0fWZ/H4KsS4YIft68eVm2bFlGDPVUULtlNX4aNM/lPkMOA40613vCFgmyIt/3+4VrZ8OwW+0AyA4Zi8nK9qV7eOHl8jTv3jCTLRRkN8RUNA3kLRxIm/4tMPgmXXPQ+egoW/05arxSJZMsE2QVjLFG9v19OFHsH8ZstLBs8t+ZYJUguyMEP430mdSdD77vRf4SQUgqiZyB/nT69A2+3jgSlUp8rNmduzdj0Gjd52XcvhH1BK0RCJyIevhpRJIkWr7XjJbvNctsUwRZkMBCeXDYHW73F3ou/xO0RiBwIqaiAsFjwNffh0Zd6qMzaJPtM/jq6fJ5u0ywSpDdETN8geAx8dG0d4m4FMmFI5exmm3OZDxJou2A13i5Q91kx98MvcU/P27jj7C1FCyVnzc+bCFCOAUZihB8geAx4eNnYMqOcZzZf55j20+hM2h5qX0dCpTIl+zYA/8c4YtOU7BbHTjsDo7vPM3W33bT+8uudBjcOhOsFzyLCMF/yji5+wyrf97InbC7VKhbljYftSBf0byZbZbADZIkUeHFclR4sZzbY0xGMxM6f4clwZq47UEI57zhS6nTsjpFyhZ6EuYKnnGE4D9FrPt+GwdXHcdqsqAocO7ABVbP2MCX64ZTuYGHhhGCLM2/fx9ym4QlOxxsmLeN9ya+/cTskWWZrb/tZsX3a7l7M4aSlYrSbXgHqjSq+MRsEDwehOA/JYTsPcv+FUewmf+L67ZZ7disdsZ1+JZlEbNFeeanlJhbcS7j9QHsNge3w+4+MVsUReHrt39k/5rDmI0WAKJvxnBq3zn6TelJ676vPDFbBBmPiNJ5Slg7cxN2ixtRsNo5uu3pqzgqcPJcleJodK7nXgY/PeXrlHlitpzYdTqJ2D/AkmDl5yELMMYan5gtgoxHCP5TQlR4NO4KWSsoxN6OS/c1LCYLsXfikGU53WMJvKdKo4oEFs6DykUNJrVGTfPuDZ6YLZsX7sSSYHG5T61RsX/tkSdmiyDjEYL/lFCxfjk0OtcuG4ddpnS1EmkeOyoimrHtJ9M2dy+6Fu1H50J9WD1jA4+hVYLABZIkMXnLaIpXKILWoMUnhwEffwOBhXLz7fax+AX4PTFbjLEJbicWsixjin+0x6zgaUL48J8SXv/gVZZ/txqsSbM3tToNz9cqTfEKRdM0rjHWSP9anxEdGYvscM7sY27FMnvYb8TeuUf30R3TbbsgZfIWDuSXY9+y+rd1+Ek5yVs4D5UbVnjiZTpqv1aNw5uOYY5PPstXFKjcsAK3jTefqE2CjEPM8J8SAgvm5t1pXciVLwAff+cMUO+jo8KLZRm7amiax/1nzlbio42JYv8Ac4KF3yf+hTEuIb2mC7xEkiSKVChIs7cbULVxpUypydS460vkCPBL5l7SGbRUa1KJYs8XfuI2CTIOMcN/iiheuTC/h/3CyV1niLkVS8nKxSlevki6xtz1534sJqvLfRqdmlN7z1H7tWrpukZWw2F3oFKrRD16Fxh89fz471dM7P4jZw5cQKvTYLPaadz1JQb89G5mmydIJ0LwnzLUajVVG1fKuPFS6L37LPXm3bf6ENOHzOV26F3UWjUN3qxLn8k9CCzoXRvO7EJQkUCmbB9HVEQ00ZExFCyZ74muIwgeH8/Or1mQJpp3b5isrv8DZFmh0kvPP2GLHg+bFu7gq27fc+tKFIqiYLfa2blsHx/WGEZc1L1kx98JiyLiSmS2jlgKLJib0lV
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from sklearn.decomposition import PCA\n",
"\n",
"pca = PCA(n_components=2)\n",
"pca.fit(X_test)\n",
"X_pca = pca.transform(X_test)\n",
"\n",
"plt.scatter(X_pca[:, 0], X_pca[:, 1], c=predictions, s=50, cmap='viridis')"
]
},
{
"cell_type": "code",
2022-05-17 18:54:16 +02:00
"execution_count": 13,
2022-05-17 18:37:29 +02:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
2022-05-17 18:54:16 +02:00
"<matplotlib.collections.PathCollection at 0x7fbbad8e7358>"
2022-05-17 18:37:29 +02:00
]
},
2022-05-17 18:54:16 +02:00
"execution_count": 13,
2022-05-17 18:37:29 +02:00
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAEBCAYAAAB7Wx7VAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzsnWd4FFUXgN+Z7ekk9FClF6kCIiodEVCaghX1s6GIWAALKoKIBbCjKEUBFQQp0gSkN6X33lsghPRsts/9fiyEhN0NAQJJyH2fx0cy5cyZ3Z0z9557iiKEEEgkEonklkfNawUkEolEcnOQBl8ikUgKCdLgSyQSSSFBGnyJRCIpJEiDL5FIJIUEafAlEomkkCANvkQikRQSpMGXSCSSQoI0+BKJRFJIkAZfIpFICgnS4EskEkkhQZ/XCmiahtVqxWAwoChKXqsjkUgkBQIhBC6Xi+DgYFQ1Z2P3PDf4VquVAwcO5LUaEolEUiCpWrUqoaGhOTo2zw2+wWAAvEobjcY81WXXrl3Url07T3XIjvyuH+R/HaV+109+17Gw6Od0Ojlw4ECGDc0JeW7wL7pxjEYjJpMpj7UhX+iQHfldP8j/Okr9rp/8rmNh0u9qXOFy0VYikUgKCdLgSyQSSSFBGnyJRCIpJOS5D18ikUjyE0JoYJ+DsE4E7TwYaqIEv4RirJfXql030uBLJBLJBYQQiKR+4FwFwubd6DiHcPyLCBuGGvRg3ip4nUiXjkQikVzEuRYcmYw9AAKwQ+r7iCzbCx7S4EskEskFhO1PIJBRV8Gx+maqk+tIgy+RSCQX0VKz2SlAWG+aKjcCafAlEonkIqZ7QLH43yc8YGh4c/XJZaTBl0gkkgsolu6ABV/TaAJTcxR9uTzQKveQBl8ikUguoKihKFHTQF8bMIES6v2/5QGUiC/yWr3rRoZlSiQSSSYUfTmUon8iPKdBiwddBRQ1LK/VyhVyfYT/3XffUa1aNVnyWCKRFGgUXTSKoc4tY+whlw3+7t272bZtG6VLl85NsRKJRCLJBXLN4DudToYOHcrgwYNl5yqJRCLJhyhCCJEbgkaMGEHp0qV5/PHHadWqFWPGjKFq1apXPM/hcLBr167cUEEikUgKHbVr185xff1cWbTdunUrO3fupH///tcs42qUvlFs3ryZhg3zb5xtftcP8r+OUr/rJ7/rWFj0u5bBcq64dDZu3MiRI0do3bo1rVq14uzZszz77LOsWbMmN8RLJBKJJBfIlRH+Cy+8wAsvvJDx99W4dCQSiURyc5CJVxKJRFJIuCGJV8uWLbsRYiUSiURyHcgRvkQikRQSpMGXSCSSQoI0+BKJRFJIkAZfIpFICgnS4EskEkkhQRp8iUQiKSRIgy+RSCSFBGnwJRKJpJAgDb5EIpEUEqTBl0gkkkKCNPgSiURSSJAGXyKRSAoJN6R4mkQikVwvQnjAsQyR/ieINDC1Qgl6CEUNz2vVCizS4EskknyHEG5E4vPg3Aqkeze6diKsP0HUdBR9uTzVr6AiXToSiSTfIdKngXMLGcYeADuIZETygLxSq8AjDb5EIsl/pE8GbH52aODag/DE3myNbgmkwZdIJPkPkRR4n2IALfHm6XILIQ2+RCLJf+hrBt4n3KCTPvxrIdcWbV9++WVOnTqFqqoEBQXx/vvvU6NGjdwSL5FIChFKSB9EwkbAftkeMwT1RFGD8kKtAk+uGfzPPvuM0NBQAJYsWcK7777LrFmzcku8RCIpRCjGBojwjyHlA0ABBAgXmDughL6V1+oVWHLN4F809gBpaWkoipJbovM1mqZhS7VhDjaj0+vyWh2J5JZBtTyAMLcDxzrADob6KLqSea1WgSZX4/AHDRrE2rVrEUIwbty43BSd79A0jamfzubPUXOwpdnR6XW0ebI5L4x4kqBQS16rJ5HcEiiKCcwt81qNWwZFCCFyW+js2bOZP38+Y8eOveKxDoeDXbt25bYKN5w/P1rA9sV7cdldGdv0Rh3FKxalzy9PodPL9XCJRHLjqV27NiaTKUfH3pBM2y5duvDBBx+QmJhIkSJFcnTO1Sh9o9i8eTMNGza84nFnjsay4zJjD+B2ekg8nYzrjKBxlyvLuVH65SX5XUep3/WT33UsLPpdy2A5Vwy+1WolJSWFUqVKAbBs2TLCw8OJiIjIDfH5jk2LtqOo/tcobGl2Vs9cT7MujW+yVpL8iMfjYdOi7Wz+Zzsmi5EWPZtRqW6FvFZLUkjJFYNvs9no168fNpsNVVUJDw9nzJgxt+zCrU6XvbvmSvslhYO0JCtvthjMmSOx2NLsqDqVWV8voNXj9/D6jy/ess+HJP+SKwa/aNGiTJs2LTdEFQgad6jPd/0m+N1nDjHTouddN1kjSX7kq94/cmLfadxONwCaR8Nhc7J8yhpuv6cGbZ9snscaSgobcih6DRSNjqJzn/aYg7KuORgtRqo2uI2G7ermkWaS/II12cq6vzZlGPvM2K0Opo34Kw+0khR2pMG/Rl74/Ele+uppSlQohqIqhEWF0qP/g3yy6D1UVX6shZ2Es0noDYHzMuJOxd9EbSQSL7Ie/jWiKAodnmtDh+fa5LUqknxIVOlIPG5PwP2lbytxE7WRSLzIoahEcgMICrXQ4pFmGM0Gn33mIBOPvN01D7SSFHbkCF8iuUG88u2znDkcy8EtR3DaXd5kPEWhS9/7uaf7nT7Hnz12jgXfLOOP0/MoVakED77cXoZwSnIVafAlkhuEJdjMqBVD2PvfAbYt343RbODubk0oWaG4z7HrF2zhox6jcDs9eNwetq/cw9JfV/PMx4/S/bVOeaC95FZEGvwCxs7Ve5nzwyLOn06g5p1V6fxKe4qXLZrXakkCoCgKNZtWo2bTagGPsVntDOv5BY50Z8a2iyGcE96dQpMODShTtfTNUFdyiyMNfgFi/lfL2DBrO06bAyFg//qDzPl+IR/Pf5c692bTMEKSr/n3r40Bk7A0j4eFE5bx3KdP3DR9NE1j6a+rmfHVPBLOJlGxdlkee7c7dVvUumk6SG4M0uAXEHat3cd/M7bgsl+K63Y53bicboZ0H8m0M2NleeYCStK5FL/x+gBul4e40wk3TRchBJ888Q3/zd2E3eoAIPFsErvX7af3qKfo9GK7m6aLJPeRUToFhHljFuN2BDAKTjdblxW8iqMSL7fVLY/e6H/sZQ42UaNJlZumy45Ve7IY+4s40p388PpErMnWm6aLJPeRBr+AEB+TSKBC1gJBclzKdV9DCDtCS0AI7bplSXJO3Ra1iIqORPVTg0mn19H2yXtvmi7/TFqJI93hd59Or/LfvC03TRdJ7iMNfgGhVrNq6I3+XTYet0bl+hWuWbbwnENL7IOIbYg4dy8irhma9VduQKsEiR8URWHEkg8oX7MMBrMBS4gZS6iZqNJFGLn8Q4LDg2+aLtbk9IADC03TsKVd3mNWUpCQPvwCwgMv3cf0L+aAM2v2psGop3qjypSvWfaa5AotFRHfDbR44IJsLR5SRyBEEkrIK9epuSQnFI2O4sdtI5nz63yClTCKRkdSp3nNm16mo/H99dm0eBv2NN9RvhBQp3lN4qxnb6pOktxDjvALCFGlivDst48QUTwcS6h3BGiyGKnZtCofzhpwzXJF+jTQUsgw9hnYIO1HhJZ2XXpLco6iKJSpWYo2T9xLvZa186QmU8tH7yYkPNjHvWQ0G6jfqjblqkffdJ0kuYcc4RcgyteJZurpH9m5ai9J55KpWKc85WuUuT6h9oVAgGm6YgDXZjDdWmV8hXADOlmP3g/mIBPf/DucT5/8hr3rD2Iw6nE53bR89G76fvdsXqsnuU6kwS9g6HQ66rWsnXsClSuFct46PxFhX0r14sMQsTGAHmFujxL6ForON/O1MFOsTBSjlg8h/kwiibFJlKpY/KauI0huHNKlU9gxdwUsAXZqYMy/vUGvBi19FiLpdSyG04AAXGBfgIjvitASfY4XnrMI98lCHbEUVaoIletVlMb+FkIa/EKKy+nClmYDy4OgiwaMlx1hhtB3UBRzXqiXqwjhgtTh+LquPKAlI6y
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.scatter(X_pca[:, 0], X_pca[:, 1], c=y_test, s=50, cmap='viridis')"
]
2022-05-17 18:54:16 +02:00
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Irysy"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(105, 4) (105,)\n",
"(45, 4) (45,)\n",
"0.9333333333333333\n"
]
}
],
"source": [
"# Preprocessing danych\n",
"\n",
"\n",
"# Wczytywanie danych\n",
"df = pd.read_csv(\"iris.csv\")\n",
"\n",
"# Zrandomizowanie kolejności danych w datasecie\n",
"df = df.sample(frac=1, random_state=10).reset_index(drop=True)\n",
"\n",
"# Podział na atrybuty i przewidywane wartości\n",
"X, y = df.iloc[:, :-1], df.iloc[:, -1]\n",
"\n",
"# Normalizacja i skalowanie danych\n",
"from sklearn.preprocessing import StandardScaler\n",
"sc = StandardScaler()\n",
"X = sc.fit_transform(X.to_numpy())\n",
"X = pd.DataFrame(X, columns=df.columns.values.tolist()[:-1])\n",
"\n",
"# Podział na dane trenujące i testowe, z uwzględnieniem równego rozłożenia danych\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=1)\n",
"\n",
"print(X_train.shape, y_train.shape)\n",
"print(X_test.shape, y_test.shape)\n",
"\n",
"\n",
"# Trenowanie modelu klasyfikatora\n",
"x = NaiveBayesClassifier()\n",
"x.fit(X_train, y_train)\n",
"\n",
"\n",
"# Predykcja wartości dla danych testowych\n",
"predictions = x.predict(X_test)\n",
"\n",
"# Prawdopodobieństwa kolejnych predykcji\n",
"probabilities = [p[1] for p in predictions]\n",
"\n",
"# Przewidziana wartość\n",
"predictions = [p[0] for p in predictions]\n",
"\n",
"\n",
"# Wyliczenie accuracy modelu\n",
"print(x.accuracy(y_test, predictions))"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.9326599326599326"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"f1_score(y_test, predictions, average=\"macro\")"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.collections.PathCollection at 0x7fbbad7b1358>"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAEBCAYAAAB7Wx7VAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAIABJREFUeJzt3Xl4VNX9x/H3ubNlsrGDbIKyBsImKNpSF1xABQQ3LEWr1q3Un627aBV3hdJSW7Ao7kK1WgUFtIrigqCCQZDIqrLIFraQQDKZ7Z7fH5EIZCYkZGbuzJ3v63l4HjJ35s73QPLJnXPPorTWGiGEELZnWF2AEEKIxJDAF0KINCGBL4QQaUICXwgh0oQEvhBCpAkJfCGESBMS+EIIkSYk8IUQIk1I4AshRJqQwBdCiDQhgS+EEGnCaXUBpmlSVlaGy+VCKWV1OUIIkRK01gSDQbKysjCM2l27Wx74ZWVlrF271uoyhBAiJXXu3JmcnJxaPdfywHe5XEBl0W632+Jq6qawsJD8/Hyry4gpu7XJbu0B+7XJbu2BxLQpEAiwdu3aqgytDcsD/0A3jtvtxuPxWFxN3aVizUditzbZrT1gvzbZrT2QuDbVpStcbtoKIUSakMAXQog0IYEvhBBpwvI+fCGEqKvNa7cy/9XPKC8pp9fp+Zx0Xh8cDofVZSU9CXwhREp5duwM3nxiLuGQSTgU5p1pH9KkVSMmLXiIhs0aWF1eUpMuHSFEyvhiTgGzJr9LoCJIOBQGwLe/gu3rd/D46H9YXF3yk8AXQqSM1ya+RUWZv9rjoWCYFQtWsWvrHguqSh0S+EKIlLHthx1Rj7k8Lnb+uDuB1aQeCXwhRMpo06ll1GNBf5Bj2jdLYDWpRwJfCJEyRt45HE9m9RmsTreTE87qSaMWDS2oKnVI4AshUka/c3ox6u4RuDNcuDyVK+x6szNo160Nd7x4o9XlJT0ZlimESCmj7r6Isy4/jQX//YLyfT7yB3Sl9xn5srx6LUjgCyFSTvO2Tbno5iFWl5FypEtHCCHShAS+EEKkCQl8IYRIExL4QgiRJiTwhRAiTUjgCyFEmpDAF0KINCGBL4QQaUICXwgh0oQEvhBCpAkJfCGESBMS+EIIkSZisnhacXExd9xxB5s2bcLtdtOuXTsefPBBGjduHIvTR6SD69BlT0NwKaiGqKzfQMYwlJL14IQQIpKYXOErpbjmmmt47733mD17Nm3btmXixImxOHVE2v8ZevfFUDEbwj9CaAW65AF08Q1oHY7b+wohRCqLSeA3bNiQ/v37V33du3dvtm7dGotTV6N1GL33VsAHmAcd8UHwK/C/H5f3FUKIVBfzPnzTNHnllVcYOHBgrE9dKbgUCEQ+psvR5a/G532FECLFKa21juUJH3jgAYqKipg8eTKGceTfJ36/n8LCwlqfP9ezlHaNJ+M0fBGPlweOY83OR2t9PiGESGX5+fl4PNX3+Y0kpnc4x48fz8aNG5k6dWqtwv5gtS1ah9uhdz4R5aibzEbn0PfYvnV676NVUFBA376Jea9EsVub7NYesF+b7NYeSEyb6nqxDDHs0pk0aRKFhYVMmTIFt9sdq9NWoxxNwTsUyIhw0I3KHB239xZCiFQWkyv8devWMXXqVNq3b89ll10GQJs2bZgyZUosTl+Nyn0QjQt8M0G5QYfAcQyq4SSUo0Vc3lMIIVJdTAK/U6dOrFmzJhanqhWlXKgGD6JzboXQOjAaopwdE/b+QgiRilJ6lpIyGoC7n9VlCCFESpClFYQQIk1I4AshRJpIu8DXOow2S9A6ZHUpQgiRUCndh18XWgfR+ydD+cug/YAT7b0IlXs7SnmtLk8IIeIufQJ/783g/xSo+OmRIPheR4e+hcavoFTafdgRQqSZtEg5HVxzWNgf4IfQGgh8bkVZQgiRUGkR+AQ+A6L02etydMWHCS1HCCGskB6Bj0H0piqQTVOEEGkgPQLfMxBQkY+pDFTG4ISWI4QQVkiLwFfOduAdDhw+GicD3CeDq48VZQkhREKlTV+Gyn0Q7cyDsqfB3A5GE8i8ApX1O5SKcvUvhBA2kj6BrxQqaxRkjbK6FCGEsERadOkIIYSQwBdCiLQhgS+EEGlCAl8IIdKEBL4QQqQJCXwhhEgTEvhCCJEmJPCFECJNSOALIUSakMAXQog0IYEvhBBpQgJfCCHShAS+EEKkCQl8IYRIExL4QgiRJiTwhRAiTUjgCyFEmpDAF0KINCGBL4QQaUICXwgh0oQEvhBCpAkJfCGESBMS+EIIkSZiFvjjx49n4MCBdOnShbVr18bqtEIIIWIkZoF/5plnMmPGDFq3bh2rUwohhIghZ6xO1K9fv1idSghhgb07S3jnmQ9ZuXANTVo3Zsj1Z9PphOOtLkvEUMwCXwiRulYvXsedZz9EKBgiUBHEcBh8OONTLr39Aq4Yd2mdz7d9ww7mvfQJxUV7yTu5M6ddcgruDHccKhd1obTWOpYnHDhwIFOnTqVz5861er7f76ewsDCWJdSSxu3YiVJh/KEWyP1rka7MsMmj501m/57yasdcGU6umzqKtt1b1fp8n726hP/982O0hnAwjDvThTvDxQ3TRtP02MaxLF0A+fn5eDyeWj03aa7w61J0fWn/InTpvRDeCSgwMiH7LozMC+p0noKCAvr27RufIi1itzbZrT0Q+zZ9PX8F4aAZ8VgoEGbt/I0Mv2Jorc61tuB75v1rAaFAuOqxQHmQoC/Eq2Nn89yqJ1BKHfIa+T86OkdzsZx2l7VmYDm6+AYI/whUAD4wd0PpvZi+d6wuT4iE27Ntb9Rj2tTs2LSr1uea+c93CVYEq59Ha3Zt2cPar74/qhpFbMQs8B9++GFOPfVUtm/fzlVXXcX5558fq1PHhA4swdx9Key5hMqgP1wF7JtAjHu4hEh6x/U4FjMc+fve5XbS9aSOtT7XlrXbMM3I5zIcBkUbdx5VjSI2Yhb4f/7zn/n0009ZuXIlCxcuZO7cubE6db1p/2foPb+D4LKan2juAnNPYooSIkkc37Mdx/U4FqfLUe2Yw+Vg2B8G1/pcx/Voi+GIHCvhUJhdm/cw9+l5rF68Ti6uLJA0ffjxorVGl44j8lV9tWeDkpEEIv089Pad3HPeo2xavQWtweEwUIZi3Bu30bxt01qfZ8RN5/Ph9AX4fYFDHjcMRaAiyIvj/kM4bKIUtOncikffvSfWTRE1sH3gY26D8I7aPdfVE2XkxLceIZJQw2YNmLJkPGu++p4flm+gQbNcThzcG5fbVafztO/elpun3cDfrpmKMhTBigBurxt/uR+toXyfr+q5P6zYyL1DH+OqJy+JdXNEFPYPfG0C6ghPMkB5UbkPJKIiIZJWl34d6NKvQ51es33DDmY88gZfvP0VyjA49ZKTefKr8axYsIqSXaX88M0GFs5cTDh06EggM2Tywzeb2LqmCJsN0kla9g98R2swGlVe6UejsqHxGyhXu8TVJYQNbF67lRv7j6WirKIq0Oc8NY+PX13IkwUTaN62KbcNvL9a2B8Q9AfZ8PWPMCqRVacv2w/LVEqhcu+j5t9tIZQuSlRJQtjG5Jueo7zUd0igh4Nh9hWX8ezd/wbA4625W+jHlTVcjImYsn3gA6iMM8FVw1o/OgjBbxNXkBA2EKgIsGx+YcTRNmbY5LM3vgCgQ5+a1+Px7avNgAoRC/bv0jnAnQ/Br4BQhIMuMBomuiJhc/sDAWavWcWqXTtpnZPLhXndaZaVZXVZMRMOhWs8HgpWHs//ZVdcbifBQPWfPcNh0KSN/OwlStoEvvJehC57mciBb4Ln7ESXJGxsxY4iRr/5OiHTxBcK4nE4eOLLzxl/9iCGdu5qdXkx4c320qpDC35cszXi8QMTtvqe3ZPM3ExKdpVWe47T7aT/RSfEtU7xs7To0gFQzg6Q/Qcgg59H7Tgqv24wAWVkW1ecsJVgOMyVs95gX8CPL1S5zIA/HKYiHOLOee+xZV/14EtV10+8Ao+3+twVj9fNNY+PBsDhdPDwnLvIzPXiyax8rtPlxJ3h4vqJV9C8fZOE1pzO0uYKH8D
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"pca = PCA(n_components=2)\n",
"pca.fit(X_test)\n",
"X_pca = pca.transform(X_test)\n",
"\n",
"df_pred = pd.DataFrame(predictions).replace({'Virginica': 0, 'Versicolor': 1, \"Setosa\": 2}, regex=True)\n",
"df_pred = np.array(df_pred).reshape(1, -1)\n",
"plt.scatter(X_pca[:, 0], X_pca[:, 1], c=df_pred[0], s=50, cmap='viridis')"
]
2022-05-17 17:30:50 +02:00
}
],
"metadata": {
"colab": {
"collapsed_sections": [],
"name": "naive_bayes.ipynb",
"provenance": []
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 4
}