"""Categorical Naive Bayes classifier with a small weather-dataset demo."""

import math

import numpy as np
import pandas as pd

try:
    import matplotlib.pyplot as plt
except ImportError:  # plotting is optional; nothing below depends on it
    plt = None


def accuracy_score(y_true, y_pred):
    """Return the percentage of predictions that equal the true labels.

    Both arguments are converted to NumPy arrays and compared position by
    position, so lists, arrays and pandas Series can be mixed freely.

    Args:
        y_true: sequence of true labels.
        y_pred: sequence of predicted labels, same shape as ``y_true``.

    Returns:
        Accuracy as a percentage, rounded to two decimals.

    Raises:
        ValueError: if the two inputs do not have the same shape.
        ZeroDivisionError: if the inputs are empty.
    """
    truth = np.asarray(y_true)
    guess = np.asarray(y_pred)
    if truth.shape != guess.shape:
        raise ValueError(
            "y_true and y_pred must have the same shape, got {} and {}".format(
                truth.shape, guess.shape
            )
        )
    hits = int(np.sum(truth == guess))
    return round(hits / len(truth) * 100, 2)


def pre_processing(df):
    """Partition a DataFrame into features and target.

    The last column is the target; every other column is a feature.

    Returns:
        A ``(X, y)`` tuple: the feature DataFrame and the target Series.
    """
    target_col = df.columns[-1]
    X = df.drop([target_col], axis=1)
    y = df[target_col]
    return X, y


class NaiveBayes:
    """Naive Bayes classifier for categorical features.

    After ``fit`` the model exposes:

    * ``class_priors[outcome]``: fraction of training rows with that outcome.
    * ``likelihoods[feature]["<value>_<outcome>"]``: fraction of the rows of
      that outcome whose ``feature`` equals ``value``.
    * ``pred_priors[feature][value]``: fraction of all training rows whose
      ``feature`` equals ``value``.
    """

    def __init__(self):
        self.features = []
        self.likelihoods = {}
        self.class_priors = {}
        self.pred_priors = {}

        self.X_train = None
        self.y_train = None
        self.train_size = 0
        self.num_feats = 0

    @staticmethod
    def _key(feat_val, outcome):
        """Build the ``likelihoods`` lookup key for a (value, outcome) pair."""
        # Formatting (rather than ``+``) keeps the key identical for strings
        # and also works for non-string values such as integers.
        return f"{feat_val}_{outcome}"

    def fit(self, X, y):
        """Estimate priors and likelihoods from the training data.

        Args:
            X: pandas DataFrame of categorical features (one column each).
            y: target labels, one per row of ``X`` and in the same row order.
        """
        self.features = list(X.columns)
        self.X_train = X
        self.y_train = y
        self.train_size = X.shape[0]
        self.num_feats = X.shape[1]

        # Start from empty tables so a refit never keeps entries that belong
        # to a previous training set.
        outcomes = np.unique(self.y_train)
        self.class_priors = {outcome: 0 for outcome in outcomes}
        self.likelihoods = {}
        self.pred_priors = {}

        for feature in self.features:
            feat_vals = np.unique(self.X_train[feature])
            self.pred_priors[feature] = {feat_val: 0 for feat_val in feat_vals}
            # Every (value, outcome) pair gets an explicit 0 so combinations
            # that never occur in training are still present in the table.
            self.likelihoods[feature] = {
                self._key(feat_val, outcome): 0
                for feat_val in feat_vals
                for outcome in outcomes
            }

        self._calc_class_prior()
        self._calc_likelihoods()
        self._calc_predictor_prior()

    def _calc_class_prior(self):
        """ P(c) - Prior Class Probability """
        labels = np.asarray(self.y_train)
        for outcome in np.unique(labels):
            outcome_count = int(np.sum(labels == outcome))
            self.class_priors[outcome] = outcome_count / self.train_size

    def _calc_likelihoods(self):
        """ P(x|c) - Likelihood """
        # A plain boolean array selects rows by position, so the result does
        # not depend on the index labels of X and y agreeing.
        labels = np.asarray(self.y_train)
        for outcome in np.unique(labels):
            in_class = labels == outcome
            outcome_count = int(in_class.sum())

            for feature in self.features:
                counts = (
                    self.X_train.loc[in_class, feature].value_counts().to_dict()
                )
                for feat_val, count in counts.items():
                    key = self._key(feat_val, outcome)
                    self.likelihoods[feature][key] = count / outcome_count

    def _calc_predictor_prior(self):
        """ P(x) - Evidence """
        for feature in self.features:
            feat_vals = self.X_train[feature].value_counts().to_dict()
            for feat_val, count in feat_vals.items():
                self.pred_priors[feature][feat_val] = count / self.train_size

    def predict(self, X):
        """Return the most probable outcome for every row of ``X``.

        For each row the score of an outcome is
        ``likelihood * prior / evidence``, where likelihood and evidence are
        products over the row's feature values. Columns are matched to the
        training features by position.

        A feature value that was not observed during training is skipped for
        that row instead of raising ``KeyError``.

        Args:
            X: 2-D array-like with one row per query.

        Returns:
            NumPy array with one predicted outcome per row.
        """
        results = []
        outcomes = list(self.class_priors)

        for query in np.array(X):
            # Keep only the values the model has statistics for.
            known = [
                (feat, feat_val)
                for feat, feat_val in zip(self.features, query)
                if self.pred_priors[feat].get(feat_val, 0) > 0
            ]

            # The evidence does not depend on the outcome: compute it once.
            evidence = math.prod(
                self.pred_priors[feat][feat_val] for feat, feat_val in known
            )

            probs_outcome = {}
            for outcome in outcomes:
                likelihood = math.prod(
                    self.likelihoods[feat].get(self._key(feat_val, outcome), 0)
                    for feat, feat_val in known
                )
                prior = self.class_priors[outcome]
                probs_outcome[outcome] = (likelihood * prior) / evidence

            # On ties ``max`` keeps the first outcome in ``class_priors`` order.
            results.append(max(probs_outcome, key=probs_outcome.get))

        return np.array(results)


def main():
    """Train on the weather dataset and print accuracy and sample queries."""
    # Weather Dataset
    print("\nWeather Dataset:")

    df = pd.read_table("../Data/weather.txt")

    # Split features and target
    X, y = pre_processing(df)

    nb_clf = NaiveBayes()
    nb_clf.fit(X, y)

    print("Train Accuracy: {}".format(accuracy_score(y, nb_clf.predict(X))))

    sample_queries = [
        ["Rainy", "Mild", "Normal", "t"],
        ["Overcast", "Cool", "Normal", "t"],
        ["Sunny", "Hot", "High", "t"],
    ]
    for number, values in enumerate(sample_queries, start=1):
        query = np.array([values])
        print(
            "Query {}:- {} ---> {}".format(number, query, nb_clf.predict(query))
        )


if __name__ == "__main__":
    main()