testBayes
This commit is contained in:
parent
9fcd29016f
commit
52937d1152
182
TestBayes.ipynb
Normal file
182
TestBayes.ipynb
Normal file
@ -0,0 +1,182 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"import numpy as np \n",
|
||||||
|
"import pandas as pd \n",
|
||||||
|
"import matplotlib.pyplot as plt \n",
|
||||||
|
"import math\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"def accuracy_score(y_true, y_pred):\n",
|
||||||
|
"\n",
|
||||||
|
" \"\"\"\tscore = (y_true - y_pred) / len(y_true) \"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
" return round(float(sum(y_pred == y_true))/float(len(y_true)) * 100 ,2)\n",
|
||||||
|
"\n",
|
||||||
|
"def pre_processing(df):\n",
|
||||||
|
"\n",
|
||||||
|
" \"\"\" partioning data into features and target \"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
" X = df.drop([df.columns[-1]], axis = 1)\n",
|
||||||
|
" y = df[df.columns[-1]]\n",
|
||||||
|
"\n",
|
||||||
|
" return X, y\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"class NaiveBayes:\n",
|
||||||
|
"\n",
|
||||||
|
"def __init__(self):\n",
|
||||||
|
"\n",
|
||||||
|
" self.features = list\n",
|
||||||
|
" self.likelihoods = {}\n",
|
||||||
|
" self.class_priors = {}\n",
|
||||||
|
" self.pred_priors = {}\n",
|
||||||
|
"\n",
|
||||||
|
" self.X_train = np.array\n",
|
||||||
|
" self.y_train = np.array\n",
|
||||||
|
" self.train_size = int\n",
|
||||||
|
" self.num_feats = int\n",
|
||||||
|
"\n",
|
||||||
|
"\tdef fit(self, X, y):\n",
|
||||||
|
"\n",
|
||||||
|
" self.features = list(X.columns)\n",
|
||||||
|
" self.X_train = X\n",
|
||||||
|
" self.y_train = y\n",
|
||||||
|
" self.train_size = X.shape[0]\n",
|
||||||
|
" self.num_feats = X.shape[1]\n",
|
||||||
|
"\n",
|
||||||
|
" for feature in self.features:\n",
|
||||||
|
" self.likelihoods[feature] = {}\n",
|
||||||
|
" self.pred_priors[feature] = {}\n",
|
||||||
|
"\n",
|
||||||
|
" for feat_val in np.unique(self.X_train[feature]):\n",
|
||||||
|
" self.pred_priors[feature].update({feat_val: 0})\n",
|
||||||
|
"\n",
|
||||||
|
" for outcome in np.unique(self.y_train):\n",
|
||||||
|
" self.likelihoods[feature].update({feat_val+'_'+outcome:0})\n",
|
||||||
|
" self.class_priors.update({outcome: 0})\n",
|
||||||
|
"\n",
|
||||||
|
" self._calc_class_prior()\n",
|
||||||
|
" self._calc_likelihoods()\n",
|
||||||
|
" self._calc_predictor_prior()\n",
|
||||||
|
"\n",
|
||||||
|
" def _calc_class_prior(self):\n",
|
||||||
|
"\n",
|
||||||
|
" \"\"\" P(c) - Prior Class Probability \"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
" for outcome in np.unique(self.y_train):\n",
|
||||||
|
" outcome_count = sum(self.y_train == outcome)\n",
|
||||||
|
" self.class_priors[outcome] = outcome_count / self.train_size\n",
|
||||||
|
"\n",
|
||||||
|
" def _calc_likelihoods(self):\n",
|
||||||
|
"\n",
|
||||||
|
" \"\"\" P(x|c) - Likelihood \"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
" for feature in self.features:\n",
|
||||||
|
"\n",
|
||||||
|
" for outcome in np.unique(self.y_train):\n",
|
||||||
|
" outcome_count = sum(self.y_train == outcome)\n",
|
||||||
|
" feat_likelihood = self.X_train[feature][self.y_train[self.y_train == outcome].index.values.tolist()].value_counts().to_dict()\n",
|
||||||
|
"\n",
|
||||||
|
" for feat_val, count in feat_likelihood.items():\n",
|
||||||
|
" self.likelihoods[feature][feat_val + '_' + outcome] = count/outcome_count\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
" def _calc_predictor_prior(self):\n",
|
||||||
|
"\n",
|
||||||
|
" \"\"\" P(x) - Evidence \"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
" for feature in self.features:\n",
|
||||||
|
" feat_vals = self.X_train[feature].value_counts().to_dict()\n",
|
||||||
|
"\n",
|
||||||
|
" for feat_val, count in feat_vals.items():\n",
|
||||||
|
" self.pred_priors[feature][feat_val] = count/self.train_size\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
" def predict(self, X):\n",
|
||||||
|
"\n",
|
||||||
|
" \"\"\" Calculates Posterior probability P(c|x) \"\"\"\n",
|
||||||
|
"\n",
|
||||||
|
" results = []\n",
|
||||||
|
" X = np.array(X)\n",
|
||||||
|
"\n",
|
||||||
|
" for query in X:\n",
|
||||||
|
" probs_outcome = {}\n",
|
||||||
|
" for outcome in np.unique(self.y_train):\n",
|
||||||
|
" prior = self.class_priors[outcome]\n",
|
||||||
|
" likelihood = 1\n",
|
||||||
|
" evidence = 1\n",
|
||||||
|
"\n",
|
||||||
|
" for feat, feat_val in zip(self.features, query):\n",
|
||||||
|
" likelihood *= self.likelihoods[feat][feat_val + '_' + outcome]\n",
|
||||||
|
" evidence *= self.pred_priors[feat][feat_val]\n",
|
||||||
|
"\n",
|
||||||
|
" posterior = (likelihood * prior) / (evidence)\n",
|
||||||
|
"\n",
|
||||||
|
" probs_outcome[outcome] = posterior\n",
|
||||||
|
"\n",
|
||||||
|
" result = max(probs_outcome, key = lambda x: probs_outcome[x])\n",
|
||||||
|
" results.append(result)\n",
|
||||||
|
"\n",
|
||||||
|
" return np.array(results)\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"if __name__ == \"__main__\":\n",
|
||||||
|
"\n",
|
||||||
|
" #Weather Dataset\n",
|
||||||
|
" print(\"\\nWeather Dataset:\")\n",
|
||||||
|
"\n",
|
||||||
|
" df = pd.read_table(\"../Data/weather.txt\")\n",
|
||||||
|
" #print(df)\n",
|
||||||
|
"\n",
|
||||||
|
" #Split fearures and target\n",
|
||||||
|
" X,y = pre_processing(df)\n",
|
||||||
|
"\n",
|
||||||
|
" nb_clf = NaiveBayes()\n",
|
||||||
|
" nb_clf.fit(X, y)\n",
|
||||||
|
"\n",
|
||||||
|
" print(\"Train Accuracy: {}\".format(accuracy_score(y, nb_clf.predict(X))))\n",
|
||||||
|
"\n",
|
||||||
|
" #Query 1:\n",
|
||||||
|
" query = np.array([['Rainy','Mild', 'Normal', 't']])\n",
|
||||||
|
" print(\"Query 1:- {} ---> {}\".format(query, nb_clf.predict(query)))\n",
|
||||||
|
"\n",
|
||||||
|
" #Query 2:\n",
|
||||||
|
" query = np.array([['Overcast','Cool', 'Normal', 't']])\n",
|
||||||
|
" print(\"Query 2:- {} ---> {}\".format(query, nb_clf.predict(query)))\n",
|
||||||
|
"\n",
|
||||||
|
" #Query 3:\n",
|
||||||
|
" query = np.array([['Sunny','Hot', 'High', 't']])\n",
|
||||||
|
" print(\"Query 3:- {} ---> {}\".format(query, nb_clf.predict(query)))"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.8.5"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 4
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user