This commit is contained in:
s434766 2021-05-27 22:13:52 +02:00
commit 50baf055eb

182
TestBayes.ipynb Normal file
View File

@ -0,0 +1,182 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np \n",
"import pandas as pd \n",
"import matplotlib.pyplot as plt \n",
"import math\n",
"\n",
"\n",
"def accuracy_score(y_true, y_pred):\n",
"\n",
" \"\"\"\tscore = sum(y_pred == y_true) / len(y_true) * 100, rounded to 2 decimals \"\"\"\n",
"\n",
" return round(float(sum(y_pred == y_true))/float(len(y_true)) * 100 ,2)\n",
"\n",
"def pre_processing(df):\n",
"\n",
" \"\"\" partitioning data into features and target \"\"\"\n",
"\n",
" X = df.drop([df.columns[-1]], axis = 1)\n",
" y = df[df.columns[-1]]\n",
"\n",
" return X, y\n",
"\n",
"\n",
"\n",
"class NaiveBayes:\n",
"\n",
"def __init__(self):\n",
"\n",
" self.features = list\n",
" self.likelihoods = {}\n",
" self.class_priors = {}\n",
" self.pred_priors = {}\n",
"\n",
" self.X_train = np.array\n",
" self.y_train = np.array\n",
" self.train_size = int\n",
" self.num_feats = int\n",
"\n",
"\tdef fit(self, X, y):\n",
"\n",
" self.features = list(X.columns)\n",
" self.X_train = X\n",
" self.y_train = y\n",
" self.train_size = X.shape[0]\n",
" self.num_feats = X.shape[1]\n",
"\n",
" for feature in self.features:\n",
" self.likelihoods[feature] = {}\n",
" self.pred_priors[feature] = {}\n",
"\n",
" for feat_val in np.unique(self.X_train[feature]):\n",
" self.pred_priors[feature].update({feat_val: 0})\n",
"\n",
" for outcome in np.unique(self.y_train):\n",
" self.likelihoods[feature].update({feat_val+'_'+outcome:0})\n",
" self.class_priors.update({outcome: 0})\n",
"\n",
" self._calc_class_prior()\n",
" self._calc_likelihoods()\n",
" self._calc_predictor_prior()\n",
"\n",
" def _calc_class_prior(self):\n",
"\n",
" \"\"\" P(c) - Prior Class Probability \"\"\"\n",
"\n",
" for outcome in np.unique(self.y_train):\n",
" outcome_count = sum(self.y_train == outcome)\n",
" self.class_priors[outcome] = outcome_count / self.train_size\n",
"\n",
" def _calc_likelihoods(self):\n",
"\n",
" \"\"\" P(x|c) - Likelihood \"\"\"\n",
"\n",
" for feature in self.features:\n",
"\n",
" for outcome in np.unique(self.y_train):\n",
" outcome_count = sum(self.y_train == outcome)\n",
" feat_likelihood = self.X_train[feature][self.y_train[self.y_train == outcome].index.values.tolist()].value_counts().to_dict()\n",
"\n",
" for feat_val, count in feat_likelihood.items():\n",
" self.likelihoods[feature][feat_val + '_' + outcome] = count/outcome_count\n",
"\n",
"\n",
" def _calc_predictor_prior(self):\n",
"\n",
" \"\"\" P(x) - Evidence \"\"\"\n",
"\n",
" for feature in self.features:\n",
" feat_vals = self.X_train[feature].value_counts().to_dict()\n",
"\n",
" for feat_val, count in feat_vals.items():\n",
" self.pred_priors[feature][feat_val] = count/self.train_size\n",
"\n",
"\n",
" def predict(self, X):\n",
"\n",
" \"\"\" Calculates Posterior probability P(c|x) \"\"\"\n",
"\n",
" results = []\n",
" X = np.array(X)\n",
"\n",
" for query in X:\n",
" probs_outcome = {}\n",
" for outcome in np.unique(self.y_train):\n",
" prior = self.class_priors[outcome]\n",
" likelihood = 1\n",
" evidence = 1\n",
"\n",
" for feat, feat_val in zip(self.features, query):\n",
" likelihood *= self.likelihoods[feat][feat_val + '_' + outcome]\n",
" evidence *= self.pred_priors[feat][feat_val]\n",
"\n",
" posterior = (likelihood * prior) / (evidence)\n",
"\n",
" probs_outcome[outcome] = posterior\n",
"\n",
" result = max(probs_outcome, key = lambda x: probs_outcome[x])\n",
" results.append(result)\n",
"\n",
" return np.array(results)\n",
"\n",
"\n",
"\n",
"if __name__ == \"__main__\":\n",
"\n",
" #Weather Dataset\n",
" print(\"\\nWeather Dataset:\")\n",
"\n",
" df = pd.read_table(\"../Data/weather.txt\")\n",
" #print(df)\n",
"\n",
" #Split features and target\n",
" X,y = pre_processing(df)\n",
"\n",
" nb_clf = NaiveBayes()\n",
" nb_clf.fit(X, y)\n",
"\n",
" print(\"Train Accuracy: {}\".format(accuracy_score(y, nb_clf.predict(X))))\n",
"\n",
" #Query 1:\n",
" query = np.array([['Rainy','Mild', 'Normal', 't']])\n",
" print(\"Query 1:- {} ---> {}\".format(query, nb_clf.predict(query)))\n",
"\n",
" #Query 2:\n",
" query = np.array([['Overcast','Cool', 'Normal', 't']])\n",
" print(\"Query 2:- {} ---> {}\".format(query, nb_clf.predict(query)))\n",
"\n",
" #Query 3:\n",
" query = np.array([['Sunny','Hot', 'High', 't']])\n",
" print(\"Query 3:- {} ---> {}\".format(query, nb_clf.predict(query)))"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}