final commit
This commit is contained in:
commit
3f5fac6712
13290
bayes_1 .html
Normal file
13290
bayes_1 .html
Normal file
File diff suppressed because it is too large
Load Diff
177
bayes_1.ipynb
Normal file
177
bayes_1.ipynb
Normal file
@ -0,0 +1,177 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"from sklearn.datasets import fetch_20newsgroups\n",
|
||||||
|
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import sklearn.metrics\n",
|
||||||
|
"import gensim"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
# Fetch the 20 Newsgroups corpus with scikit-learn's default arguments
# (the default subset is the training split; headers, footers and quotes
# are left in the posts).
newsgroups = fetch_20newsgroups()
newsgroups_text = newsgroups['data']

# Reduce every post to its distinct lowercase tokens: the classifier below
# only asks whether a word occurs in a post, not how many times.
newsgroups_text_tokenized = [
    list(set(gensim.utils.tokenize(post, lowercase=True)))
    for post in newsgroups_text
]
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
# Y holds one class index per post; Y_names maps each class index to its
# newsgroup name.
Y = newsgroups['target']
Y_names = newsgroups['target_names']
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 11,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
def get_prob3(index, document_tokenized, corpus=None, labels=None, alpha=0.0):
    """Return the unnormalized naive Bayes score of a document for one class.

    The score is ``P(words | class) * P(class)``, where the likelihood of each
    word is the fraction of the class's documents that contain it and the
    words are treated as independent. The evidence term (the denominator of
    Bayes' rule) is the same for every class, so it is left out: it does not
    change which class scores highest.

    Parameters
    ----------
    index : int
        Class label to score against (compared with ``==`` to ``labels``).
    document_tokenized : iterable of str
        Tokens of the document to classify.
    corpus : sequence of token collections, optional
        Training documents. Defaults to the notebook-level
        ``newsgroups_text_tokenized``.
    labels : sequence, optional
        Class label of every training document, aligned with ``corpus``.
        Defaults to the notebook-level ``Y``.
    alpha : float, optional
        Additive (Laplace) smoothing applied to the per-word document
        frequency as ``(count + alpha) / (n_class + 2 * alpha)``. The default
        ``0.0`` disables smoothing, in which case a single word never seen
        in the class drives the whole score to zero.

    Returns
    -------
    float
        ``0.0`` when no training document carries the label ``index``;
        otherwise likelihood times prior. An empty ``document_tokenized``
        yields the class prior alone.
    """
    # Fall back to the notebook globals so existing two-argument calls
    # keep working unchanged.
    if corpus is None:
        corpus = newsgroups_text_tokenized
    if labels is None:
        labels = Y

    class_docs = [doc for doc, label in zip(corpus, labels) if label == index]
    n_class = len(class_docs)
    if n_class == 0:
        return 0.0

    # Per-word likelihood: share of the class's documents containing the word.
    word_probs = [
        (sum(1 for doc in class_docs if word in doc) + alpha)
        / (n_class + 2 * alpha)
        for word in document_tokenized
    ]
    likelihood = np.prod(word_probs)  # empty product is 1.0

    prior = n_class / len(labels)

    # Not divided by the evidence term: it is identical for all classes and
    # therefore has no effect on the classifier's decision.
    return likelihood * prior
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 15,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
def print_results(list_of_words):
    """Print the naive Bayes score of a tokenized document for every class.

    Writes one line per entry of ``Y_names``: the score from ``get_prob3``
    formatted to five decimal places, two tabs, then the class name.

    Parameters
    ----------
    list_of_words : iterable of str
        Tokens of the document to score.

    Returns
    -------
    None
        Results are only printed.
    """
    for class_index, class_name in enumerate(Y_names):
        score = get_prob3(class_index, list_of_words)
        print("%.5f" % score, '\t\t', class_name)
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 17,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"0.00001 \t\t alt.atheism\n",
|
||||||
|
"0.00000 \t\t comp.graphics\n",
|
||||||
|
"0.00000 \t\t comp.os.ms-windows.misc\n",
|
||||||
|
"0.00000 \t\t comp.sys.ibm.pc.hardware\n",
|
||||||
|
"0.00000 \t\t comp.sys.mac.hardware\n",
|
||||||
|
"0.00000 \t\t comp.windows.x\n",
|
||||||
|
"0.00000 \t\t misc.forsale\n",
|
||||||
|
"0.00000 \t\t rec.autos\n",
|
||||||
|
"0.00002 \t\t rec.motorcycles\n",
|
||||||
|
"0.00000 \t\t rec.sport.baseball\n",
|
||||||
|
"0.00001 \t\t rec.sport.hockey\n",
|
||||||
|
"0.00001 \t\t sci.crypt\n",
|
||||||
|
"0.00000 \t\t sci.electronics\n",
|
||||||
|
"0.00000 \t\t sci.med\n",
|
||||||
|
"0.00000 \t\t sci.space\n",
|
||||||
|
"0.00000 \t\t soc.religion.christian\n",
|
||||||
|
"0.00087 \t\t talk.politics.guns\n",
|
||||||
|
"0.00003 \t\t talk.politics.mideast\n",
|
||||||
|
"0.00005 \t\t talk.politics.misc\n",
|
||||||
|
"0.00006 \t\t talk.religion.misc\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"print_results(['i','love','guns'])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 19,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"0.00004 \t\t alt.atheism\n",
|
||||||
|
"0.00000 \t\t comp.graphics\n",
|
||||||
|
"0.00000 \t\t comp.os.ms-windows.misc\n",
|
||||||
|
"0.00000 \t\t comp.sys.ibm.pc.hardware\n",
|
||||||
|
"0.00000 \t\t comp.sys.mac.hardware\n",
|
||||||
|
"0.00000 \t\t comp.windows.x\n",
|
||||||
|
"0.00000 \t\t misc.forsale\n",
|
||||||
|
"0.00000 \t\t rec.autos\n",
|
||||||
|
"0.00000 \t\t rec.motorcycles\n",
|
||||||
|
"0.00000 \t\t rec.sport.baseball\n",
|
||||||
|
"0.00000 \t\t rec.sport.hockey\n",
|
||||||
|
"0.00000 \t\t sci.crypt\n",
|
||||||
|
"0.00000 \t\t sci.electronics\n",
|
||||||
|
"0.00000 \t\t sci.med\n",
|
||||||
|
"0.00000 \t\t sci.space\n",
|
||||||
|
"0.00012 \t\t soc.religion.christian\n",
|
||||||
|
"0.00004 \t\t talk.politics.guns\n",
|
||||||
|
"0.00007 \t\t talk.politics.mideast\n",
|
||||||
|
"0.00003 \t\t talk.politics.misc\n",
|
||||||
|
"0.00008 \t\t talk.religion.misc\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"print_results(['is','there','life','after','death'])"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": null,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.7.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user