This commit is contained in:
JPogodzinski 2021-05-31 22:09:09 +02:00
parent a569e157b4
commit 67b0edfffd
4 changed files with 148 additions and 89 deletions

21
age_sex_heart_attack.csv Normal file
View File

@ -0,0 +1,21 @@
age,sex,heart attack,no heart attack
"[30, 35]",0,1.0,0.0
"[30, 35]",1,1.0,0.0
"[35, 40]",0,0.8495212038303693,0.15047879616963064
"[35, 40]",1,0.6055487402094931,0.39445125979050677
"[40, 45]",0,0.8827292110874201,0.11727078891257994
"[40, 45]",1,0.6717964824120602,0.3282035175879397
"[45, 50]",0,0.8070175438596491,0.19298245614035084
"[45, 50]",1,0.532089552238806,0.467910447761194
"[50, 55]",0,0.8411934552454283,0.1588065447545717
"[50, 55]",1,0.5902317880794702,0.40976821192052987
"[55, 60]",0,0.6172962226640158,0.3827037773359841
"[55, 60]",1,0.3048890578229676,0.6951109421770323
"[60, 65]",0,0.5713589868485144,0.4286410131514856
"[60, 65]",1,0.26603893680999097,0.7339610631900091
"[65, 70]",0,0.7016949152542373,0.29830508474576267
"[65, 70]",1,0.39011490060186027,0.6098850993981397
"[70, 75]",0,0.8070175438596491,0.19298245614035084
"[70, 75]",1,0.532089552238806,0.467910447761194
"[75, 80]",0,0.7150259067357513,0.2849740932642487
"[75, 80]",1,0.4055745164960181,0.5944254835039817
1 age sex heart attack no heart attack
2 [30, 35] 0 1.0 0.0
3 [30, 35] 1 1.0 0.0
4 [35, 40] 0 0.8495212038303693 0.15047879616963064
5 [35, 40] 1 0.6055487402094931 0.39445125979050677
6 [40, 45] 0 0.8827292110874201 0.11727078891257994
7 [40, 45] 1 0.6717964824120602 0.3282035175879397
8 [45, 50] 0 0.8070175438596491 0.19298245614035084
9 [45, 50] 1 0.532089552238806 0.467910447761194
10 [50, 55] 0 0.8411934552454283 0.1588065447545717
11 [50, 55] 1 0.5902317880794702 0.40976821192052987
12 [55, 60] 0 0.6172962226640158 0.3827037773359841
13 [55, 60] 1 0.3048890578229676 0.6951109421770323
14 [60, 65] 0 0.5713589868485144 0.4286410131514856
15 [60, 65] 1 0.26603893680999097 0.7339610631900091
16 [65, 70] 0 0.7016949152542373 0.29830508474576267
17 [65, 70] 1 0.39011490060186027 0.6098850993981397
18 [70, 75] 0 0.8070175438596491 0.19298245614035084
19 [70, 75] 1 0.532089552238806 0.467910447761194
20 [75, 80] 0 0.7150259067357513 0.2849740932642487
21 [75, 80] 1 0.4055745164960181 0.5944254835039817

36
charts.R Normal file
View File

@ -0,0 +1,36 @@
# Charts for the heart-disease Bayes analysis.
# Reads the raw dataset and two probability tables (presumably the CSV files
# written by main.py -- confirm the paths) and renders four highcharter charts.
library(dplyr)        # %>%, count, arrange, group_by
library(highcharter)  # hchart, hcaes, hc_* helpers

basedf <- read.csv("dev/matma_bayes/heart.csv", header = TRUE)
siha <- read.csv('dev/matma_bayes/sex_if_heart_attack.csv')
df2 <- read.csv('dev/matma_bayes/age_sex_heart_attack.csv')

# Replace the 0/1 sex codes with readable labels.
basedf$sex[basedf$sex == 1] <- 'Man'
basedf$sex[basedf$sex == 0] <- 'Woman'
df2$sex[df2$sex == 1] <- 'Man'
df2$sex[df2$sex == 0] <- 'Woman'

# One category label per (sex, age bin) row, e.g. "Man [30, 35]".
df2$x <- paste(df2$sex, df2$age)

# NOTE: a dangling `df2$comb<-` used to sit here; it swallowed the whole pie
# chart pipeline below as the right-hand side of an assignment, so it was removed.

# Pie chart: number of rows per sex in the raw dataset.
basedf %>%
  count(sex) %>%
  arrange(n) %>%
  hchart(type = "pie", hcaes(x = sex, y = n, color = n), name = "Quantity") %>%
  hc_title(text = "<b>Sex in dataset</b>")

# Pie chart: probability of each sex among heart-attack cases.
# NOTE(review): `name` is passed inside hcaes() here but outside it in the
# chart above -- confirm which placement is intended.
siha %>%
  hchart(type = "pie", hcaes(x = sex, y = Probability, color = Probability, name = 'Probability')) %>%
  hc_title(text = "<b>Sex if heart attack</b>")

# Area chart: how many rows have each cholesterol value, split by sex.
# hcaes() takes bare column names (as in the other charts), not strings.
basedf %>%
  group_by(sex, chol) %>%
  count() %>%
  hchart('area', hcaes(x = chol, y = n, group = sex)) %>%
  hc_xAxis(title = list(text = 'Cholesterol')) %>%
  hc_yAxis(title = list(text = 'Quantity')) %>%
  hc_title(text = "<b>Cholesterol distribution by sex</b>")

# Column chart: heart-attack probability for every sex / age-bin pair.
# read.csv() renames the "heart attack" column to heart.attack.
df2 %>%
  hchart('column', hcaes(x = x, y = heart.attack, group = age)) %>%
  hc_plotOptions(column = list(stacking = "normal")) %>%
  hc_xAxis(title = list(text = 'Age')) %>%
  hc_yAxis(title = list(text = 'Probability')) %>%
  hc_legend(enabled = FALSE) %>%  # the pipe was missing here, so hc_title() ran without a chart
  hc_title(text = "<b>Probability of heart attack for sex in age group</b>")

177
main.py
View File

@ -1,93 +1,92 @@
import pandas as pd
# Load the dataset and derive, for a handful of binary events, the raw count,
# the marginal probability and the probability conditional on target == 1
# (called "heart attack" throughout this script).
data = pd.read_csv('heart.csv')
print(data)

n_rows = len(data)

# One boolean mask per event; summing a mask counts the rows where it holds.
# This replaces the index loops that looked rows up by label (data[col][i]).
is_man = data['sex'] == 1
high_chol = data['chol'] > 200
over_50 = data['age'] > 50
high_sugar = data['fbs'] == 1  # presumably fasting blood sugar flag -- confirm
had_attack = data['target'] == 1

# --- marginal counts and probabilities ---------------------------------
men = int(is_man.sum())
print(men)
p_men = men / n_rows
print("ssssss", p_men)

chol = int(high_chol.sum())
print(chol)
p_chol = chol / n_rows
print(p_chol)

age_over_50 = int(over_50.sum())
print(age_over_50)
p_age_over_50 = age_over_50 / n_rows
print(p_age_over_50)

sugar = int(high_sugar.sum())
print(sugar)
p_sugar = sugar / n_rows
print(p_sugar)

heart_attack = int(had_attack.sum())
print(heart_attack)
p_heart_attack = heart_attack / n_rows
print(p_heart_attack)  # P(class)

# --- conditionals: P(event | attack) = P(event and attack) / P(attack) ---
# The two-step division is kept as-is so the printed floats do not change.
man_if_heart_attack = int((had_attack & is_man).sum())
print(man_if_heart_attack)
p_man_if_heart_attack = man_if_heart_attack / n_rows
p_man_if_heart_attack = p_man_if_heart_attack / p_heart_attack
print(p_man_if_heart_attack)

over50_if_heart_attack = int((had_attack & over_50).sum())
print(over50_if_heart_attack)
p_over50_if_heart_attack = over50_if_heart_attack / n_rows
p_over50_if_heart_attack = p_over50_if_heart_attack / p_heart_attack
print(p_over50_if_heart_attack)

chol_over200_if_heart_attack = int((had_attack & high_chol).sum())
print(chol_over200_if_heart_attack)
p_chol_over200_if_heart_attack = chol_over200_if_heart_attack / n_rows
p_chol_over200_if_heart_attack = p_chol_over200_if_heart_attack / p_heart_attack
print(p_chol_over200_if_heart_attack)

sugar_if_heart_attack = int((had_attack & high_sugar).sum())
print(sugar_if_heart_attack)
p_sugar_if_heart_attack = sugar_if_heart_attack / n_rows
p_sugar_if_heart_attack = p_sugar_if_heart_attack / p_heart_attack
print(p_sugar_if_heart_attack)
import numpy
import csv
data = pd.read_csv('heart.csv')

# Naive-Bayes posterior for "man, high sugar, age > 50, cholesterol > 200":
#   licznik   (numerator)   = P(features | attack) * P(attack)
#   mianownik (denominator) = licznik + P(features | no attack) * P(no attack)
licznik = p_man_if_heart_attack*p_sugar_if_heart_attack*p_over50_if_heart_attack*p_chol_over200_if_heart_attack*p_heart_attack

# The second term needs the likelihoods measured on the rows WITHOUT a heart
# attack. The previous formula used (1 - P(feature | attack)), which is the
# probability of the opposite feature among attack cases, not P(feature | no attack).
no_attack = data[data['target'] != 1]
if len(no_attack):
    p_features_if_no_attack = float(
        (no_attack['sex'] == 1).mean()
        * (no_attack['fbs'] == 1).mean()
        * (no_attack['age'] > 50).mean()
        * (no_attack['chol'] > 200).mean()
    )
else:
    # No negative cases at all: that class contributes nothing to the evidence.
    p_features_if_no_attack = 0.0
mianownik = licznik + p_features_if_no_attack * (1 - p_heart_attack)
def p_target(target):
    """Return the fraction of rows in ``data`` whose 'target' equals *target*.

    Reads the module-level ``data`` frame. Raises ZeroDivisionError when
    ``data`` has no rows.
    """
    # Summing the boolean mask counts the matching rows without a Python loop.
    matching = int((data['target'] == target).sum())
    return matching / len(data)
def p_property(property_name, value):
    """Return the fraction of rows in ``data`` whose column matches *value*.

    property_name: column of the module-level ``data`` frame to inspect.
    value: either a scalar, matched by equality, or a two-element list/tuple
        ``[low, high]``, matched as the half-open interval low <= x < high.

    Raises ZeroDivisionError when ``data`` has no rows.
    """
    column = data[property_name]
    if isinstance(value, (list, tuple)):
        # Interval query: lower bound inclusive, upper bound exclusive.
        matches = (column >= value[0]) & (column < value[1])
    else:
        # Any scalar is an exact match. Previously only 0 and 1 took this
        # path; other scalars fell into the interval branch and crashed.
        matches = column == value
    return int(matches.sum()) / len(data)
def p_property_if_target(property_name, property_value, target):
    """Return P(property matches | 'target' == target), estimated from ``data``.

    property_name: column of the module-level ``data`` frame to inspect.
    property_value: either a scalar, matched by equality, or a two-element
        list/tuple ``[low, high]``, matched as low <= x < high.
    target: class label selecting the rows to condition on.

    Raises ZeroDivisionError when no row has the requested *target*.
    """
    in_class = data['target'] == target
    column = data[property_name]
    if isinstance(property_value, (list, tuple)):
        # Interval query: lower bound inclusive, upper bound exclusive.
        matches = (column >= property_value[0]) & (column < property_value[1])
    else:
        # Any scalar is an exact match (not only 0 and 1, as before).
        matches = column == property_value
    # Masks align on the frame's index, so this no longer relies on the
    # index being 0..n-1 the way the data[col][i] lookups did.
    count_target = int(in_class.sum())
    property_if_target = int((in_class & matches).sum())
    return property_if_target / count_target
def p_target_if_property(properties_values, target):
    """Return the naive-Bayes posterior P(target | all given properties).

    properties_values: iterable of ``(column_name, value)`` pairs; each value
        is passed unchanged to ``p_property_if_target``.
    target: class label whose posterior is wanted.

    The numerator and the denominator are printed before returning, as before.
    The denominator sums over the two classes 0 and 1.
    """
    def joint(label):
        # P(properties | label) * P(label), treating properties as independent.
        likelihoods = [p_property_if_target(p[0], p[1], label) for p in properties_values]
        return numpy.prod(likelihoods) * p_target(label)

    numerator = joint(target)
    print(numerator)
    # Evidence = sum of the joint probability over both classes. The term for
    # `target` is the numerator itself, so it is reused, not recomputed.
    # (The original carried a note here saying "something is wrong here".)
    denominator = sum([numerator if t == target else joint(t) for t in [0, 1]])
    print(denominator)
    return numerator / denominator
# --- marginal probabilities ----------------------------------------------
print(p_property('sex', value=1), p_property('sex', value=0))
for column_name, bounds in (('chol', [200, 1000]), ('age', [50, 1000])):
    print(p_property(column_name, value=bounds))
for column_name in ('fbs', 'target'):
    print(p_property(column_name, value=1))

# --- probabilities conditional on target == 1 ----------------------------
for column_name, wanted in (
    ('sex', 1),
    ('age', [51, 1000]),
    ('chol', [201, 1000]),
    ('fbs', 1),
):
    print(p_property_if_target(column_name, wanted, 1))
print()

# Posterior for all four properties taken together.
print(p_target_if_property([('sex', 1), ('chol', [201, 1000]), ('age', [51, 120]), ('fbs', 1)], 1))

# --- sex distribution among target == 1 rows, exported for the charts ----
man_given_attack = p_property_if_target('sex', 1, 1)
woman_given_attack = p_property_if_target('sex', 0, 1)
print(man_given_attack)
print(woman_given_attack)
d = {'sex': ['Woman', 'Man'], 'Probability': [woman_given_attack, man_given_attack]}
p = pd.DataFrame(d)
p.to_csv('sex_if_heart_attack.csv', index=False, header=True)

# --- posterior per 5-year age bin and sex ---------------------------------
# Each row: [[low, high], sex code, P(target 1 | ...), P(target 0 | ...)].
age_sex_target = []
for low in range(30, 80, 5):
    high = low + 5
    print(low, high)
    for sex_code in (0, 1):
        evidence = [('sex', sex_code), ('age', [low, high])]
        p_attack = p_target_if_property(evidence, 1)
        p_no_attack = p_target_if_property(evidence, 0)
        age_sex_target.append([[low, high], sex_code, p_attack, p_no_attack])

with open('age_sex_heart_attack.csv', 'w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow(["age", "sex", "heart attack", "no heart attack"])
    writer.writerows(age_sex_target)

# Hand-computed posterior from the top of the script, for comparison.
print(licznik)
print(mianownik)
print(licznik/mianownik)

3
sex_if_heart_attack.csv Normal file
View File

@ -0,0 +1,3 @@
sex,Probability
Woman,0.43636363636363634
Man,0.5636363636363636
1 sex Probability
2 Woman 0.43636363636363634
3 Man 0.5636363636363636