From 67b0edfffd37541b906c42536d94939cbed2aa7e Mon Sep 17 00:00:00 2001 From: JPogodzinski Date: Mon, 31 May 2021 22:09:09 +0200 Subject: [PATCH] addded r --- age_sex_heart_attack.csv | 21 +++++ charts.R | 36 ++++++++ main.py | 177 +++++++++++++++++++-------------------- sex_if_heart_attack.csv | 3 + 4 files changed, 148 insertions(+), 89 deletions(-) create mode 100644 age_sex_heart_attack.csv create mode 100644 charts.R create mode 100644 sex_if_heart_attack.csv diff --git a/age_sex_heart_attack.csv b/age_sex_heart_attack.csv new file mode 100644 index 0000000..c118fa6 --- /dev/null +++ b/age_sex_heart_attack.csv @@ -0,0 +1,21 @@ +age,sex,heart attack,no heart attack +"[30, 35]",0,1.0,0.0 +"[30, 35]",1,1.0,0.0 +"[35, 40]",0,0.8495212038303693,0.15047879616963064 +"[35, 40]",1,0.6055487402094931,0.39445125979050677 +"[40, 45]",0,0.8827292110874201,0.11727078891257994 +"[40, 45]",1,0.6717964824120602,0.3282035175879397 +"[45, 50]",0,0.8070175438596491,0.19298245614035084 +"[45, 50]",1,0.532089552238806,0.467910447761194 +"[50, 55]",0,0.8411934552454283,0.1588065447545717 +"[50, 55]",1,0.5902317880794702,0.40976821192052987 +"[55, 60]",0,0.6172962226640158,0.3827037773359841 +"[55, 60]",1,0.3048890578229676,0.6951109421770323 +"[60, 65]",0,0.5713589868485144,0.4286410131514856 +"[60, 65]",1,0.26603893680999097,0.7339610631900091 +"[65, 70]",0,0.7016949152542373,0.29830508474576267 +"[65, 70]",1,0.39011490060186027,0.6098850993981397 +"[70, 75]",0,0.8070175438596491,0.19298245614035084 +"[70, 75]",1,0.532089552238806,0.467910447761194 +"[75, 80]",0,0.7150259067357513,0.2849740932642487 +"[75, 80]",1,0.4055745164960181,0.5944254835039817 diff --git a/charts.R b/charts.R new file mode 100644 index 0000000..28e3b5b --- /dev/null +++ b/charts.R @@ -0,0 +1,36 @@ +basedf<-read.csv("dev/matma_bayes/heart.csv", header = TRUE) +siha<-read.csv('dev/matma_bayes/sex_if_heart_attack.csv') +df2<-read.csv('dev/matma_bayes/age_sex_heart_attack.csv') +basedf$sex[basedf$sex==1]<-'Man' +basedf$sex[basedf$sex==0]<-'Woman' +df2$sex[df2$sex==1]<-'Man' +df2$sex[df2$sex==0]<-'Woman' +df2$x<-paste(df2$sex,df2$age) +df2$comb<- + +basedf%>% + count(sex)%>% arrange(n)%>% + hchart(type = "pie", hcaes(x=sex, y = n, color = n), name="Quantity") %>% + hc_title(text="Sex in dataset") + + +siha%>% + hchart(type = "pie", hcaes(x=sex, y = Probability, color = Probability, name='Probability')) %>% + hc_title(text="Sex if heart attack") + +basedf%>% + group_by(sex, chol)%>% + count()%>% + hchart('area', hcaes(x='chol', y='n', group='sex'))%>% + hc_xAxis(title=list(text='Cholesterol'))%>% + hc_yAxis(title=list(text='Quantity'))%>% + hc_title(text="Number of terrorists attacks in each year by region") + + +df2%>% + hchart('column', hcaes(x=x, y=heart.attack, group=age))%>% + hc_plotOptions(column = list(stacking = "normal"))%>% + hc_xAxis(title=list(text='Age'))%>% + hc_yAxis(title=list(text='Probability'))%>% + hc_legend(enabled=F) + hc_title(text="Probability of heart attack for sex in age group") diff --git a/main.py b/main.py index 760ac93..8317b4a 100644 --- a/main.py +++ b/main.py @@ -1,93 +1,92 @@ import pandas as pd - -data=pd.read_csv('heart.csv') - -print(data) - -men=0 -for i in range(len(data['sex'])): - if data['sex'][i] ==1: - men+=1 -print(men) -p_men=men/len(data) -print("ssssss",p_men) - -chol=0 -for i in range(len(data['chol'])): - if data['chol'][i] >200: - chol+=1 -print(chol) -p_chol=chol/len(data) -print(p_chol) - -age_over_50=0 -for i in range(len(data['age'])): - if data['age'][i] >50: - age_over_50+=1 -print(age_over_50) -p_age_over_50=age_over_50/len(data) -print(p_age_over_50) - -sugar=0 -for i in range(len(data['fbs'])): - if data['fbs'][i] ==1: - sugar+=1 -print(sugar) -p_sugar=sugar/len(data) -print(p_sugar) - -heart_attack=0 -for i in range(len(data['target'])): - if data['target'][i] ==1: - heart_attack+=1 -print(heart_attack) -p_heart_attack=heart_attack/len(data) -print(p_heart_attack) #P(class) - -man_if_heart_attack=0 -for i in range(len(data['target'])): - if data['target'][i] ==1: - if data['sex'][i]==1: - man_if_heart_attack+=1 -print(man_if_heart_attack) -p_man_if_heart_attack=man_if_heart_attack/len(data) -p_man_if_heart_attack=p_man_if_heart_attack/p_heart_attack -print(p_man_if_heart_attack) - -over50_if_heart_attack=0 -for i in range(len(data['target'])): - if data['target'][i] ==1: - if data['age'][i]>50: - over50_if_heart_attack+=1 -print(over50_if_heart_attack) -p_over50_if_heart_attack=over50_if_heart_attack/len(data) -p_over50_if_heart_attack=p_over50_if_heart_attack/p_heart_attack -print(p_over50_if_heart_attack) - -chol_over200_if_heart_attack=0 -for i in range(len(data['target'])): - if data['target'][i] ==1: - if data['chol'][i]>200: - chol_over200_if_heart_attack+=1 -print(chol_over200_if_heart_attack) -p_chol_over200_if_heart_attack=chol_over200_if_heart_attack/len(data) -p_chol_over200_if_heart_attack=p_chol_over200_if_heart_attack/p_heart_attack -print(p_chol_over200_if_heart_attack) - -sugar_if_heart_attack=0 -for i in range(len(data['target'])): - if data['target'][i] ==1: - if data['fbs'][i]==1: - sugar_if_heart_attack+=1 -print(sugar_if_heart_attack) -p_sugar_if_heart_attack=sugar_if_heart_attack/len(data) -p_sugar_if_heart_attack=p_sugar_if_heart_attack/p_heart_attack -print(p_sugar_if_heart_attack) +import numpy +import csv +data = pd.read_csv('heart.csv') -licznik=p_man_if_heart_attack*p_sugar_if_heart_attack*p_over50_if_heart_attack*p_chol_over200_if_heart_attack*p_heart_attack -mianownik=(p_man_if_heart_attack*p_sugar_if_heart_attack*p_over50_if_heart_attack*p_chol_over200_if_heart_attack*p_heart_attack)+((1-p_man_if_heart_attack)*(1-p_sugar_if_heart_attack)*(1-p_over50_if_heart_attack)*(1-p_chol_over200_if_heart_attack)*(1-p_heart_attack)) +def p_target(target): + count = 0 + for i in data['target']: + if i == target: + count += 1 + return count / len(data) + + +def p_property(property_name, value): + count = 0 + if value == 0 or value == 1: + for i in data[property_name]: + if i == value: + count += 1 + else: + for i in data[property_name]: + if value[0] <= i and i < value[1]: + count += 1 + return count / len(data) + + +def p_property_if_target(property_name, property_value, target): + property_if_target = 0 + count_target = 0 + for i in range(len(data['target'])): + if data['target'][i] == target: + count_target += 1 + if property_value == 0 or property_value == 1: + if data[property_name][i] == property_value: + property_if_target += 1 + else: + if property_value[0] <= data[property_name][i] and data[property_name][i] < property_value[1]: + property_if_target += 1 + return property_if_target / count_target + + +def p_target_if_property(properties_values, target): + p_properties_if_target = [] + for p in properties_values: + p_properties_if_target.append(p_property_if_target(p[0], p[1], target)) + numerator = numpy.prod(p_properties_if_target) * p_target(target) + print(numerator) + p_properties_if_target = [[], []] + for t in [0, 1]: + for p in properties_values: + p_properties_if_target[t].append(p_property_if_target(p[0], p[1], t)) + denominator = sum([numpy.prod(p_properties_if_target[t]) * p_target(t) for t in [0, 1]]) # coś tutaj jest nie tak + print(denominator) + return numerator / denominator + + +print(p_property('sex', value=1), p_property('sex', value=0)) +print(p_property('chol', value=[200, 1000])) +print(p_property('age', value=[50, 1000])) +print(p_property('fbs', value=1)) +print(p_property('target', value=1)) + +print(p_property_if_target('sex', 1, 1)) +print(p_property_if_target('age', [51, 1000], 1)) +print(p_property_if_target('chol', [201, 1000], 1)) +print(p_property_if_target('fbs', 1, 1)) + +print() + +print(p_target_if_property([('sex', 1), ('chol', [201, 1000]), ('age', [51, 120]), ('fbs', 1)], 1)) + + +print(p_property_if_target('sex', 1, 1)) +print(p_property_if_target('sex', 0, 1)) +d={'sex':['Woman', 'Man'], 'Probability': [p_property_if_target('sex', 0, 1), p_property_if_target('sex', 1, 1)]} +p=pd.DataFrame(d) + +pd.DataFrame(p).to_csv('sex_if_heart_attack.csv',index=False, header=True) + +age_sex_target = [] +for a in range(30, 80, 5): + print(a, a+5) + for i in [0,1]: + age_sex_target.append([[a, a + 5], i, p_target_if_property([('sex', i), ('age', [a, a + 5])], 1), p_target_if_property([('sex', i), ('age', [a, a + 5])], 0)]) + +with open('age_sex_heart_attack.csv', 'w', newline='') as file: + writer = csv.writer(file) + writer.writerow(["age", "sex", "heart attack", "no heart attack"]) + for i in age_sex_target: + writer.writerow(i) -print(licznik) -print(mianownik) -print(licznik/mianownik) \ No newline at end of file diff --git a/sex_if_heart_attack.csv b/sex_if_heart_attack.csv new file mode 100644 index 0000000..acb7888 --- /dev/null +++ b/sex_if_heart_attack.csv @@ -0,0 +1,3 @@ +sex,Probability +Woman,0.43636363636363634 +Man,0.5636363636363636