2021-05-31 19:11:26 +02:00
|
|
|
import pandas as pd
|
2021-05-31 22:09:09 +02:00
|
|
|
import numpy
|
|
|
|
import csv
|
|
|
|
# Load the dataset once; the probability helpers in this file read it through
# the module-level name `data`. The path is relative to the working directory.
data = pd.read_csv('heart.csv')
|
|
|
|
|
|
|
|
|
|
|
|
def p_target(target, df=None):
    """Return the prior probability ``P(target)``.

    The probability is the fraction of rows whose ``'target'`` column equals
    ``target``.

    Args:
        target: Class label to look for in the ``'target'`` column.
        df: Optional DataFrame to use instead of the module-level ``data``.

    Returns:
        A Python float in ``[0, 1]``; ``0.0`` when the dataset has no rows.
    """
    frame = data if df is None else df
    matches = frame['target'] == target
    total = len(frame)
    if total == 0:
        # Avoid ZeroDivisionError on an empty dataset.
        return 0.0
    # int(...) keeps the result a plain Python float rather than numpy.float64.
    return int(matches.sum()) / total
|
|
|
|
|
|
|
|
|
|
|
|
def p_property(property_name, value, df=None):
    """Return the marginal probability that a column matches ``value``.

    Args:
        property_name: Name of the column to inspect.
        value: Either a scalar, matched by equality, or a ``[low, high]``
            list/tuple, matched as the half-open interval
            ``low <= x < high``.
        df: Optional DataFrame to use instead of the module-level ``data``.

    Returns:
        A Python float in ``[0, 1]``; ``0.0`` when the dataset has no rows.
    """
    frame = data if df is None else df
    column = frame[property_name]
    total = len(frame)
    if total == 0:
        # Avoid ZeroDivisionError on an empty dataset.
        return 0.0

    if isinstance(value, (list, tuple)):
        # Range query: lower bound inclusive, upper bound exclusive.
        matches = (column >= value[0]) & (column < value[1])
    else:
        # Any scalar is an equality query. Previously only 0 and 1 were
        # recognised, so other categories (e.g. 2) crashed on value[0].
        matches = column == value

    return int(matches.sum()) / total
|
|
|
|
|
|
|
|
|
|
|
|
def p_property_if_target(property_name, property_value, target, df=None):
    """Return the conditional probability ``P(property | target)``.

    Only rows whose ``'target'`` column equals ``target`` are considered; the
    result is the fraction of those rows whose ``property_name`` column
    matches ``property_value``.

    Args:
        property_name: Name of the column to inspect.
        property_value: Either a scalar, matched by equality, or a
            ``[low, high]`` list/tuple, matched as ``low <= x < high``.
        target: Class label that selects the rows to condition on.
        df: Optional DataFrame to use instead of the module-level ``data``.

    Returns:
        A Python float in ``[0, 1]``; ``0.0`` when no row has the requested
        ``target`` (the conditional probability is undefined in that case).
    """
    frame = data if df is None else df

    # Boolean masks avoid label-based `column[i]` lookups, which only work
    # when the frame has a default 0..n-1 index.
    in_class = frame['target'] == target
    class_size = int(in_class.sum())
    if class_size == 0:
        # Avoid ZeroDivisionError when the class is absent from the data.
        return 0.0

    column = frame[property_name]
    if isinstance(property_value, (list, tuple)):
        # Range query: lower bound inclusive, upper bound exclusive.
        matches = (column >= property_value[0]) & (column < property_value[1])
    else:
        # Any scalar is an equality query, not just 0 and 1.
        matches = column == property_value

    return int((in_class & matches).sum()) / class_size
|
|
|
|
|
|
|
|
|
|
|
|
def p_target_if_property(properties_values, target):
    """Return the naive-Bayes posterior ``P(target | properties)``.

    For each class the joint score is the product of
    ``p_property_if_target`` over all given properties multiplied by
    ``p_target``; the posterior is the score for ``target`` divided by the
    sum of the scores for classes 0 and 1.

    Args:
        properties_values: Iterable of ``(property_name, property_value)``
            pairs, each passed on to ``p_property_if_target``.
        target: Class label whose posterior probability is wanted.

    Returns:
        The posterior probability, or ``nan`` when the evidence has zero
        probability under both classes.

    Side effects:
        Prints the numerator and the denominator (diagnostic output retained
        from the original implementation).
    """
    def joint(label):
        # P(properties | label) * P(label) under the independence assumption.
        likelihoods = [
            p_property_if_target(prop[0], prop[1], label)
            for prop in properties_values
        ]
        return numpy.prod(likelihoods) * p_target(label)

    # Score each class once; the original recomputed the target class twice.
    joints = {label: joint(label) for label in (0, 1)}

    numerator = joints[target] if target in joints else joint(target)
    print(numerator)

    # NOTE: the original author flagged this line with "something is not
    # right here" (translated from Polish); the normalisation over classes
    # 0 and 1 is kept exactly as it was.
    denominator = sum(joints.values())
    print(denominator)

    if denominator == 0:
        # Evidence never observed in either class: the posterior is
        # undefined. Return nan explicitly instead of dividing 0 by 0.
        return float('nan')
    return numerator / denominator
|
|
|
|
|
|
|
|
|
|
|
|
# --- Marginal probabilities of single features -------------------------------
print(p_property('sex', value=1), p_property('sex', value=0))
for column, wanted in (
    ('chol', [200, 1000]),
    ('age', [50, 1000]),
    ('fbs', 1),
    ('target', 1),
):
    print(p_property(column, value=wanted))

# --- Feature probabilities conditioned on target == 1 ------------------------
for column, wanted in (
    ('sex', 1),
    ('age', [51, 1000]),
    ('chol', [201, 1000]),
    ('fbs', 1),
):
    print(p_property_if_target(column, wanted, 1))

print()

# --- Posterior for one combined profile --------------------------------------
profile = [('sex', 1), ('chol', [201, 1000]), ('age', [51, 120]), ('fbs', 1)]
print(p_target_if_property(profile, 1))

# --- P(sex | target == 1), exported to CSV -----------------------------------
sex_1_given_target = p_property_if_target('sex', 1, 1)
sex_0_given_target = p_property_if_target('sex', 0, 1)
print(sex_1_given_target)
print(sex_0_given_target)

d = {
    'sex': ['Woman', 'Man'],
    'Probability': [sex_0_given_target, sex_1_given_target],
}
p = pd.DataFrame(d)
p.to_csv('sex_if_heart_attack.csv', index=False, header=True)

# --- Posterior per 5-year age bracket and sex, exported to CSV ---------------
age_sex_target = []
for a in range(30, 80, 5):
    print(a, a + 5)
    for i in (0, 1):
        evidence = [('sex', i), ('age', [a, a + 5])]
        # Query target 1 first, then target 0, so the diagnostic prints from
        # p_target_if_property appear in the same order as before.
        with_attack = p_target_if_property(evidence, 1)
        without_attack = p_target_if_property(evidence, 0)
        age_sex_target.append([[a, a + 5], i, with_attack, without_attack])

with open('age_sex_heart_attack.csv', 'w', newline='') as out_file:
    writer = csv.writer(out_file)
    writer.writerow(["age", "sex", "heart attack", "no heart attack"])
    writer.writerows(age_sex_target)
|
2021-05-31 19:11:26 +02:00
|
|
|
|