# Matematyka_Bayes/main.py
#
# 93 lines
# 3.0 KiB
# Python
# Raw Permalink Normal View History
#
# 2021-05-31 19:11:26 +02:00
import pandas as pd
# 2021-05-31 22:09:09 +02:00
import numpy
import csv
# Load the dataset once when the module is executed. The probability helpers
# in this file read this module-level frame instead of taking it as an
# argument. The code in this file uses its 'target', 'sex', 'age', 'chol'
# and 'fbs' columns.
data = pd.read_csv('heart.csv')
def p_target(target):
    """Return the empirical prior P(target).

    This is the fraction of rows in the module-level ``data`` frame whose
    ``'target'`` column equals *target*.

    Args:
        target: Class label to look for in ``data['target']``.

    Returns:
        A float in ``[0, 1]``. An empty dataset yields ``0.0`` instead of
        raising ``ZeroDivisionError``.

    Raises:
        KeyError: If ``data`` has no ``'target'`` column.
    """
    labels = data['target']
    total = len(data)
    if total == 0:
        # No observations at all: report 0 rather than dividing by zero.
        return 0.0
    # Vectorized comparison replaces the element-by-element Python loop;
    # int() keeps the result a plain Python float, as before.
    return int((labels == target).sum()) / total
def p_property(property_name, value):
    """Return the empirical marginal probability of a property condition.

    The condition is evaluated against column *property_name* of the
    module-level ``data`` frame.

    Args:
        property_name: Name of the column to inspect.
        value: Either a scalar category (rows match when the column equals
            it) or a two-element list/tuple ``[low, high]`` describing the
            half-open interval ``low <= x < high``.

    Returns:
        The fraction of rows satisfying the condition, as a float in
        ``[0, 1]``. An empty dataset yields ``0.0``.

    Raises:
        KeyError: If the column does not exist.
    """
    column = data[property_name]
    total = len(data)
    if total == 0:
        return 0.0
    if isinstance(value, (list, tuple)):
        # Interval condition: lower bound inclusive, upper bound exclusive.
        low, high = value[0], value[1]
        hits = (column >= low) & (column < high)
    else:
        # Any scalar is a category. The previous check only recognised 0
        # and 1, so categories such as 2 or 3 crashed on ``value[0]``.
        hits = column == value
    return int(hits.sum()) / total
def p_property_if_target(property_name, property_value, target):
    """Return the empirical likelihood P(property condition | target).

    Only rows of the module-level ``data`` frame whose ``'target'`` column
    equals *target* are considered.

    Args:
        property_name: Name of the column to inspect.
        property_value: Either a scalar category (rows match when the
            column equals it) or a two-element list/tuple ``[low, high]``
            describing the half-open interval ``low <= x < high``.
        target: Class label to condition on.

    Returns:
        The fraction of rows of class *target* that satisfy the condition.
        When no row has that class the result is ``0.0`` instead of a
        ``ZeroDivisionError``.

    Raises:
        KeyError: If ``'target'`` or *property_name* is not a column.
    """
    in_class = data['target'] == target
    column = data[property_name]
    class_size = int(in_class.sum())
    if class_size == 0:
        return 0.0
    if isinstance(property_value, (list, tuple)):
        # Interval condition: lower bound inclusive, upper bound exclusive.
        matches = (column >= property_value[0]) & (column < property_value[1])
    else:
        # Any scalar is a category, not only 0 and 1.
        matches = column == property_value
    # Boolean masks are aligned by position within the same frame, so this
    # works for any index (the old ``data[col][i]`` lookup was label-based
    # and required a default 0..n-1 index).
    return int((matches & in_class).sum()) / class_size
def p_target_if_property(properties_values, target, *, verbose=True):
    """Return the naive Bayes posterior P(target | properties).

    For every class ``t`` the joint score is
    ``prod_i P(property_i | t) * P(t)``; the posterior is the score of
    *target* divided by the sum of the scores of all classes.

    Args:
        properties_values: Iterable of ``(property_name, value)`` pairs;
            each ``value`` is passed unchanged to ``p_property_if_target``.
        target: Class label whose posterior is requested.
        verbose: When true (the default, matching the historical
            behaviour) the numerator and the denominator are printed.

    Returns:
        The posterior probability, or ``nan`` when every class has a zero
        joint score (the evidence never occurs in the data).
    """
    # Materialize once so generators can be passed and reused per class.
    properties = list(properties_values)

    def _joint(label):
        """Return prod_i P(property_i | label) * P(label)."""
        likelihoods = [p_property_if_target(p[0], p[1], label) for p in properties]
        return numpy.prod(likelihoods) * p_target(label)

    numerator = _joint(target)
    if verbose:
        print(numerator)

    # NOTE: the original author marked this denominator as suspicious. It is
    # the evidence term from the law of total probability under the naive
    # independence assumption, which is the standard normalizer. What was
    # missing is generality: the sum must run over every class present in
    # the data, not over a hard-coded [0, 1].
    classes = data['target'].dropna().unique().tolist()
    denominator = sum(numerator if label == target else _joint(label) for label in classes)
    if verbose:
        print(denominator)

    if denominator == 0:
        # 0/0: the posterior is undefined. Return NaN explicitly instead of
        # relying on a numpy RuntimeWarning to produce it.
        return float('nan')
    return numerator / denominator
# --- Marginal probabilities of single properties --------------------------
print(p_property('sex', value=1), p_property('sex', value=0))
for column, criterion in (
    ('chol', [200, 1000]),
    ('age', [50, 1000]),
    ('fbs', 1),
    ('target', 1),
):
    print(p_property(column, value=criterion))

# --- Likelihoods of single properties given target == 1 -------------------
for column, criterion in (
    ('sex', 1),
    ('age', [51, 1000]),
    ('chol', [201, 1000]),
    ('fbs', 1),
):
    print(p_property_if_target(column, criterion, 1))
print()

# --- Posterior for one combined set of properties -------------------------
print(p_target_if_property([('sex', 1), ('chol', [201, 1000]), ('age', [51, 120]), ('fbs', 1)], 1))
print(p_property_if_target('sex', 1, 1))
print(p_property_if_target('sex', 0, 1))

# --- Export: P(sex | target == 1); sex code 0 is labelled 'Woman', 1 'Man' -
sex_table = {
    'sex': ['Woman', 'Man'],
    'Probability': [p_property_if_target('sex', code, 1) for code in (0, 1)],
}
pd.DataFrame(sex_table).to_csv('sex_if_heart_attack.csv', index=False, header=True)

# --- Export: posterior of both targets per 5-year age bracket and sex -----
age_sex_target = []
for lower in range(30, 80, 5):
    upper = lower + 5
    print(lower, upper)
    for sex in (0, 1):
        evidence = [('sex', sex), ('age', [lower, upper])]
        # Target 1 is evaluated before target 0, as in the row layout below.
        with_target = p_target_if_property(evidence, 1)
        without_target = p_target_if_property(evidence, 0)
        age_sex_target.append([[lower, upper], sex, with_target, without_target])

with open('age_sex_heart_attack.csv', 'w', newline='') as out_file:
    table = csv.writer(out_file)
    table.writerow(["age", "sex", "heart attack", "no heart attack"])
    table.writerows(age_sex_target)
# 2021-05-31 19:11:26 +02:00