Initial commit

This commit is contained in:
Andrzej Preibisz 2022-06-18 23:02:40 +02:00
commit 2b2e6c8073
2 changed files with 319920 additions and 0 deletions

319796
heart_2020_cleaned.csv Normal file

File diff suppressed because it is too large Load Diff

124
main.py Normal file
View File

@ -0,0 +1,124 @@
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_addons as tfa
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, recall_score, log_loss, precision_score
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
def main():
    """Train and compare three binary classifiers on the heart-disease dataset.

    Loads 'heart_2020_cleaned.csv', encodes the categorical Yes/No columns to
    0/1, down-samples the majority class, then trains:
      1. a 4-layer Keras MLP (binary cross-entropy),
      2. a 2-layer Keras MLP (MSLE loss),
      3. a Gaussian Naive Bayes baseline,
    and prints loss / accuracy / precision / recall / F1 for each on a held-out
    test split. No return value; output goes to stdout.
    """
    no_of_epochs = 50
    batch_size = 64
    bayes = GaussianNB()
    feature_names = ["BMI", "SleepTime", "Sex", "Diabetic", "PhysicalActivity", "Smoking", "AlcoholDrinking",
                     "HeartDisease", "KidneyDisease", "Stroke", "Asthma"]
    train_feature_names = ["BMI", "SleepTime", "Sex", "Diabetic", "PhysicalActivity", "Smoking", "AlcoholDrinking",
                           "KidneyDisease", "Stroke", "Asthma"]
    dataset = pd.read_csv('heart_2020_cleaned.csv')
    dataset = dataset.dropna()
    dataset = dataset[feature_names]
    # "Diabetic" uses substring match because the raw data also contains values
    # like "Yes (during pregnancy)" that should count as positive.
    dataset["Diabetic"] = dataset["Diabetic"].apply(lambda x: int("Yes" in x))
    # Plain Yes/No columns all share the same 0/1 encoding.
    yes_no_columns = ["HeartDisease", "KidneyDisease", "PhysicalActivity", "Stroke",
                      "Smoking", "Asthma", "AlcoholDrinking"]
    for column in yes_no_columns:
        dataset[column] = dataset[column].apply(lambda x: int(x == "Yes"))
    dataset["Sex"] = dataset["Sex"].apply(lambda x: 1 if x == "Female" else 0)
    # Rebalance the data: in the original dataset class 0 makes up ~91% of the
    # examples, so keep all positives plus roughly 3x as many negatives.
    classes_counts = dataset["HeartDisease"].value_counts()
    positive_counts = classes_counts.get(1, 0)  # default 0: avoid None if no positives
    offset = int(round(positive_counts * 3, 0))
    dataset = dataset.sort_values(by=["HeartDisease"], ascending=False)[:offset]
    dataset = dataset.sample(frac=1)  # shuffle after the sort above
    dataset_train, dataset_test = train_test_split(dataset, test_size=.3, train_size=.7, random_state=42)
    scaler = StandardScaler()
    model = tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(4, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    model2 = tf.keras.Sequential([
        tf.keras.layers.Dense(4, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    model.compile(
        loss=tf.keras.losses.binary_crossentropy,
        optimizer='sgd',
        metrics=[tf.keras.metrics.Precision(name="precision"),
                 "accuracy",
                 tf.keras.metrics.Recall(name='recall')]
    )
    model2.compile(
        loss=tf.keras.losses.msle,
        optimizer='sgd',
        metrics=[tf.keras.metrics.Precision(name="precision"),
                 "accuracy",
                 tf.keras.metrics.Recall(name='recall')]
    )
    train_X = dataset_train[train_feature_names].astype(np.float32)
    train_Y = dataset_train["HeartDisease"]
    test_X = dataset_test[train_feature_names].astype(np.float32)
    test_Y = dataset_test["HeartDisease"]
    # Fit the scaler on the training split only, then apply the SAME
    # transformation to the test split (fit_transform on test data would leak
    # test statistics into preprocessing).
    train_X = scaler.fit_transform(train_X)
    test_X = scaler.transform(test_X)
    train_X = tf.convert_to_tensor(train_X)
    test_X = tf.convert_to_tensor(test_X)
    print("================ MODEL 1 TRAINING =======================")
    model.fit(train_X, train_Y, epochs=no_of_epochs, batch_size=batch_size)
    print("================ MODEL 2 TRAINING =======================")
    model2.fit(train_X, train_Y, epochs=no_of_epochs, batch_size=batch_size)
    # Round sigmoid outputs to hard 0/1 labels for the sklearn metrics.
    prediction_1 = np.round(model.predict(test_X), 0)
    prediction_2 = np.round(model2.predict(test_X), 0)
    evaluation = model.evaluate(test_X, test_Y, batch_size=batch_size, return_dict=True)
    evaluation_2 = model2.evaluate(test_X, test_Y, batch_size=batch_size, return_dict=True)
    # sklearn metrics take (y_true, y_pred) in that order; swapping them
    # silently exchanges precision with recall.
    f1_model_1 = f1_score(test_Y, prediction_1)
    f1_model_2 = f1_score(test_Y, prediction_2)
    print(f"MODEL 1 EVALUATION: LOSS:{round(evaluation.get('loss'), 4)}, ACCURACY: {round(evaluation.get('accuracy'), 4)}, RECALL: {round(evaluation.get('recall'), 4)}, F1_SCORE:{round(f1_model_1, 4)}, PRECISION: {round(evaluation.get('precision'), 4)}")
    print(f"MODEL 2 EVALUATION: LOSS:{round(evaluation_2.get('loss'), 4)}, ACCURACY: {round(evaluation_2.get('accuracy'), 4)}, RECALL: {round(evaluation_2.get('recall'), 4)}, F1_SCORE:{round(f1_model_2, 4)}, PRECISION: {round(evaluation_2.get('precision'), 4)}")
    bayes.fit(train_X, train_Y)
    pred_bayes = bayes.predict(test_X)
    score_bayes = bayes.score(test_X, test_Y)
    # NOTE(review): log_loss on hard 0/1 predictions (rather than
    # predict_proba) gives a degenerate loss value; kept as-is, only the
    # (y_true, y_pred) argument order is corrected here.
    loss_bayes = log_loss(test_Y, pred_bayes)
    precision_bayes = precision_score(test_Y, pred_bayes)
    bayes_recall = recall_score(test_Y, pred_bayes)
    bayes_f1 = f1_score(test_Y, pred_bayes)
    print(f"NAIVE BAYES CLASSIFIER: LOSS: {loss_bayes} ,ACCURACY:{score_bayes}, RECALL: {bayes_recall},F1_SCORE: {bayes_f1}, PRECISION: {precision_bayes}")
main()