"""Compare two small Keras networks and Gaussian Naive Bayes on heart-disease data.

The script reads ``heart_2020_cleaned.csv``, encodes the categorical columns
as 0/1 integers, undersamples the majority class, trains the three
classifiers on a 70/30 split and prints their evaluation metrics.
"""
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_addons as tfa
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, recall_score, log_loss, precision_score
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB

# Name of the label column.
TARGET = "HeartDisease"

# Columns kept from the CSV (model inputs plus the label).
FEATURE_NAMES = [
    "BMI", "SleepTime", "Sex", "Diabetic", "PhysicalActivity", "Smoking",
    "AlcoholDrinking", "HeartDisease", "KidneyDisease", "Stroke", "Asthma",
]

# Columns whose value is encoded as 1 when it equals "Yes" exactly, else 0.
YES_NO_COLUMNS = [
    "HeartDisease", "KidneyDisease", "PhysicalActivity", "Stroke",
    "Smoking", "Asthma", "AlcoholDrinking",
]

# Seed shared by the shuffle and the train/test split.
RANDOM_STATE = 42


def _load_dataset(path):
    """Read the CSV at *path* and return the selected columns encoded as numbers.

    Rows containing any missing value are dropped before the columns listed
    in ``FEATURE_NAMES`` are selected.
    """
    dataset = pd.read_csv(path).dropna()
    # Explicit copy so the column assignments below never write into a view.
    dataset = dataset[FEATURE_NAMES].copy()

    # "Diabetic" is a substring match: any value containing "Yes" maps to 1.
    dataset["Diabetic"] = (
        dataset["Diabetic"].str.contains("Yes", regex=False).astype(int)
    )
    for column in YES_NO_COLUMNS:
        dataset[column] = (dataset[column] == "Yes").astype(int)
    dataset["Sex"] = (dataset["Sex"] == "Female").astype(int)
    return dataset


def _undersample(dataset, ratio=3):
    """Return a shuffled subset holding ``ratio`` rows per positive example.

    Data preparation with an appropriate class split: in the original data
    class 0 made up 91% of the examples. Rows are sorted so that positives
    come first and the first ``ratio * positives`` rows are kept, i.e. every
    positive row plus ``(ratio - 1) * positives`` negative rows (fewer if the
    data does not contain that many).

    Raises:
        ValueError: if *dataset* contains no positive example.
    """
    positive_count = int((dataset[TARGET] == 1).sum())
    if positive_count == 0:
        raise ValueError(f"no positive '{TARGET}' examples in the dataset")

    subset = dataset.sort_values(by=[TARGET], ascending=False)[:positive_count * ratio]
    # Seeded shuffle so that repeated runs produce the same train/test split.
    return subset.sample(frac=1, random_state=RANDOM_STATE)


def _build_model(hidden_units, loss):
    """Build and compile a dense binary classifier.

    *hidden_units* gives the width of each ReLU hidden layer; a single
    sigmoid unit is appended as the output. The model is compiled with SGD,
    the given *loss*, and precision / accuracy / recall metrics.
    """
    layers = [
        tf.keras.layers.Dense(units, activation='relu') for units in hidden_units
    ]
    layers.append(tf.keras.layers.Dense(1, activation='sigmoid'))

    model = tf.keras.Sequential(layers)
    model.compile(
        loss=loss,
        optimizer='sgd',
        metrics=[
            tf.keras.metrics.Precision(name="precision"),
            "accuracy",
            tf.keras.metrics.Recall(name='recall'),
        ],
    )
    return model


def _format_keras_report(label, evaluation, f1):
    """Format the metrics dict returned by ``model.evaluate`` plus an F1 score."""
    return (
        f"{label} EVALUATION: LOSS:{round(evaluation.get('loss'), 4)}, "
        f"ACCURACY: {round(evaluation.get('accuracy'), 4)}, "
        f"RECALL: {round(evaluation.get('recall'), 4)}, "
        f"F1_SCORE:{round(f1, 4)}, "
        f"PRECISION: {round(evaluation.get('precision'), 4)}"
    )


def main():
    """Train and evaluate both neural networks and the Naive Bayes baseline."""
    no_of_epochs = 50
    batch_size = 64

    dataset = _undersample(_load_dataset('heart_2020_cleaned.csv'))
    dataset_train, dataset_test = train_test_split(
        dataset, test_size=.3, train_size=.7, random_state=RANDOM_STATE
    )

    # Every selected column except the label is a model input.
    train_feature_names = [name for name in FEATURE_NAMES if name != TARGET]
    train_Y = dataset_train[TARGET]
    test_Y = dataset_test[TARGET]

    # Fit the scaler on the training split only and reuse its statistics for
    # the test split, so both are standardized identically and nothing from
    # the test data leaks into preprocessing.
    scaler = StandardScaler()
    train_X = scaler.fit_transform(dataset_train[train_feature_names].astype(np.float32))
    test_X = scaler.transform(dataset_test[train_feature_names].astype(np.float32))

    train_X_tensor = tf.convert_to_tensor(train_X)
    test_X_tensor = tf.convert_to_tensor(test_X)

    model = _build_model([64, 32, 4], tf.keras.losses.binary_crossentropy)
    model2 = _build_model([4], tf.keras.losses.msle)

    print("================ MODEL 1 TRAINING =======================")
    model.fit(train_X_tensor, train_Y, epochs=no_of_epochs, batch_size=batch_size)
    print("================ MODEL 2 TRAINING =======================")
    model2.fit(train_X_tensor, train_Y, epochs=no_of_epochs, batch_size=batch_size)

    # Round the sigmoid outputs to hard 0/1 labels and flatten (n, 1) -> (n,).
    prediction_1 = np.round(model.predict(test_X_tensor), 0).ravel()
    prediction_2 = np.round(model2.predict(test_X_tensor), 0).ravel()

    evaluation = model.evaluate(test_X_tensor, test_Y, batch_size=batch_size, return_dict=True)
    evaluation_2 = model2.evaluate(test_X_tensor, test_Y, batch_size=batch_size, return_dict=True)

    # scikit-learn metrics take (y_true, y_pred), in that order.
    f1_model_1 = f1_score(test_Y, prediction_1)
    f1_model_2 = f1_score(test_Y, prediction_2)

    print(_format_keras_report("MODEL 1", evaluation, f1_model_1))
    print(_format_keras_report("MODEL 2", evaluation_2, f1_model_2))

    bayes = GaussianNB()
    bayes.fit(train_X, train_Y)
    pred_bayes = bayes.predict(test_X)
    score_bayes = bayes.score(test_X, test_Y)
    # Log loss is defined on predicted probabilities, not on hard labels.
    loss_bayes = log_loss(test_Y, bayes.predict_proba(test_X))
    precision_bayes = precision_score(test_Y, pred_bayes)
    bayes_recall = recall_score(test_Y, pred_bayes)
    bayes_f1 = f1_score(test_Y, pred_bayes)

    print(f"NAIVE BAYES CLASSIFIER: LOSS: {loss_bayes} ,ACCURACY:{score_bayes}, RECALL: {bayes_recall},F1_SCORE: {bayes_f1}, PRECISION: {precision_bayes}")


if __name__ == "__main__":
    main()