Initial commit
This commit is contained in:
commit
2b2e6c8073
319796
heart_2020_cleaned.csv
Normal file
319796
heart_2020_cleaned.csv
Normal file
File diff suppressed because it is too large
Load Diff
124
main.py
Normal file
124
main.py
Normal file
@ -0,0 +1,124 @@
|
|||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
import tensorflow as tf
|
||||||
|
import tensorflow_addons as tfa
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from sklearn.metrics import f1_score, recall_score, log_loss, precision_score
|
||||||
|
from sklearn.preprocessing import StandardScaler
|
||||||
|
from sklearn.naive_bayes import GaussianNB
|
||||||
|
|
||||||
|
|
||||||
|
def _encode_features(dataset):
    """Return a copy of *dataset* with its categorical columns encoded as 0/1 ints."""
    # Work on a copy so the assignments below never write into a view of the
    # caller's frame (avoids pandas' SettingWithCopyWarning).
    encoded = dataset.copy()

    # Substring match, as in the original script: any value containing "Yes"
    # counts as positive.
    encoded["Diabetic"] = encoded["Diabetic"].str.contains("Yes", regex=False).astype(int)

    yes_no_columns = ("HeartDisease", "KidneyDisease", "PhysicalActivity", "Stroke",
                      "Smoking", "Asthma", "AlcoholDrinking")
    for column in yes_no_columns:
        encoded[column] = (encoded[column] == "Yes").astype(int)

    # "Female" -> 1, every other value -> 0.
    encoded["Sex"] = (encoded["Sex"] == "Female").astype(int)
    return encoded


def _undersample(dataset, label, ratio=3, random_state=42):
    """Keep the first ``ratio * positives`` rows after sorting positives first, then shuffle.

    With the default ratio this keeps every positive row plus at most twice as
    many negative rows.

    Raises:
        ValueError: if *dataset* contains no row with ``label == 1``.
    """
    positive_count = int((dataset[label] == 1).sum())
    if positive_count == 0:
        raise ValueError(f"no positive examples found in column {label!r}")

    keep = positive_count * ratio
    reduced = dataset.sort_values(by=[label], ascending=False)[:keep]
    # Seeded shuffle so that the seeded train/test split downstream is reproducible.
    return reduced.sample(frac=1, random_state=random_state)


def _compile_classifier(model, loss):
    """Compile *model* with SGD, the given loss and precision/accuracy/recall metrics."""
    model.compile(
        loss=loss,
        optimizer='sgd',
        metrics=[tf.keras.metrics.Precision(name="precision"),
                 "accuracy",
                 tf.keras.metrics.Recall(name='recall')]
    )


def main(csv_path='heart_2020_cleaned.csv', no_of_epochs=50, batch_size=64):
    """Train two Keras classifiers and a Gaussian Naive Bayes model and print their scores.

    The heart-disease CSV is loaded, encoded to numeric features, undersampled,
    split 70/30 into train and test parts and standardized. Each model is then
    trained on the train part and evaluated on the test part.

    Args:
        csv_path: path of the CSV file to read.
        no_of_epochs: number of training epochs for both Keras models.
        batch_size: batch size used for Keras training and evaluation.
    """
    label = "HeartDisease"
    train_feature_names = ["BMI", "SleepTime", "Sex", "Diabetic", "PhysicalActivity", "Smoking", "AlcoholDrinking",
                           "KidneyDisease", "Stroke", "Asthma"]

    # Drop incomplete rows before narrowing the columns (same order as before,
    # so a missing value in any CSV column removes the row).
    dataset = pd.read_csv(csv_path).dropna()
    dataset = _encode_features(dataset[train_feature_names + [label]])

    # Prepare the data with a suitable class ratio, because in the original
    # data class 0 made up 91% of the examples.
    dataset = _undersample(dataset, label)

    dataset_train, dataset_test = train_test_split(dataset, test_size=.3, train_size=.7, random_state=42)

    # Fit the scaler on the training split only and reuse its statistics for
    # the test split; refitting on the test data would scale it differently.
    scaler = StandardScaler()
    train_X = scaler.fit_transform(dataset_train[train_feature_names].astype(np.float32))
    test_X = scaler.transform(dataset_test[train_feature_names].astype(np.float32))
    train_Y = dataset_train[label].to_numpy()
    test_Y = dataset_test[label].to_numpy()

    model = tf.keras.Sequential([
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(32, activation='relu'),
        tf.keras.layers.Dense(4, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    model2 = tf.keras.Sequential([
        tf.keras.layers.Dense(4, activation='relu'),
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    _compile_classifier(model, tf.keras.losses.binary_crossentropy)
    _compile_classifier(model2, tf.keras.losses.msle)

    print("================ MODEL 1 TRAINING =======================")
    model.fit(train_X, train_Y, epochs=no_of_epochs, batch_size=batch_size)

    print("================ MODEL 2 TRAINING =======================")
    model2.fit(train_X, train_Y, epochs=no_of_epochs, batch_size=batch_size)

    # Threshold the sigmoid outputs at 0.5 and flatten to 1-D label vectors
    # for the scikit-learn metrics.
    prediction_1 = (model.predict(test_X) > 0.5).astype(int).ravel()
    prediction_2 = (model2.predict(test_X) > 0.5).astype(int).ravel()

    evaluation = model.evaluate(test_X, test_Y, batch_size=batch_size, return_dict=True)
    evaluation_2 = model2.evaluate(test_X, test_Y, batch_size=batch_size, return_dict=True)

    # scikit-learn metrics take (y_true, y_pred), in that order.
    f1_model_1 = f1_score(test_Y, prediction_1)
    f1_model_2 = f1_score(test_Y, prediction_2)

    print(f"MODEL 1 EVALUATION: LOSS:{round(evaluation.get('loss'), 4)}, "
          f"ACCURACY: {round(evaluation.get('accuracy'), 4)}, "
          f"RECALL: {round(evaluation.get('recall'), 4)}, "
          f"F1_SCORE:{round(f1_model_1, 4)}, "
          f"PRECISION: {round(evaluation.get('precision'), 4)}")
    print(f"MODEL 2 EVALUATION: LOSS:{round(evaluation_2.get('loss'), 4)}, "
          f"ACCURACY: {round(evaluation_2.get('accuracy'), 4)}, "
          f"RECALL: {round(evaluation_2.get('recall'), 4)}, "
          f"F1_SCORE:{round(f1_model_2, 4)}, "
          f"PRECISION: {round(evaluation_2.get('precision'), 4)}")

    bayes = GaussianNB()
    bayes.fit(train_X, train_Y)

    pred_bayes = bayes.predict(test_X)
    score_bayes = bayes.score(test_X, test_Y)
    # Log loss is defined on predicted probabilities, not on hard labels.
    loss_bayes = log_loss(test_Y, bayes.predict_proba(test_X), labels=bayes.classes_)
    precision_bayes = precision_score(test_Y, pred_bayes)
    bayes_recall = recall_score(test_Y, pred_bayes)
    bayes_f1 = f1_score(test_Y, pred_bayes)

    print(f"NAIVE BAYES CLASSIFIER: LOSS: {loss_bayes} ,ACCURACY:{score_bayes}, RECALL: {bayes_recall},F1_SCORE: {bayes_f1}, PRECISION: {precision_bayes}")
|
||||||
|
|
||||||
|
# Run the experiment only when this file is executed as a script, so that
# importing the module does not start a training run.
if __name__ == "__main__":
    main()
|
Loading…
Reference in New Issue
Block a user