This commit is contained in:
Andrzej Preibisz 2022-05-28 15:23:08 +02:00
parent e9d48eeed1
commit f8f841c344
9 changed files with 319838 additions and 49 deletions

5
dvc.yaml Normal file
View File

@ -0,0 +1,5 @@
# DVC pipeline: data preparation followed by model training.
# Dependencies and outputs are declared with `deps` / `outs`; `cmd` holds only
# the shell command to execute.
stages:
  prepare:
    # Encodes the raw dataset and writes the train/test CSV splits.
    cmd: python ml_prepare.py
    deps:
      - ml_prepare.py
      - heart_2020_cleaned.csv
    outs:
      # NOTE(review): if these CSV files are also committed to Git, they may
      # need `cache: false` so DVC does not try to own them - confirm.
      - training_data.csv
      - test_data.csv
  training:
    # Trains for 15 epochs (first CLI argument) and saves to trained_model/.
    cmd: python ml_training.py 15
    deps:
      - ml_training.py
      - training_data.csv
      - test_data.csv
    outs:
      - trained_model/

26
ml_prepare.py Normal file
View File

@ -0,0 +1,26 @@
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
def main():
    """Encode the raw dataset and write the training/test splits to disk.

    Reads ``heart_2020_cleaned.csv``, drops rows with missing values, converts
    the categorical columns to 0/1 integers, keeps only the columns listed in
    ``feature_names`` and writes a 90/10 split (``random_state=1``) to
    ``training_data.csv`` and ``test_data.csv``.
    """
    feature_names = ["BMI", "SleepTime", "Sex", "Diabetic", "PhysicalActivity",
                     "Smoking", "AlcoholDrinking", "HeartDisease"]
    # Columns that are positive only when the value is exactly "Yes".
    yes_no_columns = ["HeartDisease", "PhysicalActivity", "Smoking",
                      "AlcoholDrinking"]

    dataset = pd.read_csv("heart_2020_cleaned.csv").dropna()

    # "Diabetic" is matched by substring: any answer containing "Yes" counts
    # as positive.
    dataset["Diabetic"] = dataset["Diabetic"].apply(lambda x: int("Yes" in x))
    # Every binary column is stored as 0/1 so the CSV files use one consistent
    # encoding ("Smoking" was previously written as True/False).
    for column in yes_no_columns:
        dataset[column] = (dataset[column] == "Yes").astype(int)
    dataset["Sex"] = (dataset["Sex"] == "Female").astype(int)

    dataset = dataset[feature_names]
    dataset_train, dataset_test = train_test_split(
        dataset, test_size=.1, train_size=.9, random_state=1)

    # The DataFrame index is still written as the first (unnamed) column, so
    # the file layout is unchanged for existing readers.
    dataset_train.to_csv("training_data.csv")
    dataset_test.to_csv("test_data.csv")


if __name__ == "__main__":
    main()

View File

@ -4,51 +4,17 @@ import tensorflow as tf
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler from sklearn.preprocessing import StandardScaler
import sys import sys
import sacred
from sacred.observers import FileStorageObserver, MongoObserver
ex = sacred.Experiment("Training model")
ex.observers.append(FileStorageObserver('training_experiment'))
# ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017',
# db_name='sacred'))
@ex.config def main():
def get_config(): no_of_epochs = int(sys.argv[1]) if len(sys.argv) == 2 else 10
no_of_epochs = 10 feature_names = ["BMI", "SleepTime", "Sex", "Diabetic", "PhysicalActivity", "Smoking", "AlcoholDrinking",
if len(sys.argv) == 2: "HeartDisease"]
no_of_epochs = int(sys.argv[1])
@ex.capture
def evaluate_model(model, test_x, test_y):
test_loss, test_acc, test_rec = model.evaluate(test_x, test_y, verbose=1)
# print("Accuracy:", test_acc)
# print("Loss:", test_loss)
# print("Recall:", test_rec)
return f"Accuracy: {test_acc}, Loss: {test_loss}, Recall: {test_rec}"
@ex.main
def main(no_of_epochs, _run):
# no_of_epochs = get_config()
scaler = StandardScaler() scaler = StandardScaler()
feature_names = ["BMI", "SleepTime", "Sex", "Diabetic", "PhysicalActivity", "Smoking", "AlcoholDrinking"]
dataset = pd.read_csv('heart_2020_cleaned.csv') dataset_train = pd.read_csv("training_data.csv")
dataset = dataset.dropna() dataset_test = pd.read_csv("test_data.csv")
dataset["Diabetic"] = dataset["Diabetic"].apply(lambda x: True if "Yes" in x else False)
dataset["HeartDisease"] = dataset["HeartDisease"].apply(lambda x: True if x == "Yes" else False)
dataset["PhysicalActivity"] = dataset["PhysicalActivity"].apply(lambda x: True if x == "Yes" else False)
dataset["Smoking"] = dataset["Smoking"].apply(lambda x: True if x == "Yes" else False)
dataset["AlcoholDrinking"] = dataset["AlcoholDrinking"].apply(lambda x: True if x == "Yes" else False)
dataset["Sex"] = dataset["Sex"].apply(lambda x: 1 if x == "Female" else 0)
dataset_train, dataset_test = train_test_split(dataset, test_size=.1, train_size=.9, random_state=1)
print(dataset_test.shape)
model = tf.keras.Sequential([ model = tf.keras.Sequential([
tf.keras.layers.Dense(16, activation='relu'), tf.keras.layers.Dense(16, activation='relu'),
@ -73,8 +39,8 @@ def main(no_of_epochs, _run):
test_X = scaler.fit_transform(test_X) test_X = scaler.fit_transform(test_X)
# test_Y = scaler.fit_transform(test_Y) # test_Y = scaler.fit_transform(test_Y)
print(train_Y.value_counts()) print(train_Y.value_counts())
train_X = tf.convert_to_tensor(train_X) train_X = tf.convert_to_tensor(train_X)
train_Y = tf.convert_to_tensor(train_Y) train_Y = tf.convert_to_tensor(train_Y)
@ -84,10 +50,5 @@ def main(no_of_epochs, _run):
model.fit(train_X, train_Y, epochs=no_of_epochs) model.fit(train_X, train_Y, epochs=no_of_epochs)
model.save("trained_model") model.save("trained_model")
metrics = evaluate_model(model, test_X, test_Y)
_run.log_scalar("model.eval", metrics)
ex.add_artifact("trained_model/saved_model.pb")
ex.add_artifact("trained_model/keras_metadata.pb")
main()
ex.run()

31981
test_data.csv Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because one or more lines are too long

Binary file not shown.

287816
training_data.csv Normal file

File diff suppressed because it is too large Load Diff