DVC V2
This commit is contained in:
parent
e9d48eeed1
commit
f8f841c344
5
dvc.yaml
Normal file
5
dvc.yaml
Normal file
@ -0,0 +1,5 @@
|
||||
# DVC pipeline definition.
# Each stage declares the command to run plus the files it depends on (deps)
# and the files it produces (outs), so `dvc repro` can decide what to re-run.
stages:
  # Encode the raw dataset and split it into training/test CSV files.
  prepare:
    cmd: python ml_prepare.py
    deps:
      - ml_prepare.py
    outs:
      - training_data.csv
      - test_data.csv
  # Train the model for 15 epochs on the CSV files produced by `prepare`.
  training:
    cmd: python ml_training.py 15
    deps:
      - ml_training.py
      - training_data.csv
      - test_data.csv
    outs:
      - trained_model
|
26
ml_prepare.py
Normal file
26
ml_prepare.py
Normal file
@ -0,0 +1,26 @@
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
|
||||
def main():
    """Encode the raw dataset and write the train/test CSV splits.

    Reads ``heart_2020_cleaned.csv`` from the current working directory,
    drops rows with missing values, converts the categorical columns to
    0/1 integers, keeps only the columns listed in ``feature_names`` and
    writes a 90/10 split to ``training_data.csv`` and ``test_data.csv``.
    """
    feature_names = ["BMI", "SleepTime", "Sex", "Diabetic", "PhysicalActivity", "Smoking", "AlcoholDrinking", "HeartDisease"]

    dataset = pd.read_csv('heart_2020_cleaned.csv')
    dataset = dataset.dropna()

    # "Diabetic" is matched by substring, so any value that contains "Yes"
    # is encoded as 1; everything else becomes 0.
    dataset["Diabetic"] = dataset["Diabetic"].apply(lambda x: int("Yes" in x))

    # Plain Yes/No columns are all encoded the same way, as 0/1 integers.
    # "Smoking" previously skipped the int() conversion and ended up as a
    # bool column, unlike its siblings.
    for column in ("HeartDisease", "PhysicalActivity", "Smoking", "AlcoholDrinking"):
        dataset[column] = dataset[column].apply(lambda x: int(x == "Yes"))

    dataset["Sex"] = dataset["Sex"].apply(lambda x: 1 if x == "Female" else 0)

    dataset = dataset[feature_names]

    # Fixed random_state keeps the split reproducible between runs.
    dataset_train, dataset_test = train_test_split(dataset, test_size=.1, train_size=.9, random_state=1)

    # pandas writes the DataFrame index as the first (unnamed) CSV column
    # by default; that behaviour is kept here.
    dataset_train.to_csv("training_data.csv")
    dataset_test.to_csv("test_data.csv")


# Run the preparation only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
@ -4,51 +4,17 @@ import tensorflow as tf
|
||||
from sklearn.model_selection import train_test_split
|
||||
from sklearn.preprocessing import StandardScaler
|
||||
import sys
|
||||
import sacred
|
||||
from sacred.observers import FileStorageObserver, MongoObserver
|
||||
|
||||
ex = sacred.Experiment("Training model")
|
||||
ex.observers.append(FileStorageObserver('training_experiment'))
|
||||
# ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017',
|
||||
# db_name='sacred'))
|
||||
|
||||
|
||||
@ex.config
def get_config():
    """Define the experiment configuration entry ``no_of_epochs``.

    The value is taken from the single command-line argument when exactly
    one is given, and is 10 otherwise.
    """
    no_of_epochs = int(sys.argv[1]) if len(sys.argv) == 2 else 10
|
||||
|
||||
|
||||
@ex.capture
def evaluate_model(model, test_x, test_y):
    """Evaluate ``model`` on the test data and return a one-line summary.

    ``model.evaluate`` is expected to yield three values, which are
    unpacked as loss, accuracy and recall, in that order.

    Returns:
        A string of the form ``"Accuracy: ..., Loss: ..., Recall: ..."``.
    """
    evaluation = model.evaluate(test_x, test_y, verbose=1)
    test_loss, test_acc, test_rec = evaluation
    summary = f"Accuracy: {test_acc}, Loss: {test_loss}, Recall: {test_rec}"
    return summary
|
||||
|
||||
|
||||
@ex.main
|
||||
def main(no_of_epochs, _run):
|
||||
# no_of_epochs = get_config()
|
||||
def main():
|
||||
no_of_epochs = int(sys.argv[1]) if len(sys.argv) == 2 else 10
|
||||
feature_names = ["BMI", "SleepTime", "Sex", "Diabetic", "PhysicalActivity", "Smoking", "AlcoholDrinking",
|
||||
"HeartDisease"]
|
||||
|
||||
scaler = StandardScaler()
|
||||
feature_names = ["BMI", "SleepTime", "Sex", "Diabetic", "PhysicalActivity", "Smoking", "AlcoholDrinking"]
|
||||
|
||||
dataset = pd.read_csv('heart_2020_cleaned.csv')
|
||||
dataset = dataset.dropna()
|
||||
|
||||
dataset["Diabetic"] = dataset["Diabetic"].apply(lambda x: True if "Yes" in x else False)
|
||||
dataset["HeartDisease"] = dataset["HeartDisease"].apply(lambda x: True if x == "Yes" else False)
|
||||
dataset["PhysicalActivity"] = dataset["PhysicalActivity"].apply(lambda x: True if x == "Yes" else False)
|
||||
dataset["Smoking"] = dataset["Smoking"].apply(lambda x: True if x == "Yes" else False)
|
||||
dataset["AlcoholDrinking"] = dataset["AlcoholDrinking"].apply(lambda x: True if x == "Yes" else False)
|
||||
dataset["Sex"] = dataset["Sex"].apply(lambda x: 1 if x == "Female" else 0)
|
||||
|
||||
dataset_train, dataset_test = train_test_split(dataset, test_size=.1, train_size=.9, random_state=1)
|
||||
|
||||
print(dataset_test.shape)
|
||||
dataset_train = pd.read_csv("training_data.csv")
|
||||
dataset_test = pd.read_csv("test_data.csv")
|
||||
|
||||
model = tf.keras.Sequential([
|
||||
tf.keras.layers.Dense(16, activation='relu'),
|
||||
@ -73,8 +39,8 @@ def main(no_of_epochs, _run):
|
||||
test_X = scaler.fit_transform(test_X)
|
||||
# test_Y = scaler.fit_transform(test_Y)
|
||||
|
||||
|
||||
print(train_Y.value_counts())
|
||||
|
||||
train_X = tf.convert_to_tensor(train_X)
|
||||
train_Y = tf.convert_to_tensor(train_Y)
|
||||
|
||||
@ -84,10 +50,5 @@ def main(no_of_epochs, _run):
|
||||
model.fit(train_X, train_Y, epochs=no_of_epochs)
|
||||
model.save("trained_model")
|
||||
|
||||
metrics = evaluate_model(model, test_X, test_Y)
|
||||
_run.log_scalar("model.eval", metrics)
|
||||
ex.add_artifact("trained_model/saved_model.pb")
|
||||
ex.add_artifact("trained_model/keras_metadata.pb")
|
||||
|
||||
|
||||
ex.run()
|
||||
main()
|
||||
|
31981
test_data.csv
Normal file
31981
test_data.csv
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because one or more lines are too long
Binary file not shown.
Binary file not shown.
Binary file not shown.
287816
training_data.csv
Normal file
287816
training_data.csv
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user