ium_444465/ml_training.py

55 lines
1.7 KiB
Python
Raw Normal View History

2022-04-07 21:34:56 +02:00
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
2022-05-01 19:48:29 +02:00
import sys
2022-04-07 21:34:56 +02:00
2022-05-08 18:41:08 +02:00
2022-05-28 15:23:08 +02:00
def main() -> None:
    """Train a binary heart-disease classifier and save it to disk.

    Reads ``training_data.csv`` and ``test_data.csv`` from the current
    working directory, standardizes the feature columns, fits a small
    fully-connected Keras network on the training split and writes the
    fitted model to ``trained_model``.

    The number of epochs is taken from the single command-line argument
    when exactly one is given; otherwise it defaults to 10.

    Raises:
        ValueError: If the command-line argument is not a valid integer.
    """
    # Exactly one CLI argument is interpreted as the epoch count; any other
    # argument count falls back to the default.
    no_of_epochs = int(sys.argv[1]) if len(sys.argv) == 2 else 10

    # NOTE(review): "HeartDisease" is also the training target below, so the
    # label is fed to the model as an input feature (label leakage). It is
    # kept here because removing it changes the saved model's input width,
    # which consumers of "trained_model" may rely on -- confirm and drop it.
    feature_names = ["BMI", "SleepTime", "Sex", "Diabetic", "PhysicalActivity", "Smoking", "AlcoholDrinking",
                     "HeartDisease"]

    scaler = StandardScaler()

    dataset_train = pd.read_csv("training_data.csv")
    dataset_test = pd.read_csv("test_data.csv")

    model = tf.keras.Sequential([
        tf.keras.layers.Dense(16, activation='relu'),
        tf.keras.layers.Dense(8, activation='relu'),
        tf.keras.layers.Dense(4, activation='relu'),
        # Single sigmoid unit: output is paired with binary cross-entropy.
        tf.keras.layers.Dense(1, activation='sigmoid')
    ])
    model.compile(
        loss=tf.keras.losses.binary_crossentropy,
        # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
        metrics=["accuracy", tf.keras.metrics.Recall(name='recall')]
    )

    train_X = dataset_train[feature_names].astype(np.float32)
    train_Y = dataset_train["HeartDisease"].astype(np.float32)
    test_X = dataset_test[feature_names].astype(np.float32)
    test_Y = dataset_test["HeartDisease"].astype(np.float32)

    # Fit the scaler on the training split only, then reuse the same
    # mean/variance for the test split so both are on an identical scale.
    train_X = scaler.fit_transform(train_X)
    test_X = scaler.transform(test_X)

    # Show the class balance of the training labels.
    print(train_Y.value_counts())

    train_X = tf.convert_to_tensor(train_X)
    train_Y = tf.convert_to_tensor(train_Y)
    # The test tensors are prepared here but are not consumed by fit() below.
    test_X = tf.convert_to_tensor(test_X)
    test_Y = tf.convert_to_tensor(test_Y)

    model.fit(train_X, train_Y, epochs=no_of_epochs)

    model.save("trained_model")
2022-05-28 15:23:08 +02:00
# Script entry point: the call is unguarded, so training also starts when
# this module is imported, not only when it is executed directly.
main()