Update train.py

This commit is contained in:
s452487 2024-05-06 12:15:12 +02:00
parent 1c31c18aa1
commit c56c88db3c
1 changed files with 90 additions and 86 deletions

176
train.py
View File

@ -1,87 +1,91 @@
import pandas as pd
# Load the three pre-split datasets from the current working directory.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
valid = pd.read_csv("valid.csv")

# Diagnostic output: which columns pandas parsed as float64.
num_columns = train.select_dtypes(['float64']).columns
print(num_columns)

# Feature columns fed to the network, in input order.
x_columns = [
    'Male', 'GeneralHealth', 'PhysicalHealthDays', 'MentalHealthDays',
    'PhysicalActivities', 'SleepHours', 'RemovedTeeth',
    'HadAngina', 'HadStroke', 'HadAsthma', 'HadSkinCancer', 'HadCOPD',
    'HadDepressiveDisorder', 'HadKidneyDisease', 'HadArthritis',
    'HadDiabetes', 'DeafOrHardOfHearing', 'BlindOrVisionDifficulty',
    'DifficultyConcentrating', 'DifficultyWalking',
    'DifficultyDressingBathing', 'DifficultyErrands', 'SmokerStatus',
    'ECigaretteUsage', 'ChestScan', 'HeightInMeters', 'WeightInKilograms',
    'BMI', 'AlcoholDrinkers', 'HIVTesting', 'FluVaxLast12', 'PneumoVaxEver',
    'TetanusLast10Tdap', 'HighRiskLastYear', 'CovidPos',
]
print(x_columns)

# Drop rows with a missing feature value from every split before slicing.
# A single NaN input turns the loss (and the prediction for that row) into
# NaN, so such rows must not reach fit() or predict().
train = train.dropna(subset=x_columns)
valid = valid.dropna(subset=x_columns)
test = test.dropna(subset=x_columns)

# Binary target column.
y_column = 'HadHeartAttack'

train_x = train[x_columns]
train_y = train[y_column]
test_x = test[x_columns]
test_y = test[y_column]

# Print the column/dtype/non-null summary of the cleaned training frame.
train.info()
import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras.optimizers import Adam
def create_model(input_dim=35):
    """Build and compile the binary classifier.

    The network is a feed-forward stack: Dense(64, relu) -> Dropout(0.2) ->
    Dense(32, relu) -> Dropout(0.2) -> Dense(1, sigmoid). It is compiled with
    binary cross-entropy loss, a default-configured Adam optimizer and the
    accuracy metric.

    Args:
        input_dim: Number of input features per sample. Defaults to 35, the
            width that was previously hard-coded, so ``create_model()`` keeps
            working unchanged.

    Returns:
        The compiled ``keras.Model``.
    """
    inputs = keras.Input(shape=(input_dim,))
    dense1 = layers.Dense(64, activation="relu")(inputs)
    dropout1 = layers.Dropout(0.2)(dense1)
    dense2 = layers.Dense(32, activation="relu")(dropout1)
    dropout2 = layers.Dropout(0.2)(dense2)
    output = layers.Dense(1, activation="sigmoid")(dropout2)
    model = keras.Model(inputs=inputs, outputs=output)
    model.compile(loss='binary_crossentropy', optimizer=Adam(), metrics=['accuracy'])
    return model


# Size the input layer from the feature list so that editing x_columns cannot
# silently desynchronise the model from the data it is trained on.
model = create_model(input_dim=len(x_columns))
model.summary()
import os
import json
def read_training_epochs(env=None, default=11):
    """Return the number of training epochs requested via the environment.

    Reads the ``training_parameters`` variable, which is expected to hold a
    JSON object, and returns its ``"epochs"`` entry converted to ``int``.

    Args:
        env: Mapping to read the variable from; ``os.environ`` when ``None``.
        default: Epoch count used when the variable is not set or the JSON
            object has no ``"epochs"`` key.

    Returns:
        The epoch count as an ``int``.

    Raises:
        json.JSONDecodeError: The variable is set but is not valid JSON.
        ValueError: The ``"epochs"`` value cannot be converted to ``int``.
    """
    env = os.environ if env is None else env
    raw = env.get('training_parameters')
    if raw is None:
        return default
    # int() so that a quoted value such as {"epochs": "11"} is usable as an
    # epoch count instead of being passed on as a string.
    return int(json.loads(raw).get("epochs", default))


parameter_epochs = read_training_epochs()
# Features and target of the "valid" split; this split is not passed to fit().
valid_x = valid[x_columns]
valid_y = valid[y_column]

# Early stopping used as regularisation: watch val_loss (lower is better),
# allow 3 epochs without improvement, then restore the best weights.
callback = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=3,
    restore_best_weights=True,
)

# The "test" split is the validation data monitored during fitting.
history = model.fit(
    train_x,
    train_y,
    validation_data=(test_x, test_y),
    epochs=parameter_epochs,
    callbacks=[callback],
)

# Persist the trained model in the native Keras format.
model.save("model.keras")
import numpy as np
# Column 0 of the model output: one predicted probability per row.
predictions = model.predict(valid_x)[:,0]
true_answers = valid_y.to_numpy()

# Round each probability to the nearest class label, then take the share of
# rows where the rounded prediction equals the true label.
rounded_predictions = np.rint(predictions)
matches = rounded_predictions == true_answers
validation_accuracy = np.sum(matches)/len(true_answers)
print(f"Poprawność na zbiorze walidacyjnym: {validation_accuracy:.2%}")

# Show the first 100 raw probabilities and their rounded labels.
print(predictions[:100])
print(rounded_predictions[:100])
import pandas as pd
# Read the three pre-split CSV files from the current working directory.
train, test, valid = (
    pd.read_csv(csv_path) for csv_path in ("train.csv", "test.csv", "valid.csv")
)

# Diagnostic output: the columns pandas parsed as float64.
num_columns = train.select_dtypes(['float64']).columns
print(num_columns)
len(num_columns)  # value is discarded; prints nothing when run as a script

# Names of the feature columns, in the order they are fed to the model.
x_columns = [
    'Male',
    'GeneralHealth',
    'PhysicalHealthDays',
    'MentalHealthDays',
    'PhysicalActivities',
    'SleepHours',
    'RemovedTeeth',
    'HadAngina',
    'HadStroke',
    'HadAsthma',
    'HadSkinCancer',
    'HadCOPD',
    'HadDepressiveDisorder',
    'HadKidneyDisease',
    'HadArthritis',
    'HadDiabetes',
    'DeafOrHardOfHearing',
    'BlindOrVisionDifficulty',
    'DifficultyConcentrating',
    'DifficultyWalking',
    'DifficultyDressingBathing',
    'DifficultyErrands',
    'SmokerStatus',
    'ECigaretteUsage',
    'ChestScan',
    'HeightInMeters',
    'WeightInKilograms',
    'BMI',
    'AlcoholDrinkers',
    'HIVTesting',
    'FluVaxLast12',
    'PneumoVaxEver',
    'TetanusLast10Tdap',
    'HighRiskLastYear',
    'CovidPos',
]
print(x_columns)
len(x_columns)  # value is discarded; prints nothing when run as a script

# Remove every row that has a missing value in any feature column.
train = train.dropna(subset=x_columns)
valid = valid.dropna(subset=x_columns)
test = test.dropna(subset=x_columns)

# Name of the binary target column.
y_column = 'HadHeartAttack'

# Separate features from target for the train and test splits.
train_x, train_y = train[x_columns], train[y_column]
test_x, test_y = test[x_columns], test[y_column]

# Print the column/dtype/non-null summary of the cleaned training frame.
train.info()
import tensorflow as tf
from tensorflow import keras
from keras import layers
from keras.optimizers import Adam
def create_model(input_dim=35):
    """Build and compile the feed-forward binary classifier.

    Architecture: two ReLU hidden layers of 64 and 32 units, each followed by
    Dropout(0.2), and a single sigmoid output unit. The model is compiled
    with binary cross-entropy loss, a default-configured Adam optimizer and
    the accuracy metric.

    Args:
        input_dim: Number of features in one input sample. The default of 35
            is the value that used to be hard-coded, so existing
            ``create_model()`` calls behave as before.

    Returns:
        The compiled ``keras.Model``.
    """
    inputs = keras.Input(shape=(input_dim,))

    hidden = layers.Dense(64, activation="relu")(inputs)
    hidden = layers.Dropout(0.2)(hidden)
    hidden = layers.Dense(32, activation="relu")(hidden)
    hidden = layers.Dropout(0.2)(hidden)
    output = layers.Dense(1, activation="sigmoid")(hidden)

    model = keras.Model(inputs=inputs, outputs=output)
    model.compile(
        loss='binary_crossentropy',
        optimizer=Adam(),
        metrics=['accuracy'],
    )
    return model


# Derive the input width from the feature list rather than repeating the
# number, so a change to x_columns is picked up by the model automatically.
model = create_model(input_dim=len(x_columns))
model.summary()
import os
import json
def read_training_epochs(env=None, default=11):
    """Return the number of training epochs requested via the environment.

    Reads the ``training_parameters`` variable, which is expected to hold a
    JSON object, and returns its ``"epochs"`` entry converted to ``int``.

    Args:
        env: Mapping to read the variable from; ``os.environ`` when ``None``.
        default: Epoch count used when the variable is not set or the JSON
            object has no ``"epochs"`` key.

    Returns:
        The epoch count as an ``int``.

    Raises:
        json.JSONDecodeError: The variable is set but is not valid JSON.
        ValueError: The ``"epochs"`` value cannot be converted to ``int``.
    """
    source = os.environ if env is None else env
    raw = source.get('training_parameters')
    if raw is None:
        return default
    parameters = json.loads(raw)
    # Coerce to int: JSON may carry the count as a string or a float, and the
    # value is used directly as the epoch count for training.
    return int(parameters.get("epochs", default))


parameter_epochs = read_training_epochs()
# Early stopping for regularisation: monitor val_loss (minimised), tolerate
# 3 epochs without improvement, and restore the best weights afterwards.
callback = keras.callbacks.EarlyStopping(
    monitor='val_loss', mode='min',
    patience=3, restore_best_weights=True,
)

# Fit on the training split; the "test" split is the validation data that
# the early-stopping callback monitors.
history = model.fit(
    train_x, train_y,
    validation_data=(test_x, test_y),
    epochs=parameter_epochs,
    callbacks=[callback],
)

# Store the trained model on disk in the native Keras format.
model.save("model.keras")

# Features and target of the "valid" split, which fit() never receives.
valid_x, valid_y = valid[x_columns], valid[y_column]
import numpy as np
# Column 0 of the model output: one predicted probability per row.
predictions = model.predict(valid_x)[:,0]
true_answers = valid_y.to_numpy()

# Accuracy is the share of rows whose rounded probability equals the label.
rounded_predictions = np.rint(predictions)
correct_count = np.sum(rounded_predictions == true_answers)
validation_accuracy = correct_count/len(true_answers)
print(f"Poprawność na zbiorze walidacyjnym: {validation_accuracy:.2%}")

# Side-by-side sample of the first 100 rows: raw probabilities, rounded
# predictions and the true labels.
print(predictions[:100])
print(rounded_predictions[:100])
print(true_answers[:100])