Update train.py
This commit is contained in:
parent
1c31c18aa1
commit
c56c88db3c
176
train.py
176
train.py
|
@ -1,87 +1,91 @@
|
|||
import pandas as pd
|
||||
# Read the three dataset splits from CSV files in the working directory.
train, test, valid = (
    pd.read_csv(csv_path) for csv_path in ("train.csv", "test.csv", "valid.csv")
)

# Show which columns pandas parsed as float64.
num_columns = train.select_dtypes(include=['float64']).columns
print(num_columns)

# Bare expression: only displays a value in a notebook/REPL; when run as a
# script it has no effect.
len(num_columns)

# Feature columns fed to the model, in input order.
x_columns = [
    'Male',
    'GeneralHealth',
    'PhysicalHealthDays',
    'MentalHealthDays',
    'PhysicalActivities',
    'SleepHours',
    'RemovedTeeth',
    'HadAngina',
    'HadStroke',
    'HadAsthma',
    'HadSkinCancer',
    'HadCOPD',
    'HadDepressiveDisorder',
    'HadKidneyDisease',
    'HadArthritis',
    'HadDiabetes',
    'DeafOrHardOfHearing',
    'BlindOrVisionDifficulty',
    'DifficultyConcentrating',
    'DifficultyWalking',
    'DifficultyDressingBathing',
    'DifficultyErrands',
    'SmokerStatus',
    'ECigaretteUsage',
    'ChestScan',
    'HeightInMeters',
    'WeightInKilograms',
    'BMI',
    'AlcoholDrinkers',
    'HIVTesting',
    'FluVaxLast12',
    'PneumoVaxEver',
    'TetanusLast10Tdap',
    'HighRiskLastYear',
    'CovidPos',
]
print(x_columns)

# Bare expression: no effect outside a notebook/REPL.
len(x_columns)

# Name of the label column.
y_column = 'HadHeartAttack'

# Split each frame into features and label.
train_x, train_y = train[x_columns], train[y_column]
test_x, test_y = test[x_columns], test[y_column]

# Print dtypes and non-null counts of the training frame.
train.info()
|
||||
|
||||
import tensorflow as tf
|
||||
from tensorflow import keras
|
||||
from keras import layers
|
||||
from keras.optimizers import Adam
|
||||
def create_model(input_dim=35):
    """Build and compile the binary classifier.

    The network is a small multilayer perceptron: two ReLU hidden layers
    (64 and 32 units), each followed by dropout with rate 0.2, and a single
    sigmoid output unit.

    Args:
        input_dim: Number of input features per sample. Defaults to 35,
            the width that was previously hard-coded and that matches the
            35 entries of ``x_columns``; pass ``len(x_columns)`` if the
            feature list changes.

    Returns:
        A ``keras.Model`` compiled with binary cross-entropy loss, an
        ``Adam`` optimizer created with its default arguments, and
        ``accuracy`` as the reported metric.
    """
    inputs = keras.Input(shape=(input_dim,))

    hidden = layers.Dense(64, activation="relu")(inputs)
    hidden = layers.Dropout(0.2)(hidden)
    hidden = layers.Dense(32, activation="relu")(hidden)
    hidden = layers.Dropout(0.2)(hidden)

    output = layers.Dense(1, activation="sigmoid")(hidden)

    model = keras.Model(inputs=inputs, outputs=output)
    model.compile(
        loss='binary_crossentropy',
        optimizer=Adam(),
        metrics=['accuracy'],
    )
    return model
|
||||
|
||||
# Instantiate the compiled network, then print its layer-by-layer summary
# so the architecture is visible in the training output.
model = create_model()
model.summary()
|
||||
|
||||
import os
|
||||
import json
|
||||
|
||||
def resolve_epochs(raw_parameters, default=11):
    """Return the epoch count encoded in a JSON parameter string.

    Args:
        raw_parameters: Text of a JSON object such as ``'{"epochs": 20}'``,
            or ``None`` / an empty string when no parameters were supplied.
        default: Epoch count used when nothing was supplied or the JSON
            object has no ``"epochs"`` key.

    Returns:
        The number of epochs as an ``int``.

    Raises:
        json.JSONDecodeError: If ``raw_parameters`` is non-empty but is not
            valid JSON.
        ValueError: If the ``"epochs"`` value cannot be converted to ``int``.
        TypeError: If the ``"epochs"`` value is ``null``.
    """
    if not raw_parameters:
        return default
    parameter_dict = json.loads(raw_parameters)
    # JSON may carry the value as a string ("5") or a float (5.0); coerce it
    # so the training loop always receives a plain int.
    return int(parameter_dict.get("epochs", default))


# Training parameters arrive as a JSON object in the `training_parameters`
# environment variable; fall back to the default when it is unset or empty.
training_parameters = os.environ.get('training_parameters')
parameter_epochs = resolve_epochs(training_parameters)
|
||||
|
||||
|
||||
|
||||
# Early stopping as regularisation: monitor validation loss, stop once it has
# not improved for 3 epochs, and restore the best weights seen.
callback = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=3,
    restore_best_weights=True,
)

# The "test" split is passed as validation data, so it is the split that
# early stopping monitors during training.
history = model.fit(
    train_x,
    train_y,
    validation_data=(test_x, test_y),
    epochs=parameter_epochs,
    callbacks=[callback],
)

# Persist the trained model.
model.save("model.keras")

# Features and label of the "valid" split, used for the evaluation below.
valid_x, valid_y = valid[x_columns], valid[y_column]
|
||||
|
||||
import numpy as np
|
||||
# Take column 0 of the prediction matrix (the model has a single output unit).
predictions = model.predict(valid_x)[:, 0]
true_answers = valid_y.to_numpy()

# Round each predicted probability to the nearest integer label and measure
# the share of labels that match the ground truth.
predicted_labels = np.rint(predictions)
correct_count = np.sum(predicted_labels == true_answers)
validation_accuracy = correct_count / len(true_answers)
print(f"Poprawność na zbiorze walidacyjnym: {validation_accuracy:.2%}")

# Show the first 100 raw probabilities, then the corresponding rounded labels.
print(predictions[:100])
print(predicted_labels[:100])
|
||||
|
||||
import pandas as pd
|
||||
# Read the three dataset splits from CSV files in the working directory.
train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")
valid = pd.read_csv("valid.csv")

# Show which columns pandas parsed as float64.
num_columns = train.select_dtypes(['float64']).columns
print(num_columns)

# Feature columns fed to the model, in input order.
x_columns = [
    'Male', 'GeneralHealth', 'PhysicalHealthDays', 'MentalHealthDays',
    'PhysicalActivities', 'SleepHours', 'RemovedTeeth',
    'HadAngina', 'HadStroke', 'HadAsthma', 'HadSkinCancer', 'HadCOPD',
    'HadDepressiveDisorder', 'HadKidneyDisease', 'HadArthritis',
    'HadDiabetes', 'DeafOrHardOfHearing', 'BlindOrVisionDifficulty',
    'DifficultyConcentrating', 'DifficultyWalking',
    'DifficultyDressingBathing', 'DifficultyErrands', 'SmokerStatus',
    'ECigaretteUsage', 'ChestScan', 'HeightInMeters', 'WeightInKilograms',
    'BMI', 'AlcoholDrinkers', 'HIVTesting', 'FluVaxLast12', 'PneumoVaxEver',
    'TetanusLast10Tdap', 'HighRiskLastYear', 'CovidPos',
]
print(x_columns)

# Name of the label column.
y_column = 'HadHeartAttack'

# Drop rows with a missing value in any feature OR in the label. Checking the
# features alone would let rows with a missing label through, and a NaN label
# makes the loss NaN during training.
required_columns = x_columns + [y_column]
train = train.dropna(subset=required_columns)
valid = valid.dropna(subset=required_columns)
test = test.dropna(subset=required_columns)

# Split the training and test frames into features and label.
train_x = train[x_columns]
train_y = train[y_column]

test_x = test[x_columns]
test_y = test[y_column]

# Print dtypes and non-null counts of the cleaned training frame.
train.info()
|
||||
|
||||
import tensorflow as tf
|
||||
from tensorflow import keras
|
||||
from keras import layers
|
||||
from keras.optimizers import Adam
|
||||
def create_model(input_dim=35):
    """Build and compile the binary classifier.

    The network is a small multilayer perceptron: two ReLU hidden layers
    (64 and 32 units), each followed by dropout with rate 0.2, and a single
    sigmoid output unit.

    Args:
        input_dim: Number of input features per sample. Defaults to 35,
            the width that was previously hard-coded and that matches the
            35 entries of ``x_columns``; pass ``len(x_columns)`` if the
            feature list changes.

    Returns:
        A ``keras.Model`` compiled with binary cross-entropy loss, an
        ``Adam`` optimizer created with its default arguments, and
        ``accuracy`` as the reported metric.
    """
    inputs = keras.Input(shape=(input_dim,))

    hidden = layers.Dense(64, activation="relu")(inputs)
    hidden = layers.Dropout(0.2)(hidden)
    hidden = layers.Dense(32, activation="relu")(hidden)
    hidden = layers.Dropout(0.2)(hidden)

    output = layers.Dense(1, activation="sigmoid")(hidden)

    model = keras.Model(inputs=inputs, outputs=output)
    model.compile(
        loss='binary_crossentropy',
        optimizer=Adam(),
        metrics=['accuracy'],
    )
    return model
|
||||
|
||||
# Instantiate the compiled network, then print its layer-by-layer summary
# so the architecture is visible in the training output.
model = create_model()
model.summary()
|
||||
|
||||
import os
|
||||
import json
|
||||
|
||||
def resolve_epochs(raw_parameters, default=11):
    """Return the epoch count encoded in a JSON parameter string.

    Args:
        raw_parameters: Text of a JSON object such as ``'{"epochs": 20}'``,
            or ``None`` / an empty string when no parameters were supplied.
        default: Epoch count used when nothing was supplied or the JSON
            object has no ``"epochs"`` key.

    Returns:
        The number of epochs as an ``int``.

    Raises:
        json.JSONDecodeError: If ``raw_parameters`` is non-empty but is not
            valid JSON.
        ValueError: If the ``"epochs"`` value cannot be converted to ``int``.
        TypeError: If the ``"epochs"`` value is ``null``.
    """
    if not raw_parameters:
        return default
    parameter_dict = json.loads(raw_parameters)
    # JSON may carry the value as a string ("5") or a float (5.0); coerce it
    # so the training loop always receives a plain int.
    return int(parameter_dict.get("epochs", default))


# Training parameters arrive as a JSON object in the `training_parameters`
# environment variable; fall back to the default when it is unset or empty.
training_parameters = os.environ.get('training_parameters')
parameter_epochs = resolve_epochs(training_parameters)
|
||||
|
||||
|
||||
|
||||
# Early stopping as regularisation: monitor validation loss, stop once it has
# not improved for 3 epochs, and restore the best weights seen.
callback = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=3,
    restore_best_weights=True,
)

# The "test" split is passed as validation data, so it is the split that
# early stopping monitors during training.
history = model.fit(
    train_x,
    train_y,
    validation_data=(test_x, test_y),
    epochs=parameter_epochs,
    callbacks=[callback],
)

# Persist the trained model.
model.save("model.keras")

# Features and label of the "valid" split, used for the evaluation below.
valid_x, valid_y = valid[x_columns], valid[y_column]
|
||||
|
||||
import numpy as np
|
||||
# Take column 0 of the prediction matrix (the model has a single output unit).
predictions = model.predict(valid_x)[:, 0]
true_answers = valid_y.to_numpy()

# Round each predicted probability to the nearest integer label and measure
# the share of labels that match the ground truth.
predicted_labels = np.rint(predictions)
correct_count = np.sum(predicted_labels == true_answers)
validation_accuracy = correct_count / len(true_answers)
print(f"Poprawność na zbiorze walidacyjnym: {validation_accuracy:.2%}")

# Show the first 100 raw probabilities, the corresponding rounded labels,
# and the ground-truth labels for a side-by-side check.
print(predictions[:100])
print(predicted_labels[:100])
print(true_answers[:100])
|
Loading…
Reference in New Issue