feat: sacred

This commit is contained in:
Jakub Zaręba 2023-05-10 19:49:38 +02:00
parent b2d430825e
commit cf89b66ba2
2 changed files with 110 additions and 64 deletions

View File

@@ -1,14 +1,29 @@
from sacred import Experiment
from sacred.observers import MongoObserver, FileStorageObserver
import os

# Sacred experiment handle used by the config scope, the captured evaluation
# function and the entry point in this script.
# NOTE(review): Sacred documents interactive=True as a mode for notebook/REPL
# sessions; confirm it is really wanted for a script started via automain.
ex = Experiment('s487187_experiment', interactive=True)

# The MongoDB URL embeds credentials, so let the environment supply it instead
# of forcing a source edit. The previous literal stays as the fallback so
# existing runs behave exactly as before.
# NOTE(review): consider dropping the credentialed fallback once
# SACRED_MONGO_URL is set wherever this script runs.
_mongo_url = os.environ.get(
    'SACRED_MONGO_URL', 'mongodb://admin:IUM_2021@172.17.0.1:27017'
)
ex.observers.append(MongoObserver(url=_mongo_url, db_name='sacred'))

# Also keep a local copy of every run under ./results.
ex.observers.append(FileStorageObserver('results'))
@ex.config
def my_config():
    """Default file paths for the evaluation run; each local is a config entry."""
    model_path = 'model.h5'
    test_data_path = 'data.csv'
    metrics_file_path = 'metrics.txt'
    plot_path = 'plot.png'
@ex.capture
def evaluate_model(model_path, test_data_path, metrics_file_path, plot_path):
import tensorflow as tf import tensorflow as tf
import pandas as pd import pandas as pd
import numpy as np import numpy as np
from sklearn.preprocessing import MinMaxScaler from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import accuracy_score, f1_score, mean_squared_error
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import os import os
model = tf.keras.models.load_model('model.h5') model = tf.keras.models.load_model(model_path)
test_data = pd.read_csv('data.csv', sep=';') test_data = pd.read_csv(test_data_path, sep=';')
test_data = pd.get_dummies(test_data, columns=['Sex', 'Medal']) test_data = pd.get_dummies(test_data, columns=['Sex', 'Medal'])
test_data = test_data.drop(columns=['Name', 'Team', 'NOC', 'Games', 'Year', 'Season', 'City', 'Sport', 'Event']) test_data = test_data.drop(columns=['Name', 'Team', 'NOC', 'Games', 'Year', 'Season', 'City', 'Sport', 'Event'])
@@ -27,18 +42,27 @@ y_pred = model.predict(X_test)
top_1_accuracy = tf.keras.metrics.categorical_accuracy(y_test, y_pred) top_1_accuracy = tf.keras.metrics.categorical_accuracy(y_test, y_pred)
top_5_accuracy = tf.keras.metrics.top_k_categorical_accuracy(y_test, y_pred, k=5) top_5_accuracy = tf.keras.metrics.top_k_categorical_accuracy(y_test, y_pred, k=5)
metrics_file = 'metrics.txt' if os.path.exists(metrics_file_path):
if os.path.exists(metrics_file): metrics_df = pd.read_csv(metrics_file_path)
metrics_df = pd.read_csv(metrics_file)
else: else:
metrics_df = pd.DataFrame(columns=['top_1_accuracy', 'top_5_accuracy']) metrics_df = pd.DataFrame(columns=['top_1_accuracy', 'top_5_accuracy'])
new_row = pd.DataFrame([{'top_1_accuracy': np.mean(top_1_accuracy.numpy()), 'top_5_accuracy': np.mean(top_5_accuracy.numpy())}]) new_row = pd.DataFrame([{'top_1_accuracy': np.mean(top_1_accuracy.numpy()), 'top_5_accuracy': np.mean(top_5_accuracy.numpy())}])
metrics_df = pd.concat([metrics_df, new_row], ignore_index=True) metrics_df = pd.concat([metrics_df, new_row], ignore_index=True)
metrics_df.to_csv(metrics_file, index=False) metrics_df.to_csv(metrics_file_path, index=False)
plt.figure(figsize=(10, 6)) plt.figure(figsize=(10, 6))
plt.plot(metrics_df['top_1_accuracy'], label='Top-1 Accuracy') plt.plot(metrics_df['top_1_accuracy'], label='Top-1 Accuracy')
plt.plot(metrics_df['top_5_accuracy'], label='Top-5 Accuracy') plt.plot(metrics_df['top_5_accuracy'], label='Top-5 Accuracy')
plt.legend() plt.legend()
plt.savefig('plot.png') plt.savefig(plot_path)
ex.log_scalar('top_1_accuracy', np.mean(top_1_accuracy.numpy()))
ex.log_scalar('top_5_accuracy', np.mean(top_5_accuracy.numpy()))
ex.add_artifact(model_path)
ex.add_artifact(metrics_file_path)
ex.add_artifact(plot_path)
@ex.automain
def main():
    """Entry point: run the evaluation; Sacred supplies its path arguments."""
    evaluate_model()

View File

@@ -1,18 +1,34 @@
from sacred import Experiment
from sacred.observers import MongoObserver, FileStorageObserver
import os

# Sacred experiment handle used by the config scope, the captured training
# function and the entry point in this script.
ex = Experiment('s487187-training')

# The MongoDB URL embeds credentials, so let the environment supply it instead
# of forcing a source edit. The previous literal stays as the fallback so
# existing runs behave exactly as before.
# NOTE(review): consider dropping the credentialed fallback once
# SACRED_MONGO_URL is set wherever this script runs.
_mongo_url = os.environ.get(
    'SACRED_MONGO_URL', 'mongodb://admin:IUM_2021@172.17.0.1:27017'
)
ex.observers.append(MongoObserver(url=_mongo_url, db_name='sacred'))

# Also keep a local copy of every run under ./results.
ex.observers.append(FileStorageObserver('results'))
@ex.config
def my_config():
    """Default training settings; each local below is a config entry."""
    data_file = 'data.csv'
    model_file = 'model.h5'
    epochs = 10
    batch_size = 32
    test_size = 0.2
    random_state = 42
@ex.capture
def train_model(data_file, model_file, epochs, batch_size, test_size, random_state):
import pandas as pd import pandas as pd
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf import tensorflow as tf
from imblearn.over_sampling import SMOTE from imblearn.over_sampling import SMOTE
smote = SMOTE(random_state=42) smote = SMOTE(random_state=random_state)
data = pd.read_csv('data.csv', sep=';') data = pd.read_csv(data_file, sep=';')
print('Total rows:', len(data)) print('Total rows:', len(data))
print('Rows with medal:', len(data.dropna(subset=['Medal']))) print('Rows with medal:', len(data.dropna(subset=['Medal'])))
data = pd.get_dummies(data, columns=['Sex', 'Medal']) data = pd.get_dummies(data, columns=['Sex', 'Medal'])
data = data.drop(columns=['Name', 'Team', 'NOC', 'Games', 'Year', 'Season', 'City', 'Sport', 'Event']) data = data.drop(columns=['Name', 'Team', 'NOC', 'Games', 'Year', 'Season', 'City', 'Sport', 'Event'])
scaler = MinMaxScaler() scaler = MinMaxScaler()
@@ -28,7 +44,7 @@ y = y.fillna(0)
y = y.values y = y.values
X_resampled, y_resampled = smote.fit_resample(X, y) X_resampled, y_resampled = smote.fit_resample(X, y)
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42) X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=test_size, random_state=random_state)
model = tf.keras.models.Sequential() model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Dense(64, input_dim=X_train.shape[1], activation='relu')) model.add(tf.keras.layers.Dense(64, input_dim=X_train.shape[1], activation='relu'))
@@ -37,10 +53,16 @@ model.add(tf.keras.layers.Dense(y.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.fit(X_train, y_train, epochs=10, batch_size=32) model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size)
loss, accuracy = model.evaluate(X_test, y_test) loss, accuracy = model.evaluate(X_test, y_test)
print('Test accuracy:', accuracy) print('Test accuracy:', accuracy)
model.save('model.h5') model.save(model_file)
return accuracy
@ex.main
def run_experiment(model_file='model.h5'):
    """Train the model, then log its test accuracy and attach the saved model.

    Args:
        model_file: Path of the saved model to attach as an artifact. Sacred
            fills this from the ``model_file`` config entry when the
            experiment runs; the default matches the previously hard-coded
            name.
    """
    # NOTE(review): @ex.main only registers the entry point. Confirm the script
    # calls ex.run() / ex.run_commandline() somewhere not visible here, or
    # switch to @ex.automain as the evaluation script does.
    accuracy = train_model()
    ex.log_scalar('accuracy', accuracy)
    # Attach the file train_model saved to (it uses the same config entry)
    # rather than a fixed name, so overriding model_file does not attach a
    # stale or missing 'model.h5'.
    ex.add_artifact(model_file)