diff --git a/.env b/.env
new file mode 100644
index 0000000..6710ea5
--- /dev/null
+++ b/.env
@@ -0,0 +1,5 @@
+MONGO_INITDB_ROOT_USERNAME=admin
+MONGO_INITDB_ROOT_PASSWORD=IUM_2021
+ME_CONFIG_BASICAUTH_USERNAME=mongo_express_user
+ME_CONFIG_BASICAUTH_PASSWORD=mongo_express_pw
+MONGO_DATABASE=sacred
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index def7842..c0a12e0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,4 +4,6 @@ tensorflow
 numpy
 matplotlib
 mlflow
-dvc
\ No newline at end of file
+dvc
+sacred
+pymongo
\ No newline at end of file
diff --git a/sacredboard/Dockerfile b/sacredboard/Dockerfile
new file mode 100644
index 0000000..a307c7a
--- /dev/null
+++ b/sacredboard/Dockerfile
@@ -0,0 +1,5 @@
+FROM python:3.6-jessie
+
+RUN pip install https://github.com/chovanecm/sacredboard/archive/develop.zip
+
+ENTRYPOINT sacredboard -mu mongodb://$MONGO_INITDB_ROOT_USERNAME:$MONGO_INITDB_ROOT_PASSWORD@mongo:27017/?authMechanism=SCRAM-SHA-1 $MONGO_DATABASE
\ No newline at end of file
diff --git a/train_sacred.py b/train_sacred.py
new file mode 100644
index 0000000..451c0e5
--- /dev/null
+++ b/train_sacred.py
@@ -0,0 +1,73 @@
+import pandas as pd
+from tensorflow import keras
+from tensorflow.keras import layers
+import argparse
+from sacred import Experiment
+from sacred.observers import FileStorageObserver, MongoObserver
+
+ex = Experiment("464980", interactive=True, save_git_info=False)
+ex.observers.append(FileStorageObserver('experiments'))
+ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@tzietkiewicz.vm.wmi.amu.edu.pl:27017',
+                                  db_name='sacred'))
+
+
+@ex.capture
+def capture_params(epochs):
+    print(f"epochs: {epochs}")
+
+
+class RegressionModel:
+    def __init__(self, optimizer="adam", loss="mean_squared_error"):
+        self.model = keras.Sequential([
+            layers.Input(shape=(5,)),  # Input layer
+            layers.Dense(32, activation='relu'),  # Hidden layer with 32 neurons and ReLU activation
+            layers.Dense(1)  # Output layer with a single neuron (for regression)
+        ])
+        self.optimizer = optimizer
+        self.loss = loss
+        self.X_train = None
+        self.X_test = None
+        self.y_train = None
+        self.y_test = None
+
+    def load_data(self, train_path, test_path):
+        data_train = pd.read_csv(train_path)
+        data_test = pd.read_csv(test_path)
+        self.X_train = data_train.drop("Performance Index", axis=1)
+        self.y_train = data_train["Performance Index"]
+        self.X_test = data_test.drop("Performance Index", axis=1)
+        self.y_test = data_test["Performance Index"]
+
+    def train(self, epochs=30):
+        self.model.compile(optimizer=self.optimizer, loss=self.loss)
+        self.model.fit(self.X_train, self.y_train, epochs=epochs, batch_size=32, validation_data=(self.X_test, self.y_test))
+        capture_params(epochs)
+
+    def predict(self, data):
+        prediction = self.model.predict(data)
+        return prediction
+
+    def evaluate(self):
+        test_loss = self.model.evaluate(self.X_test, self.y_test)
+        print(f"Test Loss: {test_loss:.4f}")
+        return test_loss
+
+    def save_model(self):
+        self.model.save("model.keras")
+        ex.add_artifact("model.keras")
+
+
+@ex.main
+def main(_run):
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--epochs')
+
+    args = parser.parse_args()
+    model = RegressionModel()
+    model.load_data("df_train.csv", "df_test.csv")
+    model.train(epochs=int(args.epochs))
+    _run.log_scalar("testing.mean_square_error", model.evaluate())
+    model.save_model()
+
+
+ex.run()
\ No newline at end of file
diff --git a/training/Jenkinsfile b/training/Jenkinsfile
index db92f16..3753adb 100644
--- a/training/Jenkinsfile
+++ b/training/Jenkinsfile
@@ -37,6 +37,19 @@ pipeline {
                 archiveArtifacts artifacts: 'model.keras', onlyIfSuccessful: true
             }
         }
+        stage('Experiments') {
+            agent {
+                dockerfile {
+                    filename 'Dockerfile'
+                    reuseNode true
+                }
+            }
+            steps {
+                sh "chmod +x ./train_sacred.py"
+                sh "python ./train_sacred.py --epochs ${params.EPOCHS}"
+                archiveArtifacts artifacts: 'experiments/**', onlyIfSuccessful: true
+            }
+        }
         stage('Run training'){
             steps{
                 script {