diff --git a/JenkinsfileDatasetStats b/JenkinsfileDatasetStats
index e86ee27..b8b38a2 100644
--- a/JenkinsfileDatasetStats
+++ b/JenkinsfileDatasetStats
@@ -20,20 +20,27 @@ pipeline {
         }
         stage('Prepare stats') {
             agent {
-                docker {
-                    image 'mmoryl/ium:latest'
+                // TODO: the prebuilt image below does not work (cause unknown); build from Lab4.dockerfile instead
+                // docker {
+                //     image 'mmoryl/ium:latest'
+                //     reuseNode true
+                // }
+                dockerfile {
+                    filename 'Lab4.dockerfile'
                     reuseNode true
                 }
             }
             steps {
                 copyArtifacts projectName: 'z487183-create-dataset'
                 sh './prepare-stats.sh'
-                archiveArtifacts 'stats.txt'
+                sh 'mkdir -p sacred-files'
                 sh 'python3 property_model.py'
                 sh 'python3 predict_values.py'
-                archiveArtifacts 'test_predictions.csv'
-                archiveArtifacts 'train_predictions.csv'
-                archiveArtifacts 'dev_predictions.csv'
+                // archiveArtifacts 'test_predictions.csv'
+                // archiveArtifacts 'train_predictions.csv'
+                // archiveArtifacts 'dev_predictions.csv'
+                // archiveArtifacts 'stats.txt'
+                archiveArtifacts 'my_runs/**'
             }
         }
     }
diff --git a/Lab4.dockerfile b/Lab4.dockerfile
index 214a2a6..65d97c5 100644
--- a/Lab4.dockerfile
+++ b/Lab4.dockerfile
@@ -7,6 +7,9 @@ RUN pip install scikit-learn
 RUN pip install kaggle
 RUN pip install keras
 RUN pip install tensorflow
+RUN apt-get update && apt-get install -y git
+RUN pip install sacred
+RUN pip install pymongo
 
 ARG DEBIAN_FRONTEND=noninteractive
 
diff --git a/property_model.py b/property_model.py
index 7a54d44..f3e75e2 100644
--- a/property_model.py
+++ b/property_model.py
@@ -1,33 +1,79 @@
 import pandas as pd
 from keras.models import Sequential
 from keras.layers import Dense
+from sacred import Experiment
+import json
+from sacred.observers import MongoObserver
+from sacred.observers import FileStorageObserver
 
-# prepare dataset
-features = ['Rooms', 'Distance', 'Bedroom2', 'Bathroom']
-target = 'PriceAboveMedian'
+ex = Experiment("z487183", interactive=True)
+# NOTE(review): credentials are hard-coded here; consider reading the URL from the environment.
+ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017', db_name='sacred'))
+ex.observers.append(FileStorageObserver('my_runs'))
 
-X_train = pd.read_csv('X_train.csv').values
-y_train = pd.read_csv('Y_train.csv').values
+@ex.config
+def my_config():
+    # Default hyperparameters; Sacred injects them into my_main by name.
+    optimizer = 'sgd'
+    loss_function = 'binary_crossentropy'
 
-X_dev = pd.read_csv('X_val.csv').values
-y_dev = pd.read_csv('Y_val.csv').values
+@ex.automain
+def my_main(optimizer, loss_function, _run):
+    """Train the model, evaluate it on the test split and record the run."""
+    # prepare dataset
+    features = ['Rooms', 'Distance', 'Bedroom2', 'Bathroom']
 
-X_test = pd.read_csv('X_test.csv').values
-y_test = pd.read_csv('Y_test.csv').values
+    X_train_resource = ex.open_resource('X_train.csv')
+    y_train_resource = ex.open_resource('Y_train.csv')
 
-# model definition
-model = Sequential([
-    Dense(32, activation='relu', input_shape=(len(features),)),
-    Dense(32, activation='relu'),
-    Dense(1, activation='sigmoid'),
-])
+    X_dev_resource = ex.open_resource('X_val.csv')
+    y_dev_resource = ex.open_resource('Y_val.csv')
 
-#compile and train
-model.compile(optimizer='sgd', loss='binary_crossentropy', metrics=['accuracy'])
-hist = model.fit(X_train, y_train,
-                 batch_size=32, epochs=100,
-                 validation_data=(X_dev, y_dev))
+    X_test_resource = ex.open_resource('X_test.csv')
+    y_test_resource = ex.open_resource('Y_test.csv')
 
-model.evaluate(X_test, y_test)[1]
+    X_train = pd.read_csv(X_train_resource).values
+    y_train = pd.read_csv(y_train_resource).values
 
-model.save('model.h5')
+    X_dev = pd.read_csv(X_dev_resource).values
+    y_dev = pd.read_csv(y_dev_resource).values
+
+    X_test = pd.read_csv(X_test_resource).values
+    y_test = pd.read_csv(y_test_resource).values
+
+    # model definition
+    model = Sequential([
+        Dense(32, activation='relu', input_shape=(len(features),)),
+        Dense(32, activation='relu'),
+        Dense(1, activation='sigmoid'),
+    ])
+
+    #compile and train
+    model.compile(optimizer=optimizer, loss=loss_function, metrics=['accuracy'])
+    hist = model.fit(X_train, y_train,
+                     batch_size=32, epochs=100,
+                     validation_data=(X_dev, y_dev))
+    model.save('model.h5')
+
+    # NOTE: these values come from the test split, although they are logged under "training.*" names.
+    loss, accuracy = model.evaluate(X_test, y_test)
+    _run.log_scalar("training.loss", loss)
+    _run.log_scalar("training.accuracy", accuracy)
+
+    # _run.config holds the values this run actually used, including any overrides.
+    with open("sacred-files/parameters.json", 'w') as file:
+        json.dump(_run.config, file)
+
+    with open('property_model.py', 'r') as current_file:
+        file_content = current_file.read()
+    with open('sacred-files/source_code.py', 'w') as new_file:
+        new_file.write(file_content)
+
+    with open("sacred-files/metrics.txt", 'w') as file:
+        file.write(f"loss: {loss}\n")
+        file.write(f"accuracy: {accuracy}\n")
+
+    ex.add_artifact("model.h5")
+    ex.add_artifact("sacred-files/parameters.json")
+    ex.add_artifact("sacred-files/source_code.py")
+    ex.add_artifact("sacred-files/metrics.txt")