diff --git a/Jenkinsfile_train b/Jenkinsfile_train
index bb69b03..47bf9b6 100644
--- a/Jenkinsfile_train
+++ b/Jenkinsfile_train
@@ -17,10 +17,12 @@ pipeline {
         stage("Run training"){
             steps {
                 sh "python3 training.py ${verbose} ${epochs}"
+                sh "python3 sacred_exp.py"
             }
         }
         stage('Save trained model files') {
             steps{
+                archiveArtifacts 'sacred_file/**'
                 archiveArtifacts 'linear_regression.h5'
             }
         }
diff --git a/linear_regression.h5 b/linear_regression.h5
index e15841a..ef7701b 100644
Binary files a/linear_regression.h5 and b/linear_regression.h5 differ
diff --git a/requirements.txt b/requirements.txt
index 37bf8a9..7ba6374 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,9 +3,13 @@ astunparse==1.6.3
 cachetools==4.2.1
 certifi==2020.12.5
 chardet==4.0.0
+colorama==0.4.4
 cycler==0.10.0
+docopt==0.6.2
 flatbuffers==1.12
 gast==0.4.0
+gitdb==4.0.7
+GitPython==3.1.17
 google-auth==1.29.0
 google-auth-oauthlib==0.4.4
 google-pasta==0.2.0
@@ -13,20 +17,25 @@ grpcio==1.34.1
 h5py==3.1.0
 idna==2.10
 joblib==1.0.1
+jsonpickle==1.5.2
 kaggle==1.5.12
 keras-nightly==2.5.0.dev2021032900
 Keras-Preprocessing==1.1.2
 kiwisolver==1.3.1
 Markdown==3.3.4
 matplotlib==3.4.2
+munch==2.5.0
 numpy==1.19.5
 oauthlib==3.1.0
 opt-einsum==3.3.0
+packaging==20.9
 pandas==1.2.4
 Pillow==8.2.0
 protobuf==3.15.8
+py-cpuinfo==8.0.0
 pyasn1==0.4.8
 pyasn1-modules==0.2.8
+pymongo==3.11.4
 pyparsing==2.4.7
 python-dateutil==2.8.1
 python-slugify==4.0.1
@@ -34,10 +43,12 @@ pytz==2021.1
 requests==2.25.1
 requests-oauthlib==1.3.0
 rsa==4.7.2
+sacred==0.8.2
 scikit-learn==0.24.1
 scipy==1.6.1
 six==1.15.0
 sklearn==0.0
+smmap==4.0.0
 tensorboard==2.5.0
 tensorboard-data-server==0.6.0
 tensorboard-plugin-wit==1.8.0
diff --git a/sacred_exp.py b/sacred_exp.py
new file mode 100644
index 0000000..477873a
--- /dev/null
+++ b/sacred_exp.py
@@ -0,0 +1,80 @@
+import sys
+
+import pandas as pd
+import numpy as np
+import tensorflow as tf
+import os.path
+
+from sacred import Experiment
+from sacred.observers import FileStorageObserver, MongoObserver
+
+from tensorflow import keras
+from tensorflow.keras import layers
+from tensorflow.keras.layers.experimental import preprocessing
+
+exp = Experiment("s434704", interactive=False, save_git_info=False)
+exp.observers.append(FileStorageObserver("sacred_file"))
+exp.observers.append(MongoObserver(url='mongodb://mongo_user:mongo_password_IUM_2021@172.17.0.1:27017', db_name="sacred"))
+
+@exp.config
+def my_config():
+    verbose = 0
+    epochs = 100
+
+@exp.capture
+def training(verbose, epochs, _log):
+
+    pd.set_option("display.max_columns", None)
+
+    # Load the data
+    train_data = pd.read_csv("./MoviesOnStreamingPlatforms_updated.train")
+
+    # Build the model
+    columns_to_use = ['Year', 'Runtime', 'Netflix']
+    train_X = tf.convert_to_tensor(train_data[columns_to_use])
+    train_Y = tf.convert_to_tensor(train_data[["IMDb"]])
+
+    normalizer = preprocessing.Normalization(input_shape=[3,])
+    normalizer.adapt(train_X)
+
+    model = keras.Sequential([
+        keras.Input(shape=(len(columns_to_use),)),
+        normalizer,
+        layers.Dense(30, activation='relu'),
+        layers.Dense(10, activation='relu'),
+        layers.Dense(25, activation='relu'),
+        layers.Dense(1)
+    ])
+
+    model.compile(loss='mean_absolute_error',
+                  optimizer=tf.keras.optimizers.Adam(0.001),
+                  metrics=[tf.keras.metrics.RootMeanSquaredError()])
+
+    params = f"Verbose: {verbose}, Epochs: {epochs}"
+    _log.info(params)
+
+    model.fit(train_X, train_Y, verbose=verbose, epochs=epochs)
+
+    model.save('linear_regression.h5')
+
+    # Evaluation
+
+    test_data = pd.read_csv("./MoviesOnStreamingPlatforms_updated.test")
+
+    columns_to_use = ['Year', 'Runtime', 'Netflix']
+    test_X = tf.convert_to_tensor(test_data[columns_to_use])
+    test_Y = tf.convert_to_tensor(test_data[["IMDb"]])
+
+    scores = model.evaluate(x=test_X,
+                            y=test_Y)
+
+    evaluation_info = f"RMSE: {scores[1]}"
+    _log.info(evaluation_info)
+
+@exp.automain
+def run(verbose, epochs, _run):
+    training()
+
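+# Note: @exp.automain above already launches one run when this script is executed from the
+# command line; exp.run() below then starts a second run, which is why sacred_file/ contains
+# runs 1 and 2. The model file added below is recorded as an artifact of that second run
+# (see "artifacts" in sacred_file/2/run.json).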
+runner = exp.run()
+exp.add_source_file("./training.py")
+exp.add_artifact("linear_regression.h5")
\ No newline at end of file
diff --git a/sacred_file/1/config.json b/sacred_file/1/config.json
new file mode 100644
index 0000000..58da8f5
--- /dev/null
+++ b/sacred_file/1/config.json
@@ -0,0 +1,5 @@
+{
+  "epochs": 100,
+  "seed": 80188794,
+  "verbose": 0
+}
\ No newline at end of file
diff --git a/sacred_file/1/cout.txt b/sacred_file/1/cout.txt
new file mode 100644
index 0000000..65019ac
--- /dev/null
+++ b/sacred_file/1/cout.txt
@@ -0,0 +1,11 @@
+INFO - s434704 - Running command 'run'
+INFO - s434704 - Started run with ID "1"
+2021-05-15 16:30:17.771747: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 FMA
+To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
+2021-05-15 16:30:18.525767: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)
+WARNING:tensorflow:Please add `keras.layers.InputLayer` instead of `keras.Input` to Sequential model. `keras.Input` is intended to be used by Functional model.
+WARNING - tensorflow - Please add `keras.layers.InputLayer` instead of `keras.Input` to Sequential model. `keras.Input` is intended to be used by Functional model.
+INFO - training - Verbose: 0, Epochs: 100
+ 1/11 [=>............................] - ETA: 1s - loss: 0.0957 - root_mean_squared_error: 0.1177 11/11 [==============================] - 0s 674us/step - loss: 0.1033 - root_mean_squared_error: 0.1313
+INFO - training - RMSE: 0.1313309669494629
+INFO - s434704 - Completed after 0:00:08
diff --git a/sacred_file/1/metrics.json b/sacred_file/1/metrics.json
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/sacred_file/1/metrics.json
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/sacred_file/1/run.json b/sacred_file/1/run.json
new file mode 100644
index 0000000..8ba1fc0
--- /dev/null
+++ b/sacred_file/1/run.json
@@ -0,0 +1,66 @@
+{
+  "artifacts": [],
+  "command": "run",
+  "experiment": {
+    "base_dir": "/Volumes/seagate/ium_434704",
+    "dependencies": [
+      "numpy==1.19.5",
+      "pandas==1.2.4",
+      "sacred==0.8.2",
+      "tensorflow==2.5.0rc1"
+    ],
+    "mainfile": "sacred_exp.py",
+    "name": "s434704",
+    "repositories": [],
+    "sources": [
+      [
+        "sacred_exp.py",
+        "_sources/sacred_exp_8150ed54d93299dfccf6867ea7220971.py"
+      ]
+    ]
+  },
+  "heartbeat": "2021-05-15T14:30:25.850078",
+  "host": {
+    "ENV": {},
+    "cpu": "Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz",
+    "hostname": "Wojciechs-MacBook-Pro.local",
+    "os": [
+      "Darwin",
+      "macOS-11.2.1-x86_64-i386-64bit"
+    ],
+    "python_version": "3.9.1"
+  },
+  "meta": {
+    "command": "run",
+    "options": {
+      "--beat-interval": null,
+      "--capture": null,
+      "--comment": null,
+      "--debug": false,
+      "--enforce_clean": false,
+      "--file_storage": null,
+      "--force": false,
+      "--help": false,
+      "--loglevel": null,
+      "--mongo_db": null,
+      "--name": null,
+      "--pdb": false,
+      "--print-config": false,
+      "--priority": null,
+      "--queue": false,
+      "--s3": null,
+      "--sql": null,
+      "--tiny_db": null,
+      "--unobserved": false,
+      "COMMAND": null,
+      "UPDATE": [],
+      "help": false,
+      "with": false
+    }
+  },
+  "resources": [],
+  "result": null,
+  "start_time": "2021-05-15T14:30:17.351901",
+  "status": "COMPLETED",
+  "stop_time": "2021-05-15T14:30:25.848159"
+}
\ No newline at end of file
diff --git a/sacred_file/2/config.json b/sacred_file/2/config.json
new file mode 100644
index 0000000..10050d5
--- /dev/null
+++ b/sacred_file/2/config.json
@@ -0,0 +1,5 @@
+{
+  "epochs": 100,
+  "seed": 426629893,
+  "verbose": 0
+}
\ No newline at end of file
diff --git a/sacred_file/2/cout.txt b/sacred_file/2/cout.txt
new file mode 100644
index 0000000..8daa125
--- /dev/null
+++ b/sacred_file/2/cout.txt
@@ -0,0 +1,8 @@
+INFO - s434704 - Running command 'run'
+INFO - s434704 - Started run with ID "2"
+WARNING:tensorflow:Please add `keras.layers.InputLayer` instead of `keras.Input` to Sequential model. `keras.Input` is intended to be used by Functional model.
+WARNING - tensorflow - Please add `keras.layers.InputLayer` instead of `keras.Input` to Sequential model. `keras.Input` is intended to be used by Functional model.
+INFO - training - Verbose: 0, Epochs: 100
+ 1/11 [=>............................] - ETA: 0s - loss: 0.0914 - root_mean_squared_error: 0.1140 11/11 [==============================] - 0s 638us/step - loss: 0.1024 - root_mean_squared_error: 0.1294
+INFO - training - RMSE: 0.12944550812244415
+INFO - s434704 - Completed after 0:00:05
diff --git a/sacred_file/2/linear_regression.h5 b/sacred_file/2/linear_regression.h5
new file mode 100644
index 0000000..ef7701b
Binary files /dev/null and b/sacred_file/2/linear_regression.h5 differ
diff --git a/sacred_file/2/metrics.json b/sacred_file/2/metrics.json
new file mode 100644
index 0000000..9e26dfe
--- /dev/null
+++ b/sacred_file/2/metrics.json
@@ -0,0 +1 @@
+{}
\ No newline at end of file
diff --git a/sacred_file/2/run.json b/sacred_file/2/run.json
new file mode 100644
index 0000000..294e67c
--- /dev/null
+++ b/sacred_file/2/run.json
@@ -0,0 +1,64 @@
+{
+  "artifacts": [
+    "linear_regression.h5"
+  ],
+  "command": "run",
+  "experiment": {
+    "base_dir": "/Volumes/seagate/ium_434704",
+    "dependencies": [
+      "numpy==1.19.5",
+      "pandas==1.2.4",
+      "sacred==0.8.2",
+      "tensorflow==2.5.0rc1"
+    ],
+    "mainfile": "sacred_exp.py",
+    "name": "s434704",
+    "repositories": [],
+    "sources": [
+      [
+        "sacred_exp.py",
+        "_sources/sacred_exp_8150ed54d93299dfccf6867ea7220971.py"
+      ]
+    ]
+  },
+  "heartbeat": "2021-05-15T14:30:31.335228",
+  "host": {
+    "ENV": {},
+    "cpu": "Intel(R) Core(TM) i9-9880H CPU @ 2.30GHz",
+    "hostname": "Wojciechs-MacBook-Pro.local",
+    "os": [
+      "Darwin",
+      "macOS-11.2.1-x86_64-i386-64bit"
+    ],
+    "python_version": "3.9.1"
+  },
+  "meta": {
+    "command": "run",
+    "options": {
+      "--beat-interval": null,
+      "--capture": null,
+      "--comment": null,
+      "--debug": false,
+      "--enforce_clean": false,
+      "--file_storage": null,
+      "--force": false,
+      "--help": false,
+      "--loglevel": null,
+      "--mongo_db": null,
+      "--name": null,
+      "--pdb": false,
+      "--print-config": false,
+      "--priority": null,
+      "--queue": false,
+      "--s3": null,
+      "--sql": null,
+      "--tiny_db": null,
+      "--unobserved": false
+    }
+  },
+  "resources": [],
+  "result": null,
+  "start_time": "2021-05-15T14:30:25.893032",
+  "status": "COMPLETED",
+  "stop_time": "2021-05-15T14:30:31.333523"
+}
\ No newline at end of file
diff --git a/sacred_file/_sources/sacred_exp_8150ed54d93299dfccf6867ea7220971.py b/sacred_file/_sources/sacred_exp_8150ed54d93299dfccf6867ea7220971.py
new file mode 100644
index 0000000..9fd53d6
--- /dev/null
+++ b/sacred_file/_sources/sacred_exp_8150ed54d93299dfccf6867ea7220971.py
@@ -0,0 +1,80 @@
+import sys
+
+import pandas as pd
+import numpy as np
+import tensorflow as tf
+import os.path
+
+from sacred import Experiment
+from sacred.observers import FileStorageObserver, MongoObserver
+
+from tensorflow import keras
+from tensorflow.keras import layers
+from tensorflow.keras.layers.experimental import preprocessing
+
+exp = Experiment("s434704", interactive=False, save_git_info=False)
+exp.observers.append(FileStorageObserver("sacred_file"))
+# exp.observers.append(MongoObserver(url='mongodb://mongo_user:mongo_password_IUM_2021@172.17.0.1:27017', db_name="sacred"))
+
+@exp.config
+def my_config():
+    verbose = 0
+    epochs = 100
+
+@exp.capture
+def training(verbose, epochs, _log):
+
+    pd.set_option("display.max_columns", None)
+
+    # Load the data
+    train_data = pd.read_csv("./MoviesOnStreamingPlatforms_updated.train")
+
+    # Build the model
+    columns_to_use = ['Year', 'Runtime', 'Netflix']
+    train_X = tf.convert_to_tensor(train_data[columns_to_use])
+    train_Y = tf.convert_to_tensor(train_data[["IMDb"]])
+
+    normalizer = preprocessing.Normalization(input_shape=[3,])
+    normalizer.adapt(train_X)
+
+    model = keras.Sequential([
+        keras.Input(shape=(len(columns_to_use),)),
+        normalizer,
+        layers.Dense(30, activation='relu'),
+        layers.Dense(10, activation='relu'),
+        layers.Dense(25, activation='relu'),
+        layers.Dense(1)
+    ])
+
+    model.compile(loss='mean_absolute_error',
+                  optimizer=tf.keras.optimizers.Adam(0.001),
+                  metrics=[tf.keras.metrics.RootMeanSquaredError()])
+
+    params = f"Verbose: {verbose}, Epochs: {epochs}"
+    _log.info(params)
+
+    model.fit(train_X, train_Y, verbose=verbose, epochs=epochs)
+
+    model.save('linear_regression.h5')
+
+    # Evaluation
+
+    test_data = pd.read_csv("./MoviesOnStreamingPlatforms_updated.test")
+
+    columns_to_use = ['Year', 'Runtime', 'Netflix']
+    test_X = tf.convert_to_tensor(test_data[columns_to_use])
+    test_Y = tf.convert_to_tensor(test_data[["IMDb"]])
+
+    scores = model.evaluate(x=test_X,
+                            y=test_Y)
+
+    evaluation_info = f"RMSE: {scores[1]}"
+    _log.info(evaluation_info)
+
+@exp.automain
+def run(verbose, epochs, _run):
+    training()
+
+runner = exp.run()
+exp.add_source_file("./training.py")
+exp.add_artifact("linear_regression.h5")
\ No newline at end of file