diff --git a/MLproject b/MLproject
new file mode 100644
index 0000000..f5dc422
--- /dev/null
+++ b/MLproject
@@ -0,0 +1,12 @@
+name: tutorial
+
+docker_env:
+  image: karopa/ium:27
+
+entry_points:
+  main:
+    parameters:
+      epochs: {type: float, default: 30}
+    command: "python3 neural_network.py {epochs}"
+  test:
+    command: "python3 evaluate_network.py"
\ No newline at end of file
diff --git a/evaluate_network.py b/evaluate_network.py
index e2cd8f0..5e82a22 100644
--- a/evaluate_network.py
+++ b/evaluate_network.py
@@ -4,47 +4,50 @@
 from sklearn.metrics import mean_squared_error
 from tensorflow import keras
 import matplotlib.pyplot as plt
-model = keras.models.load_model('model')
-data = pd.read_csv("data_dev", sep=',', error_bad_lines=False,
-                   skip_blank_lines=True, nrows=527, names=["video_id", "last_trending_date",
-                                                            "publish_date", "publish_hour", "category_id",
-                                                            "channel_title", "views", "likes", "dislikes",
-                                                            "comment_count"]).dropna()
-X_test = data.loc[:, data.columns == "views"].astype(int)
-y_test = data.loc[:, data.columns == "likes"].astype(int)
-
-min_val_sub = np.min(X_test)
-max_val_sub = np.max(X_test)
-X_test = (X_test - min_val_sub) / (max_val_sub - min_val_sub)
-print(min_val_sub)
-print(max_val_sub)
-
-min_val_like = np.min(y_test)
-max_val_like = np.max(y_test)
-print(min_val_like)
-print(max_val_like)
-
-prediction = model.predict(X_test)
-
-prediction_denormalized = []
-for pred in prediction:
-    denorm = pred[0] * (max_val_like[0] - min_val_like[0]) + min_val_like[0]
-    prediction_denormalized.append(denorm)
-
-f = open("predictions.txt", "w")
-for (pred, test) in zip(prediction_denormalized, y_test.values):
-    f.write("predicted: %s expected: %s\n" % (str(pred), str(test[0])))
-
-error = mean_squared_error(y_test, prediction_denormalized)
-print(error)
-
-with open("rmse.txt", "a") as file:
-    file.write(str(error) + "\n")
-
-with open("rmse.txt", "r") as file:
-    lines = file.readlines()
-    plt.plot(range(len(lines)), [line[:-2] for line in lines])
-    plt.tight_layout()
-    plt.ylabel('RMSE')
-    plt.xlabel('evaluation no')
-    plt.savefig('evaluation.png')
+def evaluate_model():  # evaluate the saved 'model' on data_dev; returns the RMSE
+    model = keras.models.load_model('model')
+    data = pd.read_csv("data_dev", sep=',', on_bad_lines='skip',  # was error_bad_lines (removed in pandas 2.0)
+                       skip_blank_lines=True, nrows=527, names=["video_id", "last_trending_date",
+                                                                "publish_date", "publish_hour", "category_id",
+                                                                "channel_title", "views", "likes", "dislikes",
+                                                                "comment_count"]).dropna()
+    X_test = data.loc[:, data.columns == "views"].astype(int)
+    y_test = data.loc[:, data.columns == "likes"].astype(int)
+
+    min_val_sub = np.min(X_test)
+    max_val_sub = np.max(X_test)
+    X_test = (X_test - min_val_sub) / (max_val_sub - min_val_sub)
+    print(min_val_sub)
+    print(max_val_sub)
+
+    min_val_like = np.min(y_test)
+    max_val_like = np.max(y_test)
+    print(min_val_like)
+    print(max_val_like)
+
+    prediction = model.predict(X_test)
+
+    prediction_denormalized = []
+    for pred in prediction:
+        denorm = pred[0] * (max_val_like[0] - min_val_like[0]) + min_val_like[0]
+        prediction_denormalized.append(denorm)
+
+    with open("predictions.txt", "w") as f:  # context manager: the handle was never closed before
+        for (pred, test) in zip(prediction_denormalized, y_test.values):
+            f.write("predicted: %s expected: %s\n" % (str(pred), str(test[0])))
+
+    error = np.sqrt(mean_squared_error(y_test, prediction_denormalized))  # RMSE, matching rmse.txt / the "rmse" metric
+    print(error)
+
+    with open("rmse.txt", "a") as file:
+        file.write(str(error) + "\n")
+
+    with open("rmse.txt", "r") as file:
+        lines = file.readlines()
+        plt.plot(range(len(lines)), [float(line) for line in lines])  # [:-2] dropped the last digit and plotted strings
+        plt.tight_layout()
+        plt.ylabel('RMSE')
+        plt.xlabel('evaluation no')
+        plt.savefig('evaluation.png')
+
+    return error
diff --git a/neural_network.py b/neural_network.py
index aa6f4a4..fbf75c1 100644
--- a/neural_network.py
+++ b/neural_network.py
@@ -1,9 +1,24 @@
+import warnings
+
 import pandas as pd
 import numpy as np
-from sklearn.metrics import mean_squared_error
 from tensorflow import keras
 import sys
+import mlflow.keras  # the code calls mlflow.keras.autolog(); the sklearn flavor was unused
+
+import logging
+
+from evaluate_network import evaluate_model
+
+logging.basicConfig(level=logging.WARN)
+logger = logging.getLogger(__name__)
+
+mlflow.set_tracking_uri("http://localhost:5000")
+mlflow.set_experiment("s434765")
+
+warnings.filterwarnings("ignore")
+np.random.seed(40)
 
 
 def normalize_data(data):
     return (data - np.min(data)) / (np.max(data) - np.min(data))
@@ -29,17 +44,25 @@
 y = (y - min_val_like) / (max_val_like - min_val_like)
 print(min_val_like)
 print(max_val_like)
 
-model = keras.Sequential([
-    keras.layers.Dense(512,input_dim = X.shape[1], activation='relu'),
-    keras.layers.Dense(256, activation='relu'),
-    keras.layers.Dense(256, activation='relu'),
-    keras.layers.Dense(128, activation='relu'),
-    keras.layers.Dense(1,activation='linear'),
-])
-
-model.compile(loss='mean_absolute_error', optimizer="Adam", metrics=['mean_absolute_error'])
-
-model.fit(X, y, epochs=int(sys.argv[1]), validation_split = 0.3)
-
-model.save('model')
\ No newline at end of file
+with mlflow.start_run() as run:
+    print("MLflow run experiment_id: {0}".format(run.info.experiment_id))
+    print("MLflow run artifact_uri: {0}".format(run.info.artifact_uri))
+    mlflow.keras.autolog()
+    mlflow.log_param("epochs", int(float(sys.argv[1])))  # MLproject declares epochs as float, so it arrives as e.g. "30.0"
+    model = keras.Sequential([
+        keras.layers.Dense(512,input_dim = X.shape[1], activation='relu'),
+        keras.layers.Dense(256, activation='relu'),
+        keras.layers.Dense(256, activation='relu'),
+        keras.layers.Dense(128, activation='relu'),
+        keras.layers.Dense(1,activation='linear'),
+    ])
+
+    model.compile(loss='mean_absolute_error', optimizer="Adam", metrics=['mean_absolute_error'])
+
+    model.fit(X, y, epochs=int(float(sys.argv[1])), validation_split = 0.3)  # float() first: int("30.0") raises ValueError
+
+    model.save('model')
+
+    error = evaluate_model()
+    mlflow.log_metric("rmse", error)