"""Train a dense Keras regressor that predicts likes from views, tracked in MLflow.

Usage: pass the number of training epochs as the first command-line argument.

The script reads the training rows from ``data_train``, min-max scales the
``views`` (input) and ``likes`` (target) columns, fits the network, saves it
to ``model``, records the value returned by ``evaluate_model()`` as the
``rmse`` metric, and finally logs/saves the model to MLflow together with a
signature and an input example taken from ``data_dev``.
"""
import logging
import sys
import warnings

import mlflow
import mlflow.models
import numpy as np
import pandas as pd
from tensorflow import keras

from evaluate_network import evaluate_model

logging.basicConfig(level=logging.WARN)
logger = logging.getLogger(__name__)

mlflow.set_tracking_uri("http://172.17.0.1:5000")
mlflow.set_experiment("s434765")

warnings.filterwarnings("ignore")
np.random.seed(40)

# Parse the epoch count once, before any expensive work, so a missing or
# non-numeric argument fails immediately instead of after an MLflow run
# has already been started.
epochs = int(sys.argv[1])

# Column layout shared by the header-less train and dev CSV files.
COLUMN_NAMES = [
    "video_id",
    "last_trending_date",
    "publish_date",
    "publish_hour",
    "category_id",
    "channel_title",
    "views",
    "likes",
    "dislikes",
    "comment_count",
]


def normalize_data(data):
    """Min-max scale *data* using its own minimum and maximum.

    Note that a constant input makes the denominator zero.
    """
    return (data - np.min(data)) / (np.max(data) - np.min(data))


def _read_videos(path, nrows):
    """Read the first *nrows* rows of a header-less CSV and drop rows with NaNs.

    Malformed lines are skipped (with a warning) rather than aborting the read.
    """
    options = dict(sep=",", skip_blank_lines=True, nrows=nrows, names=COLUMN_NAMES)
    try:
        # pandas >= 1.3 spelling; "warn" matches the old
        # error_bad_lines=False / warn_bad_lines=True default combination.
        frame = pd.read_csv(path, on_bad_lines="warn", **options)
    except TypeError:
        # Older pandas does not know `on_bad_lines`; use the legacy keyword
        # (which in turn was removed in pandas 2.0).
        frame = pd.read_csv(path, error_bad_lines=False, **options)
    return frame.dropna()


data = _read_videos("data_train", nrows=1087)

# Single-column frames: views is the model input, likes is the target.
X = data.loc[:, data.columns == "views"].astype(int)
y = data.loc[:, data.columns == "likes"].astype(int)

# The scaling bounds are kept (and printed) because the same bounds are
# needed to scale any later input the same way as the training data.
min_val_sub = np.min(X)
max_val_sub = np.max(X)
X = (X - min_val_sub) / (max_val_sub - min_val_sub)
print(min_val_sub)
print(max_val_sub)

min_val_like = np.min(y)
max_val_like = np.max(y)
y = (y - min_val_like) / (max_val_like - min_val_like)
print(min_val_like)
print(max_val_like)

with mlflow.start_run() as run:
    print("MLflow run experiment_id: {0}".format(run.info.experiment_id))
    print("MLflow run artifact_uri: {0}".format(run.info.artifact_uri))

    mlflow.keras.autolog()
    mlflow.log_param("epochs", epochs)

    model = keras.Sequential([
        keras.layers.Dense(512, input_dim=X.shape[1], activation='relu'),
        keras.layers.Dense(256, activation='relu'),
        keras.layers.Dense(256, activation='relu'),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(1, activation='linear'),
    ])
    model.compile(
        loss='mean_absolute_error',
        optimizer="Adam",
        metrics=['mean_absolute_error'],
    )
    model.fit(X, y, epochs=epochs, validation_split=0.3)
    model.save('model')

    # NOTE(review): evaluate_model() is defined in evaluate_network and takes
    # no arguments; presumably it loads the model saved just above - confirm.
    error = evaluate_model()
    mlflow.log_metric("rmse", error)

    # The signature must pair the model inputs with the predictions made
    # from those same inputs (the original predicted on the labels `y`).
    signature = mlflow.models.signature.infer_signature(X, model.predict(X))

    data = _read_videos("data_dev", nrows=527)
    X_test = data.loc[:, data.columns == "views"].astype(int)
    # Scale the example with the training bounds so it lies in the same
    # domain (and has the same float dtype) as the inputs the signature
    # was inferred from.
    X_test = (X_test - min_val_sub) / (max_val_sub - min_val_sub)

    mlflow.keras.log_model(
        model,
        "youtube_model",
        registered_model_name="youtube_model",
        input_example=X_test,
        signature=signature,
    )
    # save_model() writes to a local path only; model-registry registration
    # is a log_model() feature, so `registered_model_name` is not passed here.
    mlflow.keras.save_model(
        model,
        "youtube_model",
        signature=signature,
        input_example=X_test,
    )