"""Sacred experiment that fits a Keras regressor predicting likes from views."""
from datetime import datetime

import numpy as np
import pandas as pd
from sacred import Experiment
from sacred.observers import FileStorageObserver, MongoObserver
from sklearn.metrics import mean_squared_error
from tensorflow import keras

ex = Experiment("sacred_scopes", interactive=True)
# NOTE(review): the disabled Mongo observer below embeds credentials in the
# URL; if it is ever re-enabled, consider reading them from the environment.
# ex.observers.append(MongoObserver(url='mongodb://mongo_user:mongo_password_IUM_2021@172.17.0.1:27017',
#                                   db_name='sacred'))
ex.observers.append(FileStorageObserver('my_runs'))

# Column layout shared by the headerless train and dev CSV files.
_COLUMN_NAMES = [
    "video_id",
    "last_trending_date",
    "publish_date",
    "publish_hour",
    "category_id",
    "channel_title",
    "views",
    "likes",
    "dislikes",
    "comment_count",
]


# Sacred configuration scope: local variables defined here become config
# entries that are injected into captured functions by parameter name.
@ex.config
def my_config():
    epochs_amount = 30


def normalize_data(data):
    """Min-max scale *data* using its own minimum and maximum.

    No guard is applied for constant input (max == min), so the division
    follows the usual numpy/pandas divide-by-zero semantics in that case.
    """
    lowest = np.min(data)
    return (data - lowest) / (np.max(data) - lowest)


def _read_views_likes(path, nrows):
    """Read up to *nrows* rows of *path*; return (views, likes) int frames."""
    # NOTE(review): `error_bad_lines` is kept as in the original; newer pandas
    # releases replace it with `on_bad_lines="skip"` - confirm the pinned
    # pandas version before changing it.
    data = pd.read_csv(
        path,
        sep=',',
        skip_blank_lines=True,
        nrows=nrows,
        error_bad_lines=False,
        names=_COLUMN_NAMES,
    ).dropna()
    views = data[["views"]].astype(int)
    likes = data[["likes"]].astype(int)
    return views, likes


@ex.capture
def prepare_model(epochs_amount, _run):
    """Train the model on ``data_train`` and evaluate it on ``data_dev``.

    Args:
        epochs_amount: Number of training epochs (cast with ``int``).
        _run: Sacred run object; receives a timestamp in ``info`` and the
            final error through ``log_scalar``.

    Returns:
        Mean squared error between the dev-set likes and the denormalised
        predictions.

    Side effects:
        Prints the min/max values used for scaling and the error, writes
        ``predictions.txt`` and saves the fitted model under ``model``.
    """
    _run.info["prepare_message_ts"] = str(datetime.now())

    X, y = _read_views_likes("data_train", nrows=1087)

    # DataFrame.min()/max() reduce column-wise and return a one-element
    # Series, independent of how a given pandas version treats the
    # axis=None that np.min(frame) would forward.
    min_val_sub = X.min()
    max_val_sub = X.max()
    X = (X - min_val_sub) / (max_val_sub - min_val_sub)
    print(min_val_sub)
    print(max_val_sub)

    min_val_like = y.min()
    max_val_like = y.max()
    y = (y - min_val_like) / (max_val_like - min_val_like)
    print(min_val_like)
    print(max_val_like)

    model = keras.Sequential([
        keras.layers.Dense(512, input_dim=X.shape[1], activation='relu'),
        keras.layers.Dense(256, activation='relu'),
        keras.layers.Dense(256, activation='relu'),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(1, activation='linear'),
    ])
    model.compile(loss='mean_absolute_error', optimizer="Adam",
                  metrics=['mean_absolute_error'])
    model.fit(X, y, epochs=int(epochs_amount), validation_split=0.3)

    X_test, y_test = _read_views_likes("data_dev", nrows=527)

    # NOTE(review): the dev features and the predictions below are scaled
    # with dev-set min/max, not with the training-set values the model was
    # fitted with. Kept as-is to preserve reported numbers - confirm whether
    # the training statistics should be reused instead.
    min_val_sub = X_test.min()
    max_val_sub = X_test.max()
    X_test = (X_test - min_val_sub) / (max_val_sub - min_val_sub)
    print(min_val_sub)
    print(max_val_sub)

    min_val_like = y_test.min()
    max_val_like = y_test.max()
    print(min_val_like)
    print(max_val_like)

    prediction = model.predict(X_test)

    # Pull the scalar bounds out once; .iloc[0] is explicit positional access
    # into the one-element Series.
    like_low = min_val_like.iloc[0]
    like_span = max_val_like.iloc[0] - like_low
    prediction_denormalized = [
        row[0] * like_span + like_low for row in prediction
    ]

    # Context manager guarantees the file is flushed and closed.
    with open("predictions.txt", "w") as f:
        for predicted, expected_row in zip(prediction_denormalized,
                                           y_test.values):
            f.write(f"predicted: {predicted!s} expected: {expected_row[0]!s}\n")

    error = mean_squared_error(y_test, prediction_denormalized)
    print(error)

    model.save('model')
    _run.log_scalar("training.metrics", error)
    return error


@ex.main
def my_main(epochs_amount):
    """Sacred entry point: run ``prepare_model`` and print its error."""
    print(prepare_model())


ex.run()
# NOTE(review): the code above only writes `model.save('model')`; whether a
# file named "model.pb" exists at this point cannot be told from here, and
# this call happens after the run has finished - confirm the intended path
# and placement.
ex.add_artifact("model.pb")