ium_434765/neural_network.py
Karolina Oparczyk c6e97633ef
All checks were successful
s434765-training/pipeline/head This commit looks good
sacred
2021-05-20 23:28:26 +02:00

109 lines
3.6 KiB
Python

import sys
from datetime import datetime
import pandas as pd
import numpy as np
from sacred.observers import FileStorageObserver, MongoObserver
from sacred import Experiment
from sklearn.metrics import mean_squared_error
from tensorflow import keras
# Sacred experiment object shared by the config, captured and main functions
# defined below.
ex = Experiment("s434765", interactive=True, save_git_info=False)
# Report every run to the "sacred" database of the MongoDB instance at this URL.
# NOTE(review): the connection URL embeds a username and password in source
# control — consider supplying them from the environment instead.
ex.observers.append(MongoObserver(url='mongodb://mongo_user:mongo_password_IUM_2021@172.17.0.1:27017',
                                  db_name='sacred'))
# Additionally record every run on disk under the local "my_runs" directory.
ex.observers.append(FileStorageObserver('my_runs'))
# Sacred configuration scope: the local variable assigned in the body
# (`epochs_amount`) becomes a config entry of the experiment `ex`.
# The value is read from the first command-line argument and converted to int;
# running the script without that argument raises IndexError here.
# NOTE(review): Sacred evaluates config scopes from the function's source text,
# so keep the body to plain assignments and avoid adding annotations to the
# signature — confirm against the Sacred documentation before restructuring.
@ex.config
def my_config():
    epochs_amount = int(sys.argv[1])
def normalize_data(data):
    """Min-max scale *data* so its smallest value maps to 0 and its largest to 1.

    The minimum and maximum are taken with ``np.min`` / ``np.max`` over
    *data* as a whole. No guard is applied when all values are equal; the
    division is then by zero, exactly as in the plain formula.
    """
    lowest = np.min(data)
    value_range = np.max(data) - lowest
    return (data - lowest) / value_range
def _load_views_likes(path, nrows):
    """Read at most *nrows* rows of the header-less CSV at *path*.

    Rows containing missing values are dropped. Returns a pair of
    single-column integer DataFrames ``(views, likes)``.
    """
    column_names = [
        "video_id", "last_trending_date", "publish_date", "publish_hour",
        "category_id", "channel_title", "views", "likes", "dislikes",
        "comment_count",
    ]
    frame = pd.read_csv(path, sep=',', skip_blank_lines=True, nrows=nrows,
                        error_bad_lines=False, names=column_names).dropna()
    return frame[["views"]].astype(int), frame[["likes"]].astype(int)


@ex.capture
def prepare_model(epochs_amount, _run):
    """Train a views -> likes regressor and evaluate it on the dev set.

    Parameters
    ----------
    epochs_amount
        Number of training epochs; converted with ``int`` before use.
    _run
        Sacred run object; receives a timestamp in ``info`` and the final
        error via ``log_scalar``.

    Returns
    -------
    The mean squared error between the dev-set likes and the de-normalised
    predictions.

    Side effects: reads ``data_train`` and ``data_dev``, prints the min/max of
    both data sets, writes ``predictions.txt``, saves the model to ``model``.
    """
    _run.info["prepare_message_ts"] = str(datetime.now())

    X, y = _load_views_likes("data_train", 1087)

    # Min-max scale the features; the statistics are kept because the dev set
    # must be scaled the same way the model was trained.
    views_min, views_max = X.min(), X.max()
    X = (X - views_min) / (views_max - views_min)
    print(views_min)
    print(views_max)

    # Same for the target; these statistics are reused to map predictions
    # back to like counts.
    likes_min, likes_max = y.min(), y.max()
    y = (y - likes_min) / (likes_max - likes_min)
    print(likes_min)
    print(likes_max)

    model = keras.Sequential([
        keras.layers.Dense(512, input_dim=X.shape[1], activation='relu'),
        keras.layers.Dense(256, activation='relu'),
        keras.layers.Dense(256, activation='relu'),
        keras.layers.Dense(128, activation='relu'),
        keras.layers.Dense(1, activation='linear'),
    ])
    model.compile(loss='mean_absolute_error', optimizer="Adam", metrics=['mean_absolute_error'])
    model.fit(X, y, epochs=int(epochs_amount), validation_split=0.3)

    X_test, y_test = _load_views_likes("data_dev", 527)

    # Dev-set ranges are printed for diagnostics only; they are deliberately
    # not used for scaling, so no dev-set statistics (in particular no dev
    # labels) leak into the predictions.
    print(X_test.min())
    print(X_test.max())
    print(y_test.min())
    print(y_test.max())
    X_test = (X_test - views_min) / (views_max - views_min)

    prediction = model.predict(X_test)

    # Undo the target scaling with the training statistics. `.iloc[0]` picks
    # the single "likes" entry without relying on positional fallback indexing.
    likes_low = float(likes_min.iloc[0])
    likes_span = float(likes_max.iloc[0]) - likes_low
    prediction_denormalized = prediction[:, 0].astype(float) * likes_span + likes_low

    # Context manager guarantees the file is flushed and closed.
    with open("predictions.txt", "w") as f:
        for pred, expected in zip(prediction_denormalized, y_test.values):
            f.write("predicted: %s expected: %s\n" % (str(pred), str(expected[0])))

    error = mean_squared_error(y_test, prediction_denormalized)
    print(error)
    model.save('model')
    _run.log_scalar("training.metrics", error)
    return error
@ex.main
def my_main(epochs_amount):
    """Experiment entry point: run ``prepare_model`` and print what it returns.

    ``epochs_amount`` is accepted but not used directly here; ``prepare_model``
    is called without arguments.
    """
    result = prepare_model()
    print(result)
# Execute the experiment (the function registered with @ex.main) as soon as
# this module is executed.
ex.run()
# Attach the saved model file to the experiment as an artifact.
# NOTE(review): this is called after ex.run() has returned; Sacred documents
# add_artifact as something to call during a run — confirm the artifact is
# actually recorded by the observers.
ex.add_artifact("model/saved_model.pb")