import sys
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Dense, Activation, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
from sacred import Experiment
from datetime import datetime
from sacred.observers import FileStorageObserver
from sacred.observers import MongoObserver
import pymongo
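
# Sacred experiment: every run is recorded both in MongoDB and in the local
# 'my_runs' directory via the two observers below.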
ex = Experiment("434695-mongo", interactive=False, save_git_info=False)
ex.observers.append(MongoObserver(url='mongodb://mongo_user:mongo_password_IUM_2021@172.17.0.1:27017', db_name='sacred'))
ex.observers.append(FileStorageObserver('my_runs'))
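

# Hyperparameters come from the command line: batch size as the first
# argument, number of epochs as the second.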
@ex.config
def my_config():
    batch_param = int(sys.argv[1])
    epoch_param = int(sys.argv[2])
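

# prepare_model is a captured function: Sacred injects epoch_param,
# batch_param and the special _run object from the experiment configuration.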
@ex.capture
def prepare_model(epoch_param, batch_param, _run):
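    # Record a timestamp for this run in Sacred's info dict.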
    _run.info["prepare_model_ts"] = str(datetime.now())
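
    # Load the pre-split train/test/dev portions of the vgsales data.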
    vgsales_train = pd.read_csv('train.csv')
    vgsales_test = pd.read_csv('test.csv')
    vgsales_dev = pd.read_csv('dev.csv')
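
    # Binary label: 1 if the game's publisher is Nintendo, 0 otherwise.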
    vgsales_train['Nintendo'] = vgsales_train['Publisher'].apply(lambda x: 1 if x == 'Nintendo' else 0)
    vgsales_test['Nintendo'] = vgsales_test['Publisher'].apply(lambda x: 1 if x == 'Nintendo' else 0)
    vgsales_dev['Nintendo'] = vgsales_dev['Publisher'].apply(lambda x: 1 if x == 'Nintendo' else 0)
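
    # Features are the remaining sales columns; 'Nintendo' itself is the label,
    # so it is dropped from X as well to avoid target leakage.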
    X_train = vgsales_train.drop(['Rank', 'Name', 'Platform', 'Year', 'Genre', 'Publisher', 'Nintendo'], axis=1)
    y_train = vgsales_train[['Nintendo']]
    X_test = vgsales_test.drop(['Rank', 'Name', 'Platform', 'Year', 'Genre', 'Publisher', 'Nintendo'], axis=1)
    y_test = vgsales_test[['Nintendo']]
    print(X_train.shape[1])  # number of input features
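
    # Small binary classifier: one hidden ReLU layer, sigmoid output unit.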
    model = Sequential()
    model.add(Dense(9, input_dim=X_train.shape[1], kernel_initializer='normal', activation='relu'))
    model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
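
    # Stop training when the validation loss has not improved for 10 epochs.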
    early_stop = EarlyStopping(monitor="val_loss", mode="min", verbose=1, patience=10)
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    # Hyperparameters are injected from the Sacred config rather than read
    # from sys.argv again.
    epochs = epoch_param
    batch_size = batch_param
    # Train with early stopping monitored on the validation data.
    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
              validation_data=(X_test, y_test), callbacks=[early_stop])
    prediction = model.predict(X_test)
    # mean_squared_error returns the MSE, so take the square root to get the
    # RMSE, and log it to Sacred.
    rmse = np.sqrt(mean_squared_error(y_test, prediction))
    _run.log_scalar("rmse", rmse)
    model.save('vgsales_model.h5')
    return rmse
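

# Main entry point: Sacred calls my_main with the config values filled in.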
@ex.main
def my_main(epoch_param, batch_param):
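    # prepare_model() needs no explicit arguments; Sacred injects them.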
    print(prepare_model())
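

# Run the experiment, then attach the saved model file as an artifact.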
r = ex.run()
ex.add_artifact("vgsales_model.h5")