2021-05-15 11:50:27 +02:00
|
|
|
#! /usr/bin/python3
|
2021-05-16 19:48:11 +02:00
|
|
|
import os
import sys
import urllib.request
from datetime import datetime

import numpy as np
import pandas as pd
import tensorflow as tf
from sacred import Experiment
from sacred.observers import FileStorageObserver
from sklearn import preprocessing
from sklearn.linear_model import LinearRegression
from sklearn.metrics import classification_report
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from tensorflow import keras
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Input, Dense, Activation,Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.models import Sequential
|
|
|
|
|
|
|
|
# Sacred experiment object used by the decorators below (@ex.config,
# @ex.capture, @ex.main); created with interactive mode switched off.
ex = Experiment("ium_s434695", interactive=False)

# Attach a file-based observer rooted at ium_s434695/my_runs.
ex.observers.append(FileStorageObserver('ium_s434695/my_runs'))
|
|
|
|
|
|
|
|
@ex.config
def my_config():
    # The local names below are the configuration keys of the experiment;
    # functions such as prepare_model and my_main declare parameters with
    # the same names to receive these values.

    # Share of the data intended for the training split.
    train_size_param = 0.8
    # Share of the data intended for the test split.
    test_size_param = 0.2
|
|
|
|
|
|
|
|
def _download_file(url, destination):
    """Download *url* and write the response body to *destination*."""
    # urlopen follows redirects and raises on HTTP error statuses, so an
    # error page is never stored on disk in place of the dataset. Both the
    # connection and the output file are closed by the context managers.
    with urllib.request.urlopen(url) as response, \
            open(destination, 'wb') as target:
        target.write(response.read())


def _build_regression_model(input_dim):
    """Return a compiled 32-64-1 dense ReLU network for *input_dim* features."""
    model = Sequential()
    model.add(Dense(32, activation="relu", input_shape=(input_dim,)))
    model.add(Dense(64, activation="relu"))
    model.add(Dense(1, activation="relu"))
    model.compile(optimizer="adam", loss="mean_squared_error")
    return model


@ex.capture
def prepare_model(train_size_param, test_size_param, _run):
    """Train the Nintendo-publisher model and return its classification report.

    The function downloads vgsales.csv, derives a binary ``Nintendo`` target
    from the ``Publisher`` column, min-max scales the remaining columns,
    trains a small dense network and evaluates it on the held-out split.

    Args:
        train_size_param: value passed to ``train_test_split`` as
            ``train_size``.
        test_size_param: value passed to ``train_test_split`` as
            ``test_size``.
        _run: run object of the experiment; the start timestamp is stored in
            its ``info`` dict under ``"prepare_model_ts"``.

    Returns:
        The value of ``classification_report(y_test, y_pred)`` for the
        rounded predictions on the test split.
    """
    _run.info["prepare_model_ts"] = str(datetime.now())

    url = 'https://git.wmi.amu.edu.pl/s434695/ium_434695/raw/commit/2301fb86e434734376f73503307a8f3255a75cc6/vgsales.csv'
    # The CSV is kept on disk (not only read into memory), as before.
    _download_file(url, 'vgsales.csv')
    df = pd.read_csv('vgsales.csv')

    # Target: 1 when the publisher is Nintendo, 0 otherwise.
    df['Nintendo'] = (df['Publisher'] == 'Nintendo').astype(int)
    df = df.drop(['Rank', 'Name', 'Platform', 'Year', 'Genre', 'Publisher'],
                 axis=1)

    # The target is taken before scaling; the scaling below is applied to
    # every remaining column.
    # NOTE(review): assumes all remaining columns are numeric and
    # non-constant (a constant column would divide by zero) - confirm.
    y = df.Nintendo
    df = (df - df.min()) / (df.max() - df.min())
    x = df.drop(['Nintendo'], axis=1)

    # Split sizes come from the experiment configuration instead of being
    # hard-coded, so overriding the config actually changes the split.
    x_train, x_test, y_train, y_test = train_test_split(
        x, y,
        test_size=test_size_param,
        train_size=train_size_param,
        random_state=21,
    )

    model = _build_regression_model(x_train.shape[1])
    model.fit(x_train, y_train, epochs=600, verbose=1)

    # Round the continuous network output to the nearest integer so it can
    # be compared with the 0/1 labels.
    y_pred = np.around(model.predict(x_test), decimals=0)

    return classification_report(y_test, y_pred)
|
|
|
|
|
|
|
|
@ex.main
def my_main(train_size_param, test_size_param):
    """Main function of the experiment: print what prepare_model returns.

    Both parameters are accepted but not used in this body; prepare_model is
    called without arguments and, being decorated with @ex.capture, is
    expected to obtain its arguments from the experiment.
    """
    report = prepare_model()
    print(report)
|
|
|
|
|
|
|
|
|
|
|
|
# Execute the experiment as soon as this module is run; the returned run
# object is kept in `r`.
r = ex.run()

# Register the saved-model file as an artifact of the experiment.
# NOTE(review): nothing in this file writes vgsales_model/saved_model/, and
# Sacred describes add_artifact as a call made during a run - confirm that
# the file exists at this point and that this post-run call is recorded.
ex.add_artifact("vgsales_model/saved_model/saved_model.pb")
|