"""Sacred experiment: train a Keras classifier that predicts the game of a play record.

Usage: the first command-line argument is the number of training epochs.
"""
import csv
import os
import sys

import numpy as np
import pandas as pd
import tensorflow as tf
from sacred import Experiment
from sacred.observers import FileStorageObserver
from sacred.observers import MongoObserver
from sklearn.model_selection import train_test_split

ex = Experiment("444386 sacred_scopes", interactive=True, save_git_info=False)
# NOTE(review): the MongoDB user and password are hard-coded in this URL;
# consider moving them out of the source tree.
ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017',db_name='sacred'))
ex.observers.append(FileStorageObserver('training'))

# The epoch count is read directly from argv at import time: running the script
# without an integer first argument fails here with IndexError / ValueError.
epochs = int(sys.argv[1])


# Hyper-parameters injected by sacred into my_main (matched by parameter name).
@ex.config
def my_config():
    epoch = epochs
    layerDenseRelu = 256
    layerDropout = 0.01
    layerDenseSoftMax = 1000.0

#ex.add_config("config.json")


def _map_strict(keys, table):
    """Translate every element of the Series *keys* through the dict *table*.

    Raises:
        KeyError: for the first element of *keys* that is not a key of
            *table*, mirroring what a plain ``table[key]`` lookup would do.
    """
    absent = ~keys.isin(list(table))
    if absent.any():
        raise KeyError(keys[absent].iloc[0])
    return keys.map(table)


@ex.capture
def prepare_data():
    """Load ``data.csv``, derive count features and build the train/test arrays.

    Side effects: writes the test features to ``xtest.csv`` and the encoded
    test labels to ``ytest.csv``.

    Returns:
        A tuple ``(x_train, y_train, x_test, y_test, invGames)`` where the
        ``x_*`` arrays hold the five feature columns, the ``y_*`` arrays hold
        the game index of every row as float32, and ``invGames`` maps a game
        index back to the game name.
    """
    feature_columns = ['hoursPlayed', 'purchaseCount', 'playCount',
                       'playerPlayCount', 'playerPurchaseCount']

    steam = pd.read_csv('data.csv', usecols=[0, 1, 2, 3],
                        names=['userId', 'game', 'behavior', 'hoursPlayed'])
    steam['userId'] = steam.userId.astype(str)

    # Counts are taken over the complete data set, before any row filtering.
    not_play = steam[steam["behavior"] != "play"]
    not_purchase = steam[steam["behavior"] != "purchase"]
    purchase_count = not_play["game"].value_counts().to_dict()
    play_count = not_purchase["game"].value_counts().to_dict()
    player_purchase_count = not_play["userId"].value_counts().to_dict()
    player_play_count = not_purchase["userId"].value_counts().to_dict()

    # Keep only non-purchase rows of games that have more than 10 such rows.
    steam = steam[steam['behavior'] != 'purchase']
    steam = steam.groupby("game").filter(lambda x: len(x) > 10)
    size = int(len(steam) / 10)

    # Aggregate only the numeric column: a frame-wide mean() would also hit the
    # string columns, which pandas >= 2.0 rejects with a TypeError.
    mean_game = steam.groupby("game")["hoursPlayed"].mean().to_dict()

    # Vectorised lookups instead of per-row .at writes; every column gets its
    # natural dtype in one assignment.
    steam['meanTime'] = _map_strict(steam['game'], mean_game)
    steam['purchaseCount'] = _map_strict(steam['game'], purchase_count)
    steam['playCount'] = _map_strict(steam['game'], play_count)
    steam['playerPurchaseCount'] = _map_strict(steam['userId'], player_purchase_count)
    steam['playerPlayCount'] = _map_strict(steam['userId'], player_play_count)
    steam['playPercent'] = steam['playerPlayCount'] / steam['playerPurchaseCount']

    steam_train, steam_test = train_test_split(
        steam, test_size=size, random_state=1, stratify=steam["game"])
    # The dev split is not used below, but carving it out shrinks steam_train.
    steam_train, _steam_dev = train_test_split(
        steam_train, test_size=size, random_state=1, stratify=steam_train["game"])

    # Game name -> class index, numbered in order of first appearance.
    games = {name: index for index, name in enumerate(dict.fromkeys(steam['game']))}
    invGames = {index: name for name, index in games.items()}

    x_train = np.array(steam_train[feature_columns])
    x_test = np.array(steam_test[feature_columns])
    y_train = np.array([games[name] for name in steam_train['game']]).astype(np.float32)
    y_test = np.array([games[name] for name in steam_test['game']]).astype(np.float32)

    with open('xtest.csv', 'w', encoding='UTF-8', newline='') as xtest:
        csv.writer(xtest).writerows(x_test)
    np.savetxt("ytest.csv", y_test, delimiter=",", fmt='%d')

    return x_train, y_train, x_test, y_test, invGames


@ex.main
def my_main(epoch,layerDenseRelu,layerDropout,layerDenseSoftMax,_run):
    """Train the classifier, log test metrics, and store predictions and the model.

    Args:
        epoch: number of training epochs passed to ``model.fit``.
        layerDenseRelu: unit count of the hidden ReLU layer.
        layerDropout: dropout rate applied after the hidden layer.
        layerDenseSoftMax: unit count of the softmax output layer.
        _run: sacred run object used for metric logging.

    Side effects: writes ``results.csv`` (predicted vs. expected game names),
    saves the model to ``./model`` and registers ``saved_model.pb`` as a
    sacred artifact.
    """
    x_train, y_train, x_test, y_test, invGames = prepare_data()

    model = tf.keras.models.Sequential([
        # NOTE(review): the feature arrays are built from five columns (rows of
        # length 5) while the declared input shape is (5, 1) - confirm intended.
        tf.keras.layers.Flatten(input_shape=(5,1)),
        tf.keras.layers.Dense(layerDenseRelu, activation='relu'),
        tf.keras.layers.Dropout(layerDropout),
        # The configured value is a float (1000.0); a unit count must be integral.
        tf.keras.layers.Dense(int(layerDenseSoftMax), activation='softmax')
    ])
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    model.fit(x_train, y_train, epochs=epoch)

    # These metrics come from evaluating on the test split, although they are
    # logged under "training.*" names.
    evaluation = model.evaluate(x_test, y_test)
    _run.log_scalar("training.loss", evaluation[0])
    _run.log_scalar("training.accuracy", evaluation[1])

    prediction = model.predict(x_test)
    classes_x = np.argmax(prediction, axis=1)
    rows = [[invGames[predicted], invGames[expected]]
            for predicted, expected in zip(classes_x, y_test)]

    with open('results.csv', 'w', encoding='UTF-8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(["predicted", "expected"])
        writer.writerows(rows)

    model.save('./model')
    ex.add_artifact('./model/saved_model.pb')


ex.run()