Zaktualizuj 'sacred_training.py'

This commit is contained in:
Szymon Jadczak 2022-05-08 11:56:12 +02:00
parent fc0267cad2
commit 59790b4bf1

View File

@@ -1,146 +1,147 @@
import tensorflow as tf import tensorflow as tf
import os import os
import pandas as pd import pandas as pd
import numpy as np import numpy as np
import csv import csv
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
import sys import sys
from sacred.observers import MongoObserver from sacred.observers import MongoObserver
from sacred.observers import FileStorageObserver from sacred.observers import FileStorageObserver
from sacred import Experiment from sacred import Experiment
ex = Experiment() ex = Experiment()
#ex.observers.append(MongoObserver(url='mongodb://mongo_user:mongo_password@127.0.0.1:27017',db_name='sacred')) #ex.observers.append(MongoObserver(url='mongodb://mongo_user:mongo_password@127.0.0.1:27017',db_name='sacred'))
ex.observers.append(FileStorageObserver('training')) ex.observers.append(FileStorageObserver('training'))
epochs = int(sys.argv[1]) epochs = int(sys.argv[1])
@ex.config @ex.config
def my_config(): def my_config():
epoch = epochs epoch = epochs
layerDenseRelu = 256 layerDenseRelu = 256
layerDropout = 0.01 layerDropout = 0.01
layerDenseSoftMax = 1000.0 layerDenseSoftMax = 1000.0
#ex.add_config("config.json") #ex.add_config("config.json")
@ex.capture @ex.capture
def prepare_data(): def prepare_data():
steam=pd.read_csv('data.csv',usecols=[0,1,2,3],names=['userId','game','behavior','hoursPlayed']) steam=pd.read_csv('data.csv',usecols=[0,1,2,3],names=['userId','game','behavior','hoursPlayed'])
steam.isnull().values.any() steam.isnull().values.any()
steam['userId'] = steam.userId.astype(str) steam['userId'] = steam.userId.astype(str)
purchaseCount = steam[steam["behavior"] != "play"]["game"].value_counts() purchaseCount = steam[steam["behavior"] != "play"]["game"].value_counts()
playCount = steam[steam["behavior"] != "purchase"]["game"].value_counts() playCount = steam[steam["behavior"] != "purchase"]["game"].value_counts()
playerPurchaseCount = steam[steam["behavior"] != "play"]["userId"].value_counts() playerPurchaseCount = steam[steam["behavior"] != "play"]["userId"].value_counts()
playerPlayCount = steam[steam["behavior"] != "purchase"]["userId"].value_counts() playerPlayCount = steam[steam["behavior"] != "purchase"]["userId"].value_counts()
steam = steam[steam['behavior'] != 'purchase'] steam = steam[steam['behavior'] != 'purchase']
steam = steam.groupby("game").filter(lambda x: len(x)>10) steam = steam.groupby("game").filter(lambda x: len(x)>10)
size=int(len(steam)/10) size=int(len(steam)/10)
meanGame = steam[steam["behavior"] != "purchase"].groupby("game").mean() meanGame = steam[steam["behavior"] != "purchase"].groupby("game").mean()
meanGame = meanGame.to_dict() meanGame = meanGame.to_dict()
meanGame = meanGame['hoursPlayed'] meanGame = meanGame['hoursPlayed']
purchaseCount = purchaseCount.to_dict() purchaseCount = purchaseCount.to_dict()
playCount = playCount.to_dict() playCount = playCount.to_dict()
playerPurchaseCount = playerPurchaseCount.to_dict() playerPurchaseCount = playerPurchaseCount.to_dict()
playerPlayCount = playerPlayCount.to_dict() playerPlayCount = playerPlayCount.to_dict()
steam['meanTime'] = 0; steam['meanTime'] = 0;
steam['purchaseCount'] = 0; steam['purchaseCount'] = 0;
steam['playCount'] = 0; steam['playCount'] = 0;
steam['playerPurchaseCount'] =0; steam['playerPurchaseCount'] =0;
steam['playerPlayCount'] =0; steam['playerPlayCount'] =0;
steam['playPercent'] =0; steam['playPercent'] =0;
for i in steam.index: for i in steam.index:
steam.at[i,'meanTime'] = meanGame[steam.at[i,'game']] steam.at[i,'meanTime'] = meanGame[steam.at[i,'game']]
steam.at[i,'purchaseCount'] = purchaseCount[steam.at[i,'game']] steam.at[i,'purchaseCount'] = purchaseCount[steam.at[i,'game']]
steam.at[i,'playCount'] = playCount[steam.at[i,'game']] steam.at[i,'playCount'] = playCount[steam.at[i,'game']]
steam.at[i,'playerPurchaseCount'] = playerPurchaseCount[steam.at[i,'userId']] steam.at[i,'playerPurchaseCount'] = playerPurchaseCount[steam.at[i,'userId']]
steam.at[i,'playerPlayCount'] = playerPlayCount[steam.at[i,'userId']] steam.at[i,'playerPlayCount'] = playerPlayCount[steam.at[i,'userId']]
steam.at[i,'playPercent'] = playerPlayCount[steam.at[i,'userId']]/playerPurchaseCount[steam.at[i,'userId']] steam.at[i,'playPercent'] = playerPlayCount[steam.at[i,'userId']]/playerPurchaseCount[steam.at[i,'userId']]
steam_train, steam_test = train_test_split(steam, test_size=size, random_state=1, stratify=steam["game"]) steam_train, steam_test = train_test_split(steam, test_size=size, random_state=1, stratify=steam["game"])
steam_train, steam_dev = train_test_split(steam_train, test_size=size, random_state=1, stratify=steam_train["game"]) steam_train, steam_dev = train_test_split(steam_train, test_size=size, random_state=1, stratify=steam_train["game"])
games = {} games = {}
for i in steam['game']: for i in steam['game']:
games[i] = 0 games[i] = 0
j=0 j=0
for key,game in games.items(): for key,game in games.items():
games[key]=j games[key]=j
j=j+1 j=j+1
for i in steam['game']: for i in steam['game']:
i = games[i] i = games[i]
invGames = {v: k for k, v in games.items()} invGames = {v: k for k, v in games.items()}
x_train = steam_train[['hoursPlayed','purchaseCount','playCount','playerPlayCount','playerPurchaseCount']] x_train = steam_train[['hoursPlayed','purchaseCount','playCount','playerPlayCount','playerPurchaseCount']]
y_train = steam_train['game'] y_train = steam_train['game']
x_test = steam_test[['hoursPlayed','purchaseCount','playCount','playerPlayCount','playerPurchaseCount']] x_test = steam_test[['hoursPlayed','purchaseCount','playCount','playerPlayCount','playerPurchaseCount']]
y_test = steam_test['game'] y_test = steam_test['game']
x_train = np.array(x_train) x_train = np.array(x_train)
y_train = np.array(y_train) y_train = np.array(y_train)
x_test = np.array(x_test) x_test = np.array(x_test)
y_test = np.array(y_test) y_test = np.array(y_test)
with open('xtest.csv','w',encoding='UTF-8',newline='') as xtest: with open('xtest.csv','w',encoding='UTF-8',newline='') as xtest:
writer = csv.writer(xtest) writer = csv.writer(xtest)
for i in x_test: for i in x_test:
writer.writerow(i) writer.writerow(i)
for i,j in enumerate(y_train): for i,j in enumerate(y_train):
y_train[i] = games[j] y_train[i] = games[j]
for i,j in enumerate(y_test): for i,j in enumerate(y_test):
y_test[i] = games[j] y_test[i] = games[j]
y_train = np.array(y_train).astype(np.float32) y_train = np.array(y_train).astype(np.float32)
y_test = np.array(y_test).astype(np.float32) y_test = np.array(y_test).astype(np.float32)
return x_train, y_train, x_test, y_test, invGames np.savetxt("ytest.csv",y_test,delimiter=",",fmt='%d')
return x_train, y_train, x_test, y_test, invGames
@ex.main
def my_main(epoch,layerDenseRelu,layerDropout,layerDenseSoftMax,_run): @ex.main
x_train, y_train, x_test, y_test, invGames = prepare_data() def my_main(epoch,layerDenseRelu,layerDropout,layerDenseSoftMax,_run):
model = tf.keras.models.Sequential([ x_train, y_train, x_test, y_test, invGames = prepare_data()
tf.keras.layers.Flatten(input_shape=(5,1)), model = tf.keras.models.Sequential([
tf.keras.layers.Dense(layerDenseRelu, activation='relu'), tf.keras.layers.Flatten(input_shape=(5,1)),
tf.keras.layers.Dropout(layerDropout), tf.keras.layers.Dense(layerDenseRelu, activation='relu'),
tf.keras.layers.Dense(layerDenseSoftMax, activation='softmax') tf.keras.layers.Dropout(layerDropout),
]) tf.keras.layers.Dense(layerDenseSoftMax, activation='softmax')
])
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy', model.compile(optimizer='adam',
metrics=['accuracy']) loss='sparse_categorical_crossentropy',
metrics=['accuracy'])
model.fit(x_train, y_train, epochs=epoch)
evaluation = model.evaluate(x_test, y_test) model.fit(x_train, y_train, epochs=epoch)
_run.log_scalar("training.loss", evaluation[0]) evaluation = model.evaluate(x_test, y_test)
_run.log_scalar("training.accuracy", evaluation[1]) _run.log_scalar("training.loss", evaluation[0])
_run.log_scalar("training.accuracy", evaluation[1])
prediction = model.predict(x_test)
classes_x=np.argmax(prediction,axis=1) prediction = model.predict(x_test)
classes_x=np.argmax(prediction,axis=1)
rows = []
rows = []
for j,i in enumerate(classes_x):
row = [invGames[i],invGames[y_test[j]]] for j,i in enumerate(classes_x):
rows.append(row) row = [invGames[i],invGames[y_test[j]]]
with open('results.csv','w',encoding='UTF-8',newline='') as f: rows.append(row)
writer = csv.writer(f) with open('results.csv','w',encoding='UTF-8',newline='') as f:
writer.writerow(["predicted", "expected"]) writer = csv.writer(f)
for row in rows: writer.writerow(["predicted", "expected"])
writer.writerow(row) for row in rows:
writer.writerow(row)
model.save('./model')
ex.add_artifact('./model/saved_model.pb') model.save('./model')
ex.add_artifact('./model/saved_model.pb')
ex.run() ex.run()