From 59790b4bf1658ff538fc5c46c84c0e8972097d75 Mon Sep 17 00:00:00 2001 From: Szymon Jadczak Date: Sun, 8 May 2022 11:56:12 +0200 Subject: [PATCH] Zaktualizuj 'sacred_training.py' --- sacred_training.py | 291 +++++++++++++++++++++++---------------------- 1 file changed, 146 insertions(+), 145 deletions(-) diff --git a/sacred_training.py b/sacred_training.py index cc2c304..49ccbef 100644 --- a/sacred_training.py +++ b/sacred_training.py @@ -1,146 +1,147 @@ -import tensorflow as tf -import os -import pandas as pd -import numpy as np -import csv -from sklearn.model_selection import train_test_split -import sys -from sacred.observers import MongoObserver -from sacred.observers import FileStorageObserver -from sacred import Experiment - -ex = Experiment() -#ex.observers.append(MongoObserver(url='mongodb://mongo_user:mongo_password@127.0.0.1:27017',db_name='sacred')) -ex.observers.append(FileStorageObserver('training')) -epochs = int(sys.argv[1]) - -@ex.config -def my_config(): - epoch = epochs - layerDenseRelu = 256 - layerDropout = 0.01 - layerDenseSoftMax = 1000.0 - -#ex.add_config("config.json") - -@ex.capture -def prepare_data(): - steam=pd.read_csv('data.csv',usecols=[0,1,2,3],names=['userId','game','behavior','hoursPlayed']) - steam.isnull().values.any() - steam['userId'] = steam.userId.astype(str) - purchaseCount = steam[steam["behavior"] != "play"]["game"].value_counts() - playCount = steam[steam["behavior"] != "purchase"]["game"].value_counts() - - playerPurchaseCount = steam[steam["behavior"] != "play"]["userId"].value_counts() - playerPlayCount = steam[steam["behavior"] != "purchase"]["userId"].value_counts() - - steam = steam[steam['behavior'] != 'purchase'] - steam = steam.groupby("game").filter(lambda x: len(x)>10) - size=int(len(steam)/10) - - meanGame = steam[steam["behavior"] != "purchase"].groupby("game").mean() - meanGame = meanGame.to_dict() - meanGame = meanGame['hoursPlayed'] - - purchaseCount = purchaseCount.to_dict() - playCount = playCount.to_dict() - playerPurchaseCount = playerPurchaseCount.to_dict() - playerPlayCount = playerPlayCount.to_dict() - - steam['meanTime'] = 0; - steam['purchaseCount'] = 0; - steam['playCount'] = 0; - steam['playerPurchaseCount'] =0; - steam['playerPlayCount'] =0; - steam['playPercent'] =0; - - for i in steam.index: - steam.at[i,'meanTime'] = meanGame[steam.at[i,'game']] - steam.at[i,'purchaseCount'] = purchaseCount[steam.at[i,'game']] - steam.at[i,'playCount'] = playCount[steam.at[i,'game']] - steam.at[i,'playerPurchaseCount'] = playerPurchaseCount[steam.at[i,'userId']] - steam.at[i,'playerPlayCount'] = playerPlayCount[steam.at[i,'userId']] - steam.at[i,'playPercent'] = playerPlayCount[steam.at[i,'userId']]/playerPurchaseCount[steam.at[i,'userId']] - - steam_train, steam_test = train_test_split(steam, test_size=size, random_state=1, stratify=steam["game"]) - steam_train, steam_dev = train_test_split(steam_train, test_size=size, random_state=1, stratify=steam_train["game"]) - - games = {} - for i in steam['game']: - games[i] = 0 - - j=0 - for key,game in games.items(): - games[key]=j - j=j+1 - - for i in steam['game']: - i = games[i] - - invGames = {v: k for k, v in games.items()} - - x_train = steam_train[['hoursPlayed','purchaseCount','playCount','playerPlayCount','playerPurchaseCount']] - y_train = steam_train['game'] - - x_test = steam_test[['hoursPlayed','purchaseCount','playCount','playerPlayCount','playerPurchaseCount']] - y_test = steam_test['game'] - - - x_train = np.array(x_train) - y_train = np.array(y_train) - x_test = np.array(x_test) - y_test = np.array(y_test) - - with open('xtest.csv','w',encoding='UTF-8',newline='') as xtest: - writer = csv.writer(xtest) - for i in x_test: - writer.writerow(i) - - for i,j in enumerate(y_train): - y_train[i] = games[j] - - for i,j in enumerate(y_test): - y_test[i] = games[j] - y_train = np.array(y_train).astype(np.float32) - y_test = np.array(y_test).astype(np.float32) - return x_train, y_train, x_test, y_test, invGames - -@ex.main -def my_main(epoch,layerDenseRelu,layerDropout,layerDenseSoftMax,_run): - x_train, y_train, x_test, y_test, invGames = prepare_data() - model = tf.keras.models.Sequential([ - tf.keras.layers.Flatten(input_shape=(5,1)), - tf.keras.layers.Dense(layerDenseRelu, activation='relu'), - tf.keras.layers.Dropout(layerDropout), - tf.keras.layers.Dense(layerDenseSoftMax, activation='softmax') - ]) - - model.compile(optimizer='adam', - loss='sparse_categorical_crossentropy', - metrics=['accuracy']) - - - model.fit(x_train, y_train, epochs=epoch) - evaluation = model.evaluate(x_test, y_test) - _run.log_scalar("training.loss", evaluation[0]) - _run.log_scalar("training.accuracy", evaluation[1]) - - prediction = model.predict(x_test) - classes_x=np.argmax(prediction,axis=1) - - rows = [] - - for j,i in enumerate(classes_x): - row = [invGames[i],invGames[y_test[j]]] - rows.append(row) - with open('results.csv','w',encoding='UTF-8',newline='') as f: - writer = csv.writer(f) - writer.writerow(["predicted", "expected"]) - for row in rows: - writer.writerow(row) - - model.save('./model') - ex.add_artifact('./model/saved_model.pb') - - +import tensorflow as tf +import os +import pandas as pd +import numpy as np +import csv +from sklearn.model_selection import train_test_split +import sys +from sacred.observers import MongoObserver +from sacred.observers import FileStorageObserver +from sacred import Experiment + +ex = Experiment() +#ex.observers.append(MongoObserver(url='mongodb://mongo_user:mongo_password@127.0.0.1:27017',db_name='sacred')) +ex.observers.append(FileStorageObserver('training')) +epochs = int(sys.argv[1]) + +@ex.config +def my_config(): + epoch = epochs + layerDenseRelu = 256 + layerDropout = 0.01 + layerDenseSoftMax = 1000.0 + +#ex.add_config("config.json") + +@ex.capture +def prepare_data(): + steam=pd.read_csv('data.csv',usecols=[0,1,2,3],names=['userId','game','behavior','hoursPlayed']) + steam.isnull().values.any() + steam['userId'] = steam.userId.astype(str) + purchaseCount = steam[steam["behavior"] != "play"]["game"].value_counts() + playCount = steam[steam["behavior"] != "purchase"]["game"].value_counts() + + playerPurchaseCount = steam[steam["behavior"] != "play"]["userId"].value_counts() + playerPlayCount = steam[steam["behavior"] != "purchase"]["userId"].value_counts() + + steam = steam[steam['behavior'] != 'purchase'] + steam = steam.groupby("game").filter(lambda x: len(x)>10) + size=int(len(steam)/10) + + meanGame = steam[steam["behavior"] != "purchase"].groupby("game").mean() + meanGame = meanGame.to_dict() + meanGame = meanGame['hoursPlayed'] + + purchaseCount = purchaseCount.to_dict() + playCount = playCount.to_dict() + playerPurchaseCount = playerPurchaseCount.to_dict() + playerPlayCount = playerPlayCount.to_dict() + + steam['meanTime'] = 0; + steam['purchaseCount'] = 0; + steam['playCount'] = 0; + steam['playerPurchaseCount'] =0; + steam['playerPlayCount'] =0; + steam['playPercent'] =0; + + for i in steam.index: + steam.at[i,'meanTime'] = meanGame[steam.at[i,'game']] + steam.at[i,'purchaseCount'] = purchaseCount[steam.at[i,'game']] + steam.at[i,'playCount'] = playCount[steam.at[i,'game']] + steam.at[i,'playerPurchaseCount'] = playerPurchaseCount[steam.at[i,'userId']] + steam.at[i,'playerPlayCount'] = playerPlayCount[steam.at[i,'userId']] + steam.at[i,'playPercent'] = playerPlayCount[steam.at[i,'userId']]/playerPurchaseCount[steam.at[i,'userId']] + + steam_train, steam_test = train_test_split(steam, test_size=size, random_state=1, stratify=steam["game"]) + steam_train, steam_dev = train_test_split(steam_train, test_size=size, random_state=1, stratify=steam_train["game"]) + + games = {} + for i in steam['game']: + games[i] = 0 + + j=0 + for key,game in games.items(): + games[key]=j + j=j+1 + + for i in steam['game']: + i = games[i] + + invGames = {v: k for k, v in games.items()} + + x_train = steam_train[['hoursPlayed','purchaseCount','playCount','playerPlayCount','playerPurchaseCount']] + y_train = steam_train['game'] + + x_test = steam_test[['hoursPlayed','purchaseCount','playCount','playerPlayCount','playerPurchaseCount']] + y_test = steam_test['game'] + + + x_train = np.array(x_train) + y_train = np.array(y_train) + x_test = np.array(x_test) + y_test = np.array(y_test) + + with open('xtest.csv','w',encoding='UTF-8',newline='') as xtest: + writer = csv.writer(xtest) + for i in x_test: + writer.writerow(i) + + for i,j in enumerate(y_train): + y_train[i] = games[j] + + for i,j in enumerate(y_test): + y_test[i] = games[j] + y_train = np.array(y_train).astype(np.float32) + y_test = np.array(y_test).astype(np.float32) + np.savetxt("ytest.csv",y_test,delimiter=",",fmt='%d') + return x_train, y_train, x_test, y_test, invGames + +@ex.main +def my_main(epoch,layerDenseRelu,layerDropout,layerDenseSoftMax,_run): + x_train, y_train, x_test, y_test, invGames = prepare_data() + model = tf.keras.models.Sequential([ + tf.keras.layers.Flatten(input_shape=(5,1)), + tf.keras.layers.Dense(layerDenseRelu, activation='relu'), + tf.keras.layers.Dropout(layerDropout), + tf.keras.layers.Dense(layerDenseSoftMax, activation='softmax') + ]) + + model.compile(optimizer='adam', + loss='sparse_categorical_crossentropy', + metrics=['accuracy']) + + + model.fit(x_train, y_train, epochs=epoch) + evaluation = model.evaluate(x_test, y_test) + _run.log_scalar("training.loss", evaluation[0]) + _run.log_scalar("training.accuracy", evaluation[1]) + + prediction = model.predict(x_test) + classes_x=np.argmax(prediction,axis=1) + + rows = [] + + for j,i in enumerate(classes_x): + row = [invGames[i],invGames[y_test[j]]] + rows.append(row) + with open('results.csv','w',encoding='UTF-8',newline='') as f: + writer = csv.writer(f) + writer.writerow(["predicted", "expected"]) + for row in rows: + writer.writerow(row) + + model.save('./model') + ex.add_artifact('./model/saved_model.pb') + + ex.run() \ No newline at end of file