Zaktualizuj 'sacred_training.py'

2022-05-08 11:56:12 +02:00 · 2022-05-08 11:56:12 +02:00 · 59790b4bf1
commit 59790b4bf1
parent fc0267cad2
1 changed file with 146 additions and 145 deletions
--- a/sacred_training.py
+++ b/sacred_training.py
@@ -1,146 +1,147 @@
-import tensorflow as tf
+import tensorflow as tf
-import os
+import os
-import pandas as pd
+import pandas as pd
-import numpy as np
+import numpy as np
-import csv
+import csv
-from sklearn.model_selection import train_test_split
+from sklearn.model_selection import train_test_split
-import sys
+import sys
-from sacred.observers import MongoObserver
+from sacred.observers import MongoObserver
-from sacred.observers import FileStorageObserver
+from sacred.observers import FileStorageObserver
-from sacred import Experiment
+from sacred import Experiment
-
+
-ex = Experiment()
+ex = Experiment()
-#ex.observers.append(MongoObserver(url='mongodb://mongo_user:mongo_password@127.0.0.1:27017',db_name='sacred'))
+#ex.observers.append(MongoObserver(url='mongodb://mongo_user:mongo_password@127.0.0.1:27017',db_name='sacred'))
-ex.observers.append(FileStorageObserver('training'))
+ex.observers.append(FileStorageObserver('training'))
-epochs = int(sys.argv[1])
+epochs = int(sys.argv[1])
-
+
-@ex.config
+@ex.config
-def my_config():
+def my_config():
-    epoch = epochs
+    epoch = epochs
-    layerDenseRelu = 256
+    layerDenseRelu = 256
-    layerDropout = 0.01
+    layerDropout = 0.01
-    layerDenseSoftMax = 1000.0
+    layerDenseSoftMax = 1000.0
-
+
-#ex.add_config("config.json")
+#ex.add_config("config.json")
-
+
-@ex.capture
+@ex.capture
-def prepare_data():
+def prepare_data():
-    steam=pd.read_csv('data.csv',usecols=[0,1,2,3],names=['userId','game','behavior','hoursPlayed'])
+    steam=pd.read_csv('data.csv',usecols=[0,1,2,3],names=['userId','game','behavior','hoursPlayed'])
-    steam.isnull().values.any()
+    steam.isnull().values.any()
-    steam['userId'] = steam.userId.astype(str)
+    steam['userId'] = steam.userId.astype(str)
-    purchaseCount = steam[steam["behavior"] != "play"]["game"].value_counts()
+    purchaseCount = steam[steam["behavior"] != "play"]["game"].value_counts()
-    playCount = steam[steam["behavior"] != "purchase"]["game"].value_counts()
+    playCount = steam[steam["behavior"] != "purchase"]["game"].value_counts()
-
+
-    playerPurchaseCount = steam[steam["behavior"] != "play"]["userId"].value_counts()
+    playerPurchaseCount = steam[steam["behavior"] != "play"]["userId"].value_counts()
-    playerPlayCount = steam[steam["behavior"] != "purchase"]["userId"].value_counts()
+    playerPlayCount = steam[steam["behavior"] != "purchase"]["userId"].value_counts()
-
+
-    steam = steam[steam['behavior'] != 'purchase']
+    steam = steam[steam['behavior'] != 'purchase']
-    steam = steam.groupby("game").filter(lambda x: len(x)>10)
+    steam = steam.groupby("game").filter(lambda x: len(x)>10)
-    size=int(len(steam)/10)
+    size=int(len(steam)/10)
-
+
-    meanGame = steam[steam["behavior"] != "purchase"].groupby("game").mean()
+    meanGame = steam[steam["behavior"] != "purchase"].groupby("game").mean()
-    meanGame = meanGame.to_dict()
+    meanGame = meanGame.to_dict()
-    meanGame = meanGame['hoursPlayed']
+    meanGame = meanGame['hoursPlayed']
-
+
-    purchaseCount = purchaseCount.to_dict()
+    purchaseCount = purchaseCount.to_dict()
-    playCount = playCount.to_dict()
+    playCount = playCount.to_dict()
-    playerPurchaseCount = playerPurchaseCount.to_dict()
+    playerPurchaseCount = playerPurchaseCount.to_dict()
-    playerPlayCount = playerPlayCount.to_dict()
+    playerPlayCount = playerPlayCount.to_dict()
-
+
-    steam['meanTime'] = 0;
+    steam['meanTime'] = 0;
-    steam['purchaseCount'] = 0;
+    steam['purchaseCount'] = 0;
-    steam['playCount'] = 0;
+    steam['playCount'] = 0;
-    steam['playerPurchaseCount'] =0;
+    steam['playerPurchaseCount'] =0;
-    steam['playerPlayCount'] =0;
+    steam['playerPlayCount'] =0;
-    steam['playPercent'] =0;
+    steam['playPercent'] =0;
-
+
-    for i in steam.index:
+    for i in steam.index:
-        steam.at[i,'meanTime'] = meanGame[steam.at[i,'game']]
+        steam.at[i,'meanTime'] = meanGame[steam.at[i,'game']]
-        steam.at[i,'purchaseCount'] = purchaseCount[steam.at[i,'game']]
+        steam.at[i,'purchaseCount'] = purchaseCount[steam.at[i,'game']]
-        steam.at[i,'playCount'] = playCount[steam.at[i,'game']]
+        steam.at[i,'playCount'] = playCount[steam.at[i,'game']]
-        steam.at[i,'playerPurchaseCount'] = playerPurchaseCount[steam.at[i,'userId']]
+        steam.at[i,'playerPurchaseCount'] = playerPurchaseCount[steam.at[i,'userId']]
-        steam.at[i,'playerPlayCount'] = playerPlayCount[steam.at[i,'userId']]
+        steam.at[i,'playerPlayCount'] = playerPlayCount[steam.at[i,'userId']]
-        steam.at[i,'playPercent'] = playerPlayCount[steam.at[i,'userId']]/playerPurchaseCount[steam.at[i,'userId']]
+        steam.at[i,'playPercent'] = playerPlayCount[steam.at[i,'userId']]/playerPurchaseCount[steam.at[i,'userId']]
-
+
-    steam_train, steam_test = train_test_split(steam, test_size=size, random_state=1, stratify=steam["game"])
+    steam_train, steam_test = train_test_split(steam, test_size=size, random_state=1, stratify=steam["game"])
-    steam_train, steam_dev = train_test_split(steam_train, test_size=size, random_state=1, stratify=steam_train["game"])
+    steam_train, steam_dev = train_test_split(steam_train, test_size=size, random_state=1, stratify=steam_train["game"])
-    
+    
-    games = {}
+    games = {}
-    for i in steam['game']:
+    for i in steam['game']:
-        games[i] = 0
+        games[i] = 0
-
+
-    j=0
+    j=0
-    for key,game in games.items():
+    for key,game in games.items():
-        games[key]=j
+        games[key]=j
-        j=j+1
+        j=j+1
-
+
-    for i in steam['game']:
+    for i in steam['game']:
-        i = games[i]
+        i = games[i]
-        
+        
-    invGames = {v: k for k, v in games.items()}
+    invGames = {v: k for k, v in games.items()}
-
+
-    x_train = steam_train[['hoursPlayed','purchaseCount','playCount','playerPlayCount','playerPurchaseCount']]
+    x_train = steam_train[['hoursPlayed','purchaseCount','playCount','playerPlayCount','playerPurchaseCount']]
-    y_train = steam_train['game']
+    y_train = steam_train['game']
-
+
-    x_test = steam_test[['hoursPlayed','purchaseCount','playCount','playerPlayCount','playerPurchaseCount']]
+    x_test = steam_test[['hoursPlayed','purchaseCount','playCount','playerPlayCount','playerPurchaseCount']]
-    y_test = steam_test['game']
+    y_test = steam_test['game']
-
+
-
+
-    x_train = np.array(x_train)
+    x_train = np.array(x_train)
-    y_train = np.array(y_train)
+    y_train = np.array(y_train)
-    x_test = np.array(x_test)
+    x_test = np.array(x_test)
-    y_test = np.array(y_test)
+    y_test = np.array(y_test)
-
+
-    with open('xtest.csv','w',encoding='UTF-8',newline='') as xtest:
+    with open('xtest.csv','w',encoding='UTF-8',newline='') as xtest:
-        writer = csv.writer(xtest)
+        writer = csv.writer(xtest)
-        for i in x_test:
+        for i in x_test:
-            writer.writerow(i)
+            writer.writerow(i)
-        
+        
-    for i,j in enumerate(y_train):
+    for i,j in enumerate(y_train):
-        y_train[i] = games[j]
+        y_train[i] = games[j]
-        
+        
-    for i,j in enumerate(y_test):
+    for i,j in enumerate(y_test):
-        y_test[i] = games[j]
+        y_test[i] = games[j]
-    y_train = np.array(y_train).astype(np.float32)
+    y_train = np.array(y_train).astype(np.float32)
-    y_test = np.array(y_test).astype(np.float32)
+    y_test = np.array(y_test).astype(np.float32)
-    return x_train, y_train, x_test, y_test, invGames
+    np.savetxt("ytest.csv",y_test,delimiter=",",fmt='%d')
-
+    return x_train, y_train, x_test, y_test, invGames
-@ex.main
+
-def my_main(epoch,layerDenseRelu,layerDropout,layerDenseSoftMax,_run):
+@ex.main
-    x_train, y_train, x_test, y_test, invGames = prepare_data()
+def my_main(epoch,layerDenseRelu,layerDropout,layerDenseSoftMax,_run):
-    model = tf.keras.models.Sequential([
+    x_train, y_train, x_test, y_test, invGames = prepare_data()
-        tf.keras.layers.Flatten(input_shape=(5,1)),
+    model = tf.keras.models.Sequential([
-        tf.keras.layers.Dense(layerDenseRelu, activation='relu'),
+        tf.keras.layers.Flatten(input_shape=(5,1)),
-        tf.keras.layers.Dropout(layerDropout),
+        tf.keras.layers.Dense(layerDenseRelu, activation='relu'),
-        tf.keras.layers.Dense(layerDenseSoftMax, activation='softmax')
+        tf.keras.layers.Dropout(layerDropout),
-    ])
+        tf.keras.layers.Dense(layerDenseSoftMax, activation='softmax')
-
+    ])
-    model.compile(optimizer='adam',
+
-                  loss='sparse_categorical_crossentropy',
+    model.compile(optimizer='adam',
-                  metrics=['accuracy'])
+                  loss='sparse_categorical_crossentropy',
-
+                  metrics=['accuracy'])
-
+
-    model.fit(x_train, y_train, epochs=epoch)
+
-    evaluation = model.evaluate(x_test, y_test)
+    model.fit(x_train, y_train, epochs=epoch)
-    _run.log_scalar("training.loss", evaluation[0])
+    evaluation = model.evaluate(x_test, y_test)
-    _run.log_scalar("training.accuracy", evaluation[1])
+    _run.log_scalar("training.loss", evaluation[0])
-    
+    _run.log_scalar("training.accuracy", evaluation[1])
-    prediction = model.predict(x_test)
+    
-    classes_x=np.argmax(prediction,axis=1)
+    prediction = model.predict(x_test)
-
+    classes_x=np.argmax(prediction,axis=1)
-    rows = []
+
-
+    rows = []
-    for j,i in enumerate(classes_x):
+
-        row = [invGames[i],invGames[y_test[j]]]
+    for j,i in enumerate(classes_x):
-        rows.append(row)
+        row = [invGames[i],invGames[y_test[j]]]
-    with open('results.csv','w',encoding='UTF-8',newline='') as f:
+        rows.append(row)
-            writer = csv.writer(f)
+    with open('results.csv','w',encoding='UTF-8',newline='') as f:
-            writer.writerow(["predicted", "expected"])
+            writer = csv.writer(f)
-            for row in rows:
+            writer.writerow(["predicted", "expected"])
-                writer.writerow(row)
+            for row in rows:
-    
+                writer.writerow(row)
-    model.save('./model')
+    
-    ex.add_artifact('./model/saved_model.pb')
+    model.save('./model')
-    
+    ex.add_artifact('./model/saved_model.pb')
-
+    
 ex.run()