biblioteki_dl
This commit is contained in:
parent
8863f36ca8
commit
ea5a76edbc
@ -7,5 +7,6 @@ RUN mkdir /.kaggle && chmod o+w /.kaggle
|
||||
# Python packages imported by biblioteki_dl.py.
RUN pip3 install pandas
RUN pip3 install numpy
# The distribution is published on PyPI as "scikit-learn"; the "sklearn" name
# is a deprecated placeholder whose installation fails.
RUN pip3 install scikit-learn
RUN pip3 install tensorflow
# Dataset and scripts copied from the build context into the image.
COPY ./steam-200k.csv ./
COPY ./kagle.py ./
COPY ./biblioteki_dl.py ./
|
121
biblioteki_dl.py
Normal file
121
biblioteki_dl.py
Normal file
@ -0,0 +1,121 @@
|
||||
import tensorflow as tf
|
||||
import os
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
import csv
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
# Fetch and unpack the Steam dataset from Kaggle.  Both steps remain
# best-effort (execution continues whatever they return), but a non-zero exit
# status is now reported instead of being silently dropped.
for _command in (
    "kaggle datasets download -d tamber/steam-video-games",
    "unzip -o steam-video-games.zip",
):
    _status = os.system(_command)
    if _status != 0:
        print("warning: '{}' exited with status {}".format(_command, _status))
|
||||
|
||||
# Load the first four CSV columns.  Column names are supplied explicitly, so
# the first line of the file is read as data rather than as a header.
steam = pd.read_csv(
    'steam-200k.csv',
    usecols=[0, 1, 2, 3],
    names=['userId', 'game', 'behavior', 'hoursPlayed'],
)
steam['userId'] = steam.userId.astype(str)

# Row masks over the full log, computed once and reused for every count.
_not_play = steam["behavior"] != "play"
_not_purchase = steam["behavior"] != "purchase"

# Occurrence counts per game and per user, taken BEFORE any filtering and
# stored as plain dicts for the lookups done later in the script.
purchaseCount = steam.loc[_not_play, "game"].value_counts().to_dict()
playCount = steam.loc[_not_purchase, "game"].value_counts().to_dict()

playerPurchaseCount = steam.loc[_not_play, "userId"].value_counts().to_dict()
playerPlayCount = steam.loc[_not_purchase, "userId"].value_counts().to_dict()

# Drop the "purchase" rows, then keep only games with more than 10 remaining rows.
steam = steam[_not_purchase]
steam = steam.groupby("game").filter(lambda x: len(x) > 10)
# Number of rows held out for each of the later test / dev splits (one tenth).
size = int(len(steam) / 10)

# Mean hoursPlayed per game.  The column is selected before averaging so the
# string columns (userId, behavior) never reach mean(), which raises on
# pandas >= 2.0.  No extra behavior filter is needed: the "purchase" rows were
# already removed above.
meanGame = steam.groupby("game")["hoursPlayed"].mean().to_dict()
|
||||
|
||||
# Attach the per-game and per-user aggregates to every remaining row.
#
# Each column is built in a single pass and assigned whole, instead of being
# pre-filled with integer 0 and overwritten cell by cell through `.at`: that
# avoids writing float values into integer columns and removes the slow
# per-cell loop.  Plain dict indexing is kept on purpose so that a game or
# user missing from a lookup table still raises KeyError.
steam['meanTime'] = [meanGame[title] for title in steam['game']]
steam['purchaseCount'] = [purchaseCount[title] for title in steam['game']]
steam['playCount'] = [playCount[title] for title in steam['game']]
steam['playerPurchaseCount'] = [playerPurchaseCount[user] for user in steam['userId']]
steam['playerPlayCount'] = [playerPlayCount[user] for user in steam['userId']]
# Ratio of the user's play-row count to their purchase-row count.
steam['playPercent'] = steam['playerPlayCount'] / steam['playerPurchaseCount']
|
||||
|
||||
|
||||
# Hold out `size` rows as the test set, then a further `size` rows of the
# remainder as the dev set.  Both splits are stratified by game and use the
# same fixed seed.
_split_options = {"test_size": size, "random_state": 1}
steam_train, steam_test = train_test_split(
    steam, stratify=steam["game"], **_split_options
)
steam_train, steam_dev = train_test_split(
    steam_train, stratify=steam_train["game"], **_split_options
)

print(steam)
|
||||
|
||||
# Assign every distinct game title a consecutive integer id, numbered in order
# of first appearance in `steam` (dict.fromkeys de-duplicates while keeping
# that order).  The former loop that re-bound its own loop variable had no
# effect on the data and is dropped.
games = {title: index for index, title in enumerate(dict.fromkeys(steam['game']))}

# Reverse lookup: integer id -> game title.
invGames = {index: title for title, index in games.items()}
|
||||
|
||||
# Columns fed to the model; defined once so the train and test matrices
# cannot drift apart.
_feature_columns = [
    'hoursPlayed',
    'purchaseCount',
    'playCount',
    'playerPlayCount',
    'playerPurchaseCount',
]

x_train = np.array(steam_train[_feature_columns])
x_test = np.array(steam_test[_feature_columns])

# Targets are the integer ids from `games`, built directly as integer arrays
# rather than by overwriting an array of title strings element by element.
# An unknown title raises KeyError.
y_train = np.array([games[title] for title in steam_train['game']])
y_test = np.array([games[title] for title in steam_test['game']])
|
||||
|
||||
|
||||
|
||||
# Network dimensions are derived from the data instead of being hard-coded:
# the input matches the 2-D (rows, features) matrix in `x_train`, and the
# softmax layer has exactly one unit per game id in `games`, so every label is
# a valid class index for the sparse categorical cross-entropy loss.
_num_features = x_train.shape[1]
_num_classes = len(games)

model = tf.keras.models.Sequential([
    tf.keras.layers.Flatten(input_shape=(_num_features,)),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.01),
    tf.keras.layers.Dense(_num_classes, activation='softmax'),
])

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)
|
||||
|
||||
# Convert both label arrays to float32 before handing them to Keras.
y_train, y_test = (
    np.array(labels).astype(np.float32) for labels in (y_train, y_test)
)

# Train for 100 epochs, report the metrics on the test set, then take the
# highest-scoring class of each test row as its predicted game id.
model.fit(x_train, y_train, epochs=100)
model.evaluate(x_test, y_test)

prediction = model.predict(x_test)
classes_x = np.argmax(prediction, axis=1)
|
||||
|
||||
# Pair each predicted game title with the expected one, translating the
# integer ids back to titles through `invGames`.
rows = [
    [invGames[predicted_id], invGames[expected_id]]
    for predicted_id, expected_id in zip(classes_x, y_test)
]

# Write a header line followed by one line per test row.
with open('results.csv', 'w', encoding='UTF-8', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(["predicted", "expected"])
    writer.writerows(rows)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user