Compare commits
52 Commits
master
...
training-e
Author | SHA1 | Date | |
---|---|---|---|
e64175dbb2 | |||
95d1232a8f | |||
c0b179f47e | |||
abbca96dbe | |||
82d9e078dc | |||
d88133827f | |||
5314f1039f | |||
8aedc5a1e1 | |||
fc3caf4d57 | |||
c893a1f348 | |||
f57843e875 | |||
e7efd18cec | |||
e56e70ebf7 | |||
e3fd58cf37 | |||
59790b4bf1 | |||
fc0267cad2 | |||
bfc5bdffc2 | |||
55be77b806 | |||
a26ffe67fe | |||
11bdcb2a23 | |||
a3c5996c9f | |||
9bdf7be638 | |||
50f3849829 | |||
|
f416349645 | ||
d7af94d7f9 | |||
f90df94db5 | |||
659f72e27d | |||
cc8e0d3630 | |||
97dc0891c0 | |||
6e150f2009 | |||
7395076989 | |||
|
76a6537844 | ||
|
dcb52af614 | ||
|
f59e2e540b | ||
|
9d70bec54e | ||
|
a2e4417a02 | ||
|
78da89f86f | ||
|
b693a63331 | ||
|
1ea4cf0f27 | ||
|
a5e5ba743d | ||
|
b00a5c3f37 | ||
|
9dbac84880 | ||
|
edb6b8b3b2 | ||
|
80f0fbf88a | ||
|
d07c6fd4a3 | ||
|
fdd9858321 | ||
|
93d69f32f8 | ||
|
f6f6017d98 | ||
|
129c498b2d | ||
|
88514fa942 | ||
|
690e59ac0c | ||
|
27c2cb7956 |
@ -9,5 +9,6 @@ RUN pip3 install numpy
|
|||||||
RUN pip3 install sklearn
|
RUN pip3 install sklearn
|
||||||
RUN pip3 install tensorflow
|
RUN pip3 install tensorflow
|
||||||
RUN pip3 install matplotlib
|
RUN pip3 install matplotlib
|
||||||
COPY ./steam-200k.csv ./
|
RUN pip3 install sacred
|
||||||
COPY ./biblioteki_dl.py ./
|
RUN pip3 install pymongo
|
||||||
|
RUN pip3 install mlflow
|
||||||
|
46
Jenkinsfile
vendored
46
Jenkinsfile
vendored
@ -1,46 +0,0 @@
|
|||||||
pipeline {
|
|
||||||
parameters {
|
|
||||||
string(
|
|
||||||
defaultValue: 'szymonjadczak',
|
|
||||||
description: 'Kaggle username',
|
|
||||||
name: 'KAGGLE_USERNAME',
|
|
||||||
trim: false
|
|
||||||
)
|
|
||||||
password(
|
|
||||||
defaultValue: '',
|
|
||||||
description: 'Kaggle token taken from kaggle.json file, as described in https://github.com/Kaggle/kaggle-api#api-credentials',
|
|
||||||
name: 'KAGGLE_KEY'
|
|
||||||
)
|
|
||||||
string(
|
|
||||||
defaultValue: '',
|
|
||||||
description: 'Value for head command',
|
|
||||||
name: 'CUTOFF'
|
|
||||||
)
|
|
||||||
}
|
|
||||||
environment {
|
|
||||||
KAGGLE_USERNAME="$params.KAGGLE_USERNAME"
|
|
||||||
KAGGLE_KEY="$params.KAGGLE_KEY"
|
|
||||||
CUTOFF="$params.CUTOFF"
|
|
||||||
}
|
|
||||||
agent {
|
|
||||||
dockerfile {
|
|
||||||
additionalBuildArgs "-t ium"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
stages {
|
|
||||||
stage('Stage 1') {
|
|
||||||
steps {
|
|
||||||
echo 'Hello world!!!'
|
|
||||||
checkout([$class: 'GitSCM', branches: [[name: '*/master']], extensions: [], userRemoteConfigs: [[url: 'https://git.wmi.amu.edu.pl/s444386/ium_444386']]])
|
|
||||||
sh "chmod u+x ./dataset_download.sh"
|
|
||||||
sh "KAGGLE_USERNAME=${KAGGLE_USERNAME} KAGGLE_KEY=${KAGGLE_KEY} CUTOFF=${CUTOFF} ./dataset_download.sh"
|
|
||||||
archiveArtifacts 'data.csv'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
stage('Stage 2') {
|
|
||||||
steps {
|
|
||||||
build job: 's444386-training/training-evaluation/'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
21
Jenkinsfile2
21
Jenkinsfile2
@ -1,21 +0,0 @@
|
|||||||
pipeline{
|
|
||||||
agent {
|
|
||||||
docker { image 'ium' }
|
|
||||||
}
|
|
||||||
parameters {
|
|
||||||
buildSelector(
|
|
||||||
defaultSelector: lastSuccessful(),
|
|
||||||
description: 'Which build to use for copying artifacts',
|
|
||||||
name: 'BUILD_SELECTOR')
|
|
||||||
}
|
|
||||||
stages{
|
|
||||||
stage('copy artefacts') {
|
|
||||||
steps {
|
|
||||||
copyArtifacts filter: 'data.csv', fingerprintArtifacts: true, projectName: 's444386-create-dataset', selector: lastSuccessful()
|
|
||||||
sh 'chmod u+x ./kagle.py'
|
|
||||||
sh 'python3 kagle.py'
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
42
Jenkinsfile_evaluation
Normal file
42
Jenkinsfile_evaluation
Normal file
@ -0,0 +1,42 @@
|
|||||||
|
pipeline {
|
||||||
|
agent {
|
||||||
|
dockerfile true
|
||||||
|
}
|
||||||
|
parameters {
|
||||||
|
gitParameter branchFilter: 'origin/(.*)', defaultValue: 'training-evaluation', name: 'BRANCH', type: 'PT_BRANCH'
|
||||||
|
buildSelector(
|
||||||
|
defaultSelector: upstream(),
|
||||||
|
description: 'Which build to use for copying artifacts',
|
||||||
|
name: 'BUILD_SELECTOR'
|
||||||
|
)
|
||||||
|
}
|
||||||
|
stages {
|
||||||
|
stage('Stage 1') {
|
||||||
|
steps {
|
||||||
|
git branch: "${params.BRANCH}", url: 'https://git.wmi.amu.edu.pl/s444386/ium_444386.git'
|
||||||
|
copyArtifacts filter: 'model.tar.gz', projectName: "s444386-training/${BRANCH}/", selector: buildParameter('BUILD_SELECTOR')
|
||||||
|
copyArtifacts filter: 'xtest.csv', projectName: "s444386-training/${BRANCH}/", selector: buildParameter('BUILD_SELECTOR')
|
||||||
|
copyArtifacts filter: 'ytest.csv', projectName: "s444386-training/${BRANCH}/", selector: buildParameter('BUILD_SELECTOR')
|
||||||
|
copyArtifacts filter: 'eval_results.txt', projectName: 's444386-evaluation/training-evaluation/', optional: true
|
||||||
|
sh 'tar xvzf model.tar.gz'
|
||||||
|
sh 'python3 evaluation.py'
|
||||||
|
archiveArtifacts 'evaluation_acuraccy.txt'
|
||||||
|
archiveArtifacts 'accuraccy.png'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
post {
|
||||||
|
success {
|
||||||
|
emailext body: 'SUCCESS', subject: 's444386-eval-status', to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
|
||||||
|
}
|
||||||
|
failure {
|
||||||
|
emailext body: 'FAILURE', subject: 's444386-eval-status', to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
|
||||||
|
}
|
||||||
|
unstable {
|
||||||
|
emailext body: 'UNSTABLE', subject: 's444386-eval-status', to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
|
||||||
|
}
|
||||||
|
changed {
|
||||||
|
emailext body: 'CHANGED', subject: 's444386-eval-status', to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
43
Jenkinsfile_train
Normal file
43
Jenkinsfile_train
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
pipeline {
|
||||||
|
agent {
|
||||||
|
dockerfile true
|
||||||
|
}
|
||||||
|
parameters {
|
||||||
|
string(
|
||||||
|
defaultValue: '5',
|
||||||
|
description: 'learning iterations',
|
||||||
|
name: 'epoch'
|
||||||
|
)
|
||||||
|
}
|
||||||
|
stages {
|
||||||
|
stage('Stage 1') {
|
||||||
|
steps {
|
||||||
|
copyArtifacts filter: 'data.csv', fingerprintArtifacts: true, projectName: 's444386-create-dataset', selector: lastSuccessful()
|
||||||
|
sh 'chmod u+x ./sacred_training.py'
|
||||||
|
sh 'python3 sacred_training.py $epoch'
|
||||||
|
sh 'tar -czf model.tar.gz model/'
|
||||||
|
archiveArtifacts 'model.tar.gz'
|
||||||
|
archiveArtifacts 'xtest.csv'
|
||||||
|
archiveArtifacts 'ytest.csv'
|
||||||
|
dir('training') {
|
||||||
|
archiveArtifacts artifacts: '**/**'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
post {
|
||||||
|
success {
|
||||||
|
emailext body: 'SUCCESS', subject: 's444386-train-status', to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
|
||||||
|
build job: 's444386-evaluation/training-evaluation/'
|
||||||
|
}
|
||||||
|
failure {
|
||||||
|
emailext body: 'FAILURE', subject: 's444386-train-status', to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
|
||||||
|
}
|
||||||
|
unstable {
|
||||||
|
emailext body: 'UNSTABLE', subject: 's444386-train-status', to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
|
||||||
|
}
|
||||||
|
changed {
|
||||||
|
emailext body: 'CHANGED', subject: 's444386-train-status', to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
11
MLproject
11
MLproject
@ -1,11 +0,0 @@
|
|||||||
name: MLflow
|
|
||||||
|
|
||||||
|
|
||||||
docker_env:
|
|
||||||
image: szymonjadczak/mlflow:latest
|
|
||||||
|
|
||||||
entry_points:
|
|
||||||
main:
|
|
||||||
parameters:
|
|
||||||
epochs: {type: float, default: 3}
|
|
||||||
command: "python ./biblioteki_dl.py {epochs}"
|
|
@ -4,11 +4,14 @@ import pandas as pd
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
import csv
|
import csv
|
||||||
from sklearn.model_selection import train_test_split
|
from sklearn.model_selection import train_test_split
|
||||||
|
import sys
|
||||||
|
|
||||||
os.system("kaggle datasets download -d tamber/steam-video-games")
|
# os.system("kaggle datasets download -d tamber/steam-video-games")
|
||||||
os.system("unzip -o steam-video-games.zip")
|
# os.system("unzip -o steam-video-games.zip")
|
||||||
|
|
||||||
steam=pd.read_csv('steam-200k.csv',usecols=[0,1,2,3],names=['userId','game','behavior','hoursPlayed'])
|
epoch = int(sys.argv[1])
|
||||||
|
|
||||||
|
steam=pd.read_csv('data.csv',usecols=[0,1,2,3],names=['userId','game','behavior','hoursPlayed'])
|
||||||
steam.isnull().values.any()
|
steam.isnull().values.any()
|
||||||
steam['userId'] = steam.userId.astype(str)
|
steam['userId'] = steam.userId.astype(str)
|
||||||
purchaseCount = steam[steam["behavior"] != "play"]["game"].value_counts()
|
purchaseCount = steam[steam["behavior"] != "play"]["game"].value_counts()
|
||||||
@ -71,17 +74,24 @@ y_train = steam_train['game']
|
|||||||
x_test = steam_test[['hoursPlayed','purchaseCount','playCount','playerPlayCount','playerPurchaseCount']]
|
x_test = steam_test[['hoursPlayed','purchaseCount','playCount','playerPlayCount','playerPurchaseCount']]
|
||||||
y_test = steam_test['game']
|
y_test = steam_test['game']
|
||||||
|
|
||||||
|
|
||||||
x_train = np.array(x_train)
|
x_train = np.array(x_train)
|
||||||
y_train = np.array(y_train)
|
y_train = np.array(y_train)
|
||||||
x_test = np.array(x_test)
|
x_test = np.array(x_test)
|
||||||
y_test = np.array(y_test)
|
y_test = np.array(y_test)
|
||||||
|
|
||||||
|
with open('xtest.csv','w',encoding='UTF-8',newline='') as xtest:
|
||||||
|
writer = csv.writer(xtest)
|
||||||
|
for i in x_test:
|
||||||
|
writer.writerow(i)
|
||||||
|
|
||||||
for i,j in enumerate(y_train):
|
for i,j in enumerate(y_train):
|
||||||
y_train[i] = games[j]
|
y_train[i] = games[j]
|
||||||
|
|
||||||
for i,j in enumerate(y_test):
|
for i,j in enumerate(y_test):
|
||||||
y_test[i] = games[j]
|
y_test[i] = games[j]
|
||||||
|
|
||||||
|
np.savetxt("ytest.csv",y_test,delimiter=",",fmt='%d')
|
||||||
|
|
||||||
|
|
||||||
model = tf.keras.models.Sequential([
|
model = tf.keras.models.Sequential([
|
||||||
@ -102,7 +112,7 @@ y_test = np.array(y_test).astype(np.float32)
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
model.fit(x_train, y_train, epochs=100)
|
model.fit(x_train, y_train, epochs=epoch)
|
||||||
model.evaluate(x_test, y_test)
|
model.evaluate(x_test, y_test)
|
||||||
prediction = model.predict(x_test)
|
prediction = model.predict(x_test)
|
||||||
classes_x=np.argmax(prediction,axis=1)
|
classes_x=np.argmax(prediction,axis=1)
|
||||||
@ -118,4 +128,4 @@ with open('results.csv','w',encoding='UTF-8',newline='') as f:
|
|||||||
for row in rows:
|
for row in rows:
|
||||||
writer.writerow(row)
|
writer.writerow(row)
|
||||||
|
|
||||||
|
model.save('./model')
|
||||||
|
23
data.txt
23
data.txt
@ -1,23 +0,0 @@
|
|||||||
151603712,"The Elder Scrolls V Skyrim",purchase,1.0,0
|
|
||||||
151603712,"The Elder Scrolls V Skyrim",play,273.0,0
|
|
||||||
151603712,"Fallout 4",purchase,1.0,0
|
|
||||||
151603712,"Fallout 4",play,87.0,0
|
|
||||||
151603712,"Spore",purchase,1.0,0
|
|
||||||
151603712,"Spore",play,14.9,0
|
|
||||||
151603712,"Fallout New Vegas",purchase,1.0,0
|
|
||||||
151603712,"Fallout New Vegas",play,12.1,0
|
|
||||||
151603712,"Left 4 Dead 2",purchase,1.0,0
|
|
||||||
151603712,"Left 4 Dead 2",play,8.9,0
|
|
||||||
151603712,"HuniePop",purchase,1.0,0
|
|
||||||
151603712,"HuniePop",play,8.5,0
|
|
||||||
151603712,"Path of Exile",purchase,1.0,0
|
|
||||||
151603712,"Path of Exile",play,8.1,0
|
|
||||||
151603712,"Poly Bridge",purchase,1.0,0
|
|
||||||
151603712,"Poly Bridge",play,7.5,0
|
|
||||||
151603712,"Left 4 Dead",purchase,1.0,0
|
|
||||||
151603712,"Left 4 Dead",play,3.3,0
|
|
||||||
151603712,"Team Fortress 2",purchase,1.0,0
|
|
||||||
151603712,"Team Fortress 2",play,2.8,0
|
|
||||||
151603712,"Tomb Raider",purchase,1.0,0
|
|
||||||
151603712,"Tomb Raider",play,2.5,0
|
|
||||||
151603712,"The Banner Saga",purchase,1.0,0
|
|
@ -1,6 +0,0 @@
|
|||||||
|
|
||||||
kaggle datasets download -d tamber/steam-video-games
|
|
||||||
unzip -o steam-video-games.zip
|
|
||||||
> data.csv
|
|
||||||
head -n $CUTOFF steam-200k.csv >> data.csv
|
|
||||||
|
|
@ -1 +0,0 @@
|
|||||||
wc -l data.csv >> number_of_lines.txt
|
|
415
environment.yml
415
environment.yml
@ -1,415 +0,0 @@
|
|||||||
name: null
|
|
||||||
channels:
|
|
||||||
- defaults
|
|
||||||
dependencies:
|
|
||||||
- _anaconda_depends=2021.11=py39_0
|
|
||||||
- alabaster=0.7.12=pyhd3eb1b0_0
|
|
||||||
- anaconda=custom=py39_1
|
|
||||||
- anaconda-client=1.9.0=py39haa95532_0
|
|
||||||
- anaconda-project=0.10.2=pyhd3eb1b0_0
|
|
||||||
- anyio=3.5.0=py39haa95532_0
|
|
||||||
- appdirs=1.4.4=pyhd3eb1b0_0
|
|
||||||
- argh=0.26.2=py39haa95532_0
|
|
||||||
- argon2-cffi=21.3.0=pyhd3eb1b0_0
|
|
||||||
- argon2-cffi-bindings=21.2.0=py39h2bbff1b_0
|
|
||||||
- arrow=1.2.2=pyhd3eb1b0_0
|
|
||||||
- asn1crypto=1.5.1=py39haa95532_0
|
|
||||||
- astroid=2.6.6=py39haa95532_0
|
|
||||||
- astropy=5.0.4=py39h080aedc_0
|
|
||||||
- asttokens=2.0.5=pyhd3eb1b0_0
|
|
||||||
- async_generator=1.10=pyhd3eb1b0_0
|
|
||||||
- atomicwrites=1.4.0=py_0
|
|
||||||
- attrs=21.4.0=pyhd3eb1b0_0
|
|
||||||
- autopep8=1.6.0=pyhd3eb1b0_0
|
|
||||||
- babel=2.9.1=pyhd3eb1b0_0
|
|
||||||
- backcall=0.2.0=pyhd3eb1b0_0
|
|
||||||
- backports=1.1=pyhd3eb1b0_0
|
|
||||||
- backports.shutil_get_terminal_size=1.0.0=pyhd3eb1b0_3
|
|
||||||
- bcrypt=3.2.0=py39h196d8e1_0
|
|
||||||
- beautifulsoup4=4.11.1=py39haa95532_0
|
|
||||||
- binaryornot=0.4.4=pyhd3eb1b0_1
|
|
||||||
- bitarray=2.5.0=py39h2bbff1b_0
|
|
||||||
- bkcharts=0.2=py39haa95532_0
|
|
||||||
- black=19.10b0=py_0
|
|
||||||
- blas=1.0=mkl
|
|
||||||
- bleach=4.1.0=pyhd3eb1b0_0
|
|
||||||
- blosc=1.21.0=h19a0ad4_0
|
|
||||||
- bokeh=2.4.2=py39haa95532_1
|
|
||||||
- boto=2.49.0=py39haa95532_0
|
|
||||||
- bottleneck=1.3.4=py39h080aedc_0
|
|
||||||
- brotli=1.0.9=ha925a31_2
|
|
||||||
- brotlipy=0.7.0=py39h2bbff1b_1003
|
|
||||||
- bzip2=1.0.8=he774522_0
|
|
||||||
- ca-certificates=2022.4.26=haa95532_0
|
|
||||||
- cached-property=1.5.2=py_0
|
|
||||||
- certifi=2022.5.18.1=py39haa95532_0
|
|
||||||
- cffi=1.15.0=py39h2bbff1b_1
|
|
||||||
- cfitsio=3.470=he774522_6
|
|
||||||
- chardet=4.0.0=py39haa95532_1003
|
|
||||||
- charls=2.2.0=h6c2663c_0
|
|
||||||
- charset-normalizer=2.0.4=pyhd3eb1b0_0
|
|
||||||
- click=8.0.4=py39haa95532_0
|
|
||||||
- cloudpickle=2.0.0=pyhd3eb1b0_0
|
|
||||||
- clyent=1.2.2=py39haa95532_1
|
|
||||||
- colorama=0.4.4=pyhd3eb1b0_0
|
|
||||||
- comtypes=1.1.10=py39haa95532_1002
|
|
||||||
- conda=4.12.0=py39haa95532_0
|
|
||||||
- conda-content-trust=0.1.1=pyhd3eb1b0_0
|
|
||||||
- conda-pack=0.6.0=pyhd3eb1b0_0
|
|
||||||
- conda-package-handling=1.8.1=py39h8cc25b3_0
|
|
||||||
- conda-token=0.3.0=pyhd3eb1b0_0
|
|
||||||
- console_shortcut=0.1.1=4
|
|
||||||
- contextlib2=0.6.0.post1=pyhd3eb1b0_0
|
|
||||||
- cookiecutter=1.7.3=pyhd3eb1b0_0
|
|
||||||
- cryptography=37.0.1=py39h21b164f_0
|
|
||||||
- curl=7.82.0=h2bbff1b_0
|
|
||||||
- cycler=0.11.0=pyhd3eb1b0_0
|
|
||||||
- cython=0.29.28=py39hd77b12b_0
|
|
||||||
- cytoolz=0.11.0=py39h2bbff1b_0
|
|
||||||
- daal4py=2021.5.0=py39h8cb3d55_0
|
|
||||||
- dal=2021.5.0=haa95532_796
|
|
||||||
- dask=2022.2.1=pyhd3eb1b0_0
|
|
||||||
- dask-core=2022.2.1=pyhd3eb1b0_0
|
|
||||||
- dataclasses=0.8=pyh6d0b6a4_7
|
|
||||||
- debugpy=1.5.1=py39hd77b12b_0
|
|
||||||
- decorator=5.1.1=pyhd3eb1b0_0
|
|
||||||
- defusedxml=0.7.1=pyhd3eb1b0_0
|
|
||||||
- diff-match-patch=20200713=pyhd3eb1b0_0
|
|
||||||
- distributed=2022.2.1=pyhd3eb1b0_0
|
|
||||||
- docutils=0.17.1=py39haa95532_1
|
|
||||||
- entrypoints=0.4=py39haa95532_0
|
|
||||||
- et_xmlfile=1.1.0=py39haa95532_0
|
|
||||||
- executing=0.8.3=pyhd3eb1b0_0
|
|
||||||
- fastcache=1.1.0=py39h196d8e1_0
|
|
||||||
- filelock=3.6.0=pyhd3eb1b0_0
|
|
||||||
- flake8=3.9.2=pyhd3eb1b0_0
|
|
||||||
- flask=2.0.3=pyhd3eb1b0_0
|
|
||||||
- fonttools=4.25.0=pyhd3eb1b0_0
|
|
||||||
- freetype=2.10.4=hd328e21_0
|
|
||||||
- fsspec=2022.3.0=py39haa95532_0
|
|
||||||
- get_terminal_size=1.0.0=h38e98db_0
|
|
||||||
- gevent=21.8.0=py39h2bbff1b_1
|
|
||||||
- giflib=5.2.1=h62dcd97_0
|
|
||||||
- glob2=0.7=pyhd3eb1b0_0
|
|
||||||
- greenlet=1.1.1=py39hd77b12b_0
|
|
||||||
- h5py=3.6.0=py39h3de5c98_0
|
|
||||||
- hdf5=1.10.6=h7ebc959_0
|
|
||||||
- heapdict=1.0.1=pyhd3eb1b0_0
|
|
||||||
- html5lib=1.1=pyhd3eb1b0_0
|
|
||||||
- icc_rt=2019.0.0=h0cc432a_1
|
|
||||||
- icu=58.2=ha925a31_3
|
|
||||||
- idna=3.3=pyhd3eb1b0_0
|
|
||||||
- imagecodecs=2021.8.26=py39ha1f97ea_0
|
|
||||||
- imageio=2.9.0=pyhd3eb1b0_0
|
|
||||||
- imagesize=1.3.0=pyhd3eb1b0_0
|
|
||||||
- importlib-metadata=4.11.3=py39haa95532_0
|
|
||||||
- importlib_metadata=4.11.3=hd3eb1b0_0
|
|
||||||
- inflection=0.5.1=py39haa95532_0
|
|
||||||
- iniconfig=1.1.1=pyhd3eb1b0_0
|
|
||||||
- intel-openmp=2021.4.0=haa95532_3556
|
|
||||||
- intervaltree=3.1.0=pyhd3eb1b0_0
|
|
||||||
- ipykernel=6.9.1=py39haa95532_0
|
|
||||||
- ipython=8.3.0=py39haa95532_0
|
|
||||||
- ipython_genutils=0.2.0=pyhd3eb1b0_1
|
|
||||||
- ipywidgets=7.6.5=pyhd3eb1b0_1
|
|
||||||
- isort=5.9.3=pyhd3eb1b0_0
|
|
||||||
- itsdangerous=2.0.1=pyhd3eb1b0_0
|
|
||||||
- jdcal=1.4.1=pyhd3eb1b0_0
|
|
||||||
- jedi=0.18.1=py39haa95532_1
|
|
||||||
- jinja2=3.0.3=pyhd3eb1b0_0
|
|
||||||
- jinja2-time=0.2.0=pyhd3eb1b0_3
|
|
||||||
- joblib=1.1.0=pyhd3eb1b0_0
|
|
||||||
- jpeg=9e=h2bbff1b_0
|
|
||||||
- json5=0.9.6=pyhd3eb1b0_0
|
|
||||||
- jsonschema=4.4.0=py39haa95532_0
|
|
||||||
- jupyter=1.0.0=py39haa95532_7
|
|
||||||
- jupyter_client=6.1.12=pyhd3eb1b0_0
|
|
||||||
- jupyter_console=6.4.0=pyhd3eb1b0_0
|
|
||||||
- jupyter_core=4.10.0=py39haa95532_0
|
|
||||||
- jupyter_server=1.13.5=pyhd3eb1b0_0
|
|
||||||
- jupyterlab=3.3.2=pyhd3eb1b0_0
|
|
||||||
- jupyterlab_pygments=0.1.2=py_0
|
|
||||||
- jupyterlab_server=2.12.0=py39haa95532_0
|
|
||||||
- jupyterlab_widgets=1.0.0=pyhd3eb1b0_1
|
|
||||||
- keyring=23.4.0=py39haa95532_0
|
|
||||||
- kiwisolver=1.3.2=py39hd77b12b_0
|
|
||||||
- krb5=1.19.2=h5b6d351_0
|
|
||||||
- lazy-object-proxy=1.6.0=py39h2bbff1b_0
|
|
||||||
- lcms2=2.12=h83e58a3_0
|
|
||||||
- lerc=3.0=hd77b12b_0
|
|
||||||
- libaec=1.0.4=h33f27b4_1
|
|
||||||
- libarchive=3.4.2=h5e25573_0
|
|
||||||
- libcurl=7.82.0=h86230a5_0
|
|
||||||
- libdeflate=1.8=h2bbff1b_5
|
|
||||||
- libiconv=1.16=h2bbff1b_2
|
|
||||||
- liblief=0.11.5=hd77b12b_1
|
|
||||||
- libpng=1.6.37=h2a8f88b_0
|
|
||||||
- libspatialindex=1.9.3=h6c2663c_0
|
|
||||||
- libssh2=1.10.0=hcd4344a_0
|
|
||||||
- libtiff=4.2.0=hd0e1b90_0
|
|
||||||
- libwebp=1.2.2=h2bbff1b_0
|
|
||||||
- libxml2=2.9.12=h0ad7f3c_2
|
|
||||||
- libxslt=1.1.34=he774522_0
|
|
||||||
- libzopfli=1.0.3=ha925a31_0
|
|
||||||
- llvmlite=0.38.0=py39h23ce68f_0
|
|
||||||
- locket=1.0.0=py39haa95532_0
|
|
||||||
- lxml=4.8.0=py39h1985fb9_0
|
|
||||||
- lz4-c=1.9.3=h2bbff1b_1
|
|
||||||
- lzo=2.10=he774522_2
|
|
||||||
- m2w64-gcc-libgfortran=5.3.0=6
|
|
||||||
- m2w64-gcc-libs=5.3.0=7
|
|
||||||
- m2w64-gcc-libs-core=5.3.0=7
|
|
||||||
- m2w64-gmp=6.1.0=2
|
|
||||||
- m2w64-libwinpthread-git=5.0.0.4634.697f757=2
|
|
||||||
- markupsafe=2.0.1=py39h2bbff1b_0
|
|
||||||
- matplotlib=3.5.1=py39haa95532_1
|
|
||||||
- matplotlib-base=3.5.1=py39hd77b12b_1
|
|
||||||
- matplotlib-inline=0.1.2=pyhd3eb1b0_2
|
|
||||||
- mccabe=0.6.1=py39haa95532_1
|
|
||||||
- menuinst=1.4.18=py39h59b6b97_0
|
|
||||||
- mistune=0.8.4=py39h2bbff1b_1000
|
|
||||||
- mkl=2021.4.0=haa95532_640
|
|
||||||
- mkl-service=2.4.0=py39h2bbff1b_0
|
|
||||||
- mkl_fft=1.3.1=py39h277e83a_0
|
|
||||||
- mkl_random=1.2.2=py39hf11a4ad_0
|
|
||||||
- mock=4.0.3=pyhd3eb1b0_0
|
|
||||||
- more-itertools=8.12.0=pyhd3eb1b0_0
|
|
||||||
- mpmath=1.2.1=py39haa95532_0
|
|
||||||
- msgpack-python=1.0.3=py39h59b6b97_0
|
|
||||||
- msys2-conda-epoch=20160418=1
|
|
||||||
- multipledispatch=0.6.0=py39haa95532_0
|
|
||||||
- munkres=1.1.4=py_0
|
|
||||||
- mypy_extensions=0.4.3=py39haa95532_1
|
|
||||||
- nbclassic=0.3.5=pyhd3eb1b0_0
|
|
||||||
- nbclient=0.5.13=py39haa95532_0
|
|
||||||
- nbconvert=6.4.4=py39haa95532_0
|
|
||||||
- nbformat=5.3.0=py39haa95532_0
|
|
||||||
- nest-asyncio=1.5.5=py39haa95532_0
|
|
||||||
- networkx=2.7.1=pyhd3eb1b0_0
|
|
||||||
- nltk=3.7=pyhd3eb1b0_0
|
|
||||||
- nose=1.3.7=pyhd3eb1b0_1008
|
|
||||||
- notebook=6.4.11=py39haa95532_0
|
|
||||||
- numba=0.55.1=py39hf11a4ad_0
|
|
||||||
- numexpr=2.8.1=py39hb80d3ca_0
|
|
||||||
- numpy=1.21.5=py39h7a0a035_2
|
|
||||||
- numpy-base=1.21.5=py39hca35cd5_2
|
|
||||||
- numpydoc=1.2=pyhd3eb1b0_0
|
|
||||||
- olefile=0.46=pyhd3eb1b0_0
|
|
||||||
- openjpeg=2.4.0=h4fc8c34_0
|
|
||||||
- openpyxl=3.0.9=pyhd3eb1b0_0
|
|
||||||
- openssl=1.1.1o=h2bbff1b_0
|
|
||||||
- packaging=21.3=pyhd3eb1b0_0
|
|
||||||
- pandas=1.4.2=py39hd77b12b_0
|
|
||||||
- pandocfilters=1.5.0=pyhd3eb1b0_0
|
|
||||||
- paramiko=2.8.1=pyhd3eb1b0_0
|
|
||||||
- parso=0.8.3=pyhd3eb1b0_0
|
|
||||||
- partd=1.2.0=pyhd3eb1b0_1
|
|
||||||
- path=16.2.0=pyhd3eb1b0_0
|
|
||||||
- path.py=12.5.0=hd3eb1b0_0
|
|
||||||
- pathlib2=2.3.6=py39haa95532_2
|
|
||||||
- pathspec=0.7.0=py_0
|
|
||||||
- patsy=0.5.2=py39haa95532_1
|
|
||||||
- pep8=1.7.1=py39haa95532_0
|
|
||||||
- pexpect=4.8.0=pyhd3eb1b0_3
|
|
||||||
- pickleshare=0.7.5=pyhd3eb1b0_1003
|
|
||||||
- pillow=9.0.1=py39hdc2b20a_0
|
|
||||||
- pip=21.2.4=py39haa95532_0
|
|
||||||
- pkginfo=1.8.2=pyhd3eb1b0_0
|
|
||||||
- pluggy=1.0.0=py39haa95532_1
|
|
||||||
- ply=3.11=py39haa95532_0
|
|
||||||
- powershell_shortcut=0.0.1=3
|
|
||||||
- poyo=0.5.0=pyhd3eb1b0_0
|
|
||||||
- prometheus_client=0.13.1=pyhd3eb1b0_0
|
|
||||||
- prompt-toolkit=3.0.20=pyhd3eb1b0_0
|
|
||||||
- prompt_toolkit=3.0.20=hd3eb1b0_0
|
|
||||||
- psutil=5.8.0=py39h2bbff1b_1
|
|
||||||
- ptyprocess=0.7.0=pyhd3eb1b0_2
|
|
||||||
- pure_eval=0.2.2=pyhd3eb1b0_0
|
|
||||||
- py=1.11.0=pyhd3eb1b0_0
|
|
||||||
- py-lief=0.11.5=py39hd77b12b_1
|
|
||||||
- pycodestyle=2.7.0=pyhd3eb1b0_0
|
|
||||||
- pycosat=0.6.3=py39h2bbff1b_0
|
|
||||||
- pycparser=2.21=pyhd3eb1b0_0
|
|
||||||
- pycurl=7.44.1=py39hcd4344a_1
|
|
||||||
- pydocstyle=6.1.1=pyhd3eb1b0_0
|
|
||||||
- pyerfa=2.0.0=py39h2bbff1b_0
|
|
||||||
- pyflakes=2.3.1=pyhd3eb1b0_0
|
|
||||||
- pygments=2.11.2=pyhd3eb1b0_0
|
|
||||||
- pylint=2.9.6=py39haa95532_1
|
|
||||||
- pyls-spyder=0.4.0=pyhd3eb1b0_0
|
|
||||||
- pynacl=1.4.0=py39hbd8134f_1
|
|
||||||
- pyodbc=4.0.32=py39hd77b12b_1
|
|
||||||
- pyopenssl=22.0.0=pyhd3eb1b0_0
|
|
||||||
- pyparsing=3.0.4=pyhd3eb1b0_0
|
|
||||||
- pyqt=5.9.2=py39hd77b12b_6
|
|
||||||
- pyreadline=2.1=py39haa95532_1
|
|
||||||
- pyrsistent=0.18.0=py39h196d8e1_0
|
|
||||||
- pysocks=1.7.1=py39haa95532_0
|
|
||||||
- pytables=3.6.1=py39h56d22b6_1
|
|
||||||
- pytest=7.1.1=py39haa95532_0
|
|
||||||
- python=3.9.2=h6244533_0
|
|
||||||
- python-dateutil=2.8.2=pyhd3eb1b0_0
|
|
||||||
- python-fastjsonschema=2.15.1=pyhd3eb1b0_0
|
|
||||||
- python-libarchive-c=2.9=pyhd3eb1b0_1
|
|
||||||
- python-lsp-black=1.0.0=pyhd3eb1b0_0
|
|
||||||
- python-lsp-jsonrpc=1.0.0=pyhd3eb1b0_0
|
|
||||||
- python-lsp-server=1.2.4=pyhd3eb1b0_0
|
|
||||||
- python-slugify=5.0.2=pyhd3eb1b0_0
|
|
||||||
- pytz=2021.3=pyhd3eb1b0_0
|
|
||||||
- pywavelets=1.3.0=py39h2bbff1b_0
|
|
||||||
- pywin32-ctypes=0.2.0=py39haa95532_1000
|
|
||||||
- pywinpty=2.0.2=py39h5da7b33_0
|
|
||||||
- pyyaml=6.0=py39h2bbff1b_1
|
|
||||||
- pyzmq=22.3.0=py39hd77b12b_2
|
|
||||||
- qdarkstyle=3.0.2=pyhd3eb1b0_0
|
|
||||||
- qstylizer=0.1.10=pyhd3eb1b0_0
|
|
||||||
- qt=5.9.7=vc14h73c81de_0
|
|
||||||
- qtawesome=1.0.3=pyhd3eb1b0_0
|
|
||||||
- qtconsole=5.3.0=pyhd3eb1b0_0
|
|
||||||
- qtpy=2.0.1=pyhd3eb1b0_0
|
|
||||||
- regex=2022.3.15=py39h2bbff1b_0
|
|
||||||
- requests=2.27.1=pyhd3eb1b0_0
|
|
||||||
- rope=0.22.0=pyhd3eb1b0_0
|
|
||||||
- rtree=0.9.7=py39h2eaa2aa_1
|
|
||||||
- ruamel_yaml=0.15.100=py39h2bbff1b_0
|
|
||||||
- scikit-image=0.19.2=py39hf11a4ad_0
|
|
||||||
- scikit-learn=1.0.2=py39hf11a4ad_1
|
|
||||||
- scikit-learn-intelex=2021.5.0=py39haa95532_0
|
|
||||||
- scipy=1.7.3=py39h0a974cb_0
|
|
||||||
- seaborn=0.11.2=pyhd3eb1b0_0
|
|
||||||
- send2trash=1.8.0=pyhd3eb1b0_1
|
|
||||||
- setuptools=61.2.0=py39haa95532_0
|
|
||||||
- simplegeneric=0.8.1=py39haa95532_2
|
|
||||||
- singledispatch=3.7.0=pyhd3eb1b0_1001
|
|
||||||
- sip=4.19.13=py39hd77b12b_0
|
|
||||||
- six=1.16.0=pyhd3eb1b0_1
|
|
||||||
- snappy=1.1.9=h6c2663c_0
|
|
||||||
- sniffio=1.2.0=py39haa95532_1
|
|
||||||
- snowballstemmer=2.2.0=pyhd3eb1b0_0
|
|
||||||
- sortedcollections=2.1.0=pyhd3eb1b0_0
|
|
||||||
- sortedcontainers=2.4.0=pyhd3eb1b0_0
|
|
||||||
- soupsieve=2.3.1=pyhd3eb1b0_0
|
|
||||||
- sphinx=4.4.0=pyhd3eb1b0_0
|
|
||||||
- sphinxcontrib=1.0=py39haa95532_1
|
|
||||||
- sphinxcontrib-applehelp=1.0.2=pyhd3eb1b0_0
|
|
||||||
- sphinxcontrib-devhelp=1.0.2=pyhd3eb1b0_0
|
|
||||||
- sphinxcontrib-htmlhelp=2.0.0=pyhd3eb1b0_0
|
|
||||||
- sphinxcontrib-jsmath=1.0.1=pyhd3eb1b0_0
|
|
||||||
- sphinxcontrib-qthelp=1.0.3=pyhd3eb1b0_0
|
|
||||||
- sphinxcontrib-serializinghtml=1.1.5=pyhd3eb1b0_0
|
|
||||||
- sphinxcontrib-websupport=1.2.4=py_0
|
|
||||||
- spyder=5.1.5=py39haa95532_1
|
|
||||||
- spyder-kernels=2.1.3=py39haa95532_0
|
|
||||||
- sqlalchemy=1.4.32=py39h2bbff1b_0
|
|
||||||
- sqlite=3.38.3=h2bbff1b_0
|
|
||||||
- stack_data=0.2.0=pyhd3eb1b0_0
|
|
||||||
- statsmodels=0.13.2=py39h2bbff1b_0
|
|
||||||
- sympy=1.10.1=py39haa95532_0
|
|
||||||
- tbb=2021.5.0=h59b6b97_0
|
|
||||||
- tbb4py=2021.5.0=py39h59b6b97_0
|
|
||||||
- tblib=1.7.0=pyhd3eb1b0_0
|
|
||||||
- terminado=0.13.1=py39haa95532_0
|
|
||||||
- testpath=0.5.0=pyhd3eb1b0_0
|
|
||||||
- text-unidecode=1.3=pyhd3eb1b0_0
|
|
||||||
- textdistance=4.2.1=pyhd3eb1b0_0
|
|
||||||
- threadpoolctl=2.2.0=pyh0d69192_0
|
|
||||||
- three-merge=0.1.1=pyhd3eb1b0_0
|
|
||||||
- tifffile=2021.7.2=pyhd3eb1b0_2
|
|
||||||
- tinycss=0.4=pyhd3eb1b0_1002
|
|
||||||
- tk=8.6.11=h2bbff1b_1
|
|
||||||
- toml=0.10.2=pyhd3eb1b0_0
|
|
||||||
- tomli=1.2.2=pyhd3eb1b0_0
|
|
||||||
- toolz=0.11.2=pyhd3eb1b0_0
|
|
||||||
- tornado=6.1=py39h2bbff1b_0
|
|
||||||
- tqdm=4.64.0=py39haa95532_0
|
|
||||||
- traitlets=5.1.1=pyhd3eb1b0_0
|
|
||||||
- typed-ast=1.4.3=py39h2bbff1b_1
|
|
||||||
- typing-extensions=4.1.1=hd3eb1b0_0
|
|
||||||
- typing_extensions=4.1.1=pyh06a4308_0
|
|
||||||
- tzdata=2022a=hda174b7_0
|
|
||||||
- ujson=5.1.0=py39hd77b12b_0
|
|
||||||
- unicodecsv=0.14.1=py39haa95532_0
|
|
||||||
- unidecode=1.2.0=pyhd3eb1b0_0
|
|
||||||
- urllib3=1.26.9=py39haa95532_0
|
|
||||||
- vc=14.2=h21ff451_1
|
|
||||||
- vs2015_runtime=14.27.29016=h5e58377_2
|
|
||||||
- watchdog=2.1.6=py39haa95532_0
|
|
||||||
- wcwidth=0.2.5=pyhd3eb1b0_0
|
|
||||||
- webencodings=0.5.1=py39haa95532_1
|
|
||||||
- websocket-client=0.58.0=py39haa95532_4
|
|
||||||
- werkzeug=2.0.3=pyhd3eb1b0_0
|
|
||||||
- wheel=0.37.1=pyhd3eb1b0_0
|
|
||||||
- whichcraft=0.6.1=pyhd3eb1b0_0
|
|
||||||
- widgetsnbextension=3.5.2=py39haa95532_0
|
|
||||||
- win_inet_pton=1.1.0=py39haa95532_0
|
|
||||||
- win_unicode_console=0.5=py39haa95532_0
|
|
||||||
- wincertstore=0.2=py39haa95532_2
|
|
||||||
- winpty=0.4.3=4
|
|
||||||
- wrapt=1.12.1=py39h196d8e1_1
|
|
||||||
- xlrd=2.0.1=pyhd3eb1b0_0
|
|
||||||
- xlsxwriter=3.0.3=pyhd3eb1b0_0
|
|
||||||
- xlwings=0.24.9=py39haa95532_0
|
|
||||||
- xlwt=1.3.0=py39haa95532_0
|
|
||||||
- xz=5.2.5=h8cc25b3_1
|
|
||||||
- yaml=0.2.5=he774522_0
|
|
||||||
- yapf=0.31.0=pyhd3eb1b0_0
|
|
||||||
- zfp=0.5.5=hd77b12b_6
|
|
||||||
- zict=2.0.0=pyhd3eb1b0_0
|
|
||||||
- zipp=3.8.0=py39haa95532_0
|
|
||||||
- zlib=1.2.12=h8cc25b3_2
|
|
||||||
- zope=1.0=py39haa95532_1
|
|
||||||
- zope.event=4.5.0=py39haa95532_0
|
|
||||||
- zope.interface=5.4.0=py39h2bbff1b_0
|
|
||||||
- zstd=1.4.9=h19a0ad4_0
|
|
||||||
- pip:
|
|
||||||
- absl-py==1.0.0
|
|
||||||
- alembic==1.7.7
|
|
||||||
- astunparse==1.6.3
|
|
||||||
- cachetools==5.1.0
|
|
||||||
- databricks-cli==0.16.6
|
|
||||||
- docker==5.0.3
|
|
||||||
- docopt==0.6.2
|
|
||||||
- flatbuffers==1.12
|
|
||||||
- gast==0.4.0
|
|
||||||
- gitdb==4.0.9
|
|
||||||
- gitpython==3.1.27
|
|
||||||
- google-auth==2.6.6
|
|
||||||
- google-auth-oauthlib==0.4.6
|
|
||||||
- google-pasta==0.2.0
|
|
||||||
- grpcio==1.46.3
|
|
||||||
- jsonpickle==1.5.2
|
|
||||||
- kaggle==1.5.12
|
|
||||||
- keras==2.9.0
|
|
||||||
- keras-preprocessing==1.1.2
|
|
||||||
- libclang==14.0.1
|
|
||||||
- mako==1.2.0
|
|
||||||
- markdown==3.3.7
|
|
||||||
- mlflow==1.26.0
|
|
||||||
- munch==2.5.0
|
|
||||||
- oauthlib==3.2.0
|
|
||||||
- opt-einsum==3.3.0
|
|
||||||
- prometheus-flask-exporter==0.20.1
|
|
||||||
- protobuf==3.20.1
|
|
||||||
- py-cpuinfo==8.0.0
|
|
||||||
- pyasn1==0.4.8
|
|
||||||
- pyasn1-modules==0.2.8
|
|
||||||
- pyjwt==2.4.0
|
|
||||||
- pymongo==4.1.1
|
|
||||||
- pywin32==227
|
|
||||||
- querystring-parser==1.2.4
|
|
||||||
- requests-oauthlib==1.3.1
|
|
||||||
- rsa==4.8
|
|
||||||
- sacred==0.8.2
|
|
||||||
- sklearn==0.0
|
|
||||||
- smmap==5.0.0
|
|
||||||
- sqlparse==0.4.2
|
|
||||||
- tabulate==0.8.9
|
|
||||||
- tensorboard==2.9.0
|
|
||||||
- tensorboard-data-server==0.6.1
|
|
||||||
- tensorboard-plugin-wit==1.8.1
|
|
||||||
- tensorflow==2.9.0
|
|
||||||
- tensorflow-estimator==2.9.0
|
|
||||||
- tensorflow-io-gcs-filesystem==0.26.0
|
|
||||||
- termcolor==1.1.0
|
|
||||||
- waitress==2.1.1
|
|
||||||
prefix: D:\anaconda\envs\s444386
|
|
27
evaluation.py
Normal file
27
evaluation.py
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
import tensorflow as tf
|
||||||
|
import os
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import csv
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
|
x_test = pd.read_csv('xtest.csv')
|
||||||
|
y_test = pd.read_csv('ytest.csv')
|
||||||
|
|
||||||
|
model = tf.keras.models.load_model('./model')
|
||||||
|
|
||||||
|
res = model.evaluate(x_test, y_test,verbose=0)
|
||||||
|
|
||||||
|
with open('evaluation_acuraccy.txt', 'a+') as f:
|
||||||
|
f.write(str(res[1])+'\n')
|
||||||
|
with open('evaluation_acuraccy.txt') as f:
|
||||||
|
scores = [float(line) for line in f if line]
|
||||||
|
print(scores)
|
||||||
|
builds = list(range(1, len(scores) + 1))
|
||||||
|
plot = plt.plot(builds, scores)
|
||||||
|
plt.xlabel('Build')
|
||||||
|
plt.xticks(range(1, len(scores) + 1))
|
||||||
|
plt.ylabel('Accuraccy')
|
||||||
|
plt.show()
|
||||||
|
plt.savefig('accuraccy.png')
|
79
kagle.py
79
kagle.py
@ -1,79 +0,0 @@
|
|||||||
import os
|
|
||||||
import pandas as pd
|
|
||||||
from sklearn.model_selection import train_test_split
|
|
||||||
|
|
||||||
#os.system("kaggle datasets download -d tamber/steam-video-games")
|
|
||||||
#os.system("unzip -o steam-video-games.zip")
|
|
||||||
|
|
||||||
# Exploratory summary of the Steam user/game dataset, followed by a stratified
# train / test / dev split.
steam = pd.read_csv(
    'data.csv',
    usecols=[0, 1, 2, 3],
    names=['userId', 'game', 'behavior', 'hoursPlayed'],
)
# Keep user ids as strings rather than integers.
steam['userId'] = steam.userId.astype(str)

# The two row subsets every statistic below is built from, computed once.
purchases = steam[steam["behavior"] != "play"]
plays = steam[steam["behavior"] != "purchase"]

print("Zbior danych:")
print(steam)

print("Describe:")
print(steam.describe(include='all'), "\n\n")

print("Gracze z najwieksza aktywnoscia:")
print(steam["userId"].value_counts(), "\n\n")

purchases_per_user = purchases["userId"].value_counts()
print("Gracze z najwieksza liczba kupionych gier:")
print(purchases_per_user)
print("Mediana:")
print(purchases_per_user.median(), "\n\n")

plays_per_user = plays["userId"].value_counts()
print("Gracze ktorzy zagrali w najwieksza liczbe gier:")
print(plays_per_user)
print("Mediana:")
print(plays_per_user.median(), "\n\n")

print("Gry:")
print(steam["game"].value_counts(), "\n\n")

# Select hoursPlayed explicitly: aggregating the whole frame would also hit the
# string columns (userId, behavior), which newer pandas versions refuse to
# average instead of silently dropping them.
hours_by_game = plays.groupby("game")[["hoursPlayed"]]

mean_hours = hours_by_game.mean().sort_values(by="hoursPlayed", ascending=False)
print("Sredni czas grania w grania w dana gre")
print(mean_hours)
print("Mediana:")
print(mean_hours.median(), "\n\n")

purchases_per_game = purchases["game"].value_counts()
print("Najczesciej kupowana gra")
print(purchases_per_game)
print("Mediana:")
print(purchases_per_game.median(), "\n\n")

plays_per_game = plays["game"].value_counts()
print("Gra w ktora zagralo najwiecej graczy")
print(plays_per_game)
print("Mediana:")
print(plays_per_game.median(), "\n\n")

print("Liczba kupionych gier i liczba gier w ktore gracze zagrali")
print(steam["behavior"].value_counts(), "\n\n")

print("Gra z najwieksza liczba godzin dla jednego gracza")
print(plays[["userId", "hoursPlayed", "game"]].sort_values(by="hoursPlayed", ascending=False))
print("Mediana:")
print(plays["hoursPlayed"].median(), "\n\n")

total_hours = hours_by_game.sum().sort_values(by="hoursPlayed", ascending=False)
print("Suma rozegranych godzin dla danej gry")
print(total_hours)
print("Mediana:")
print(total_hours.median(), "\n\n")

# Keep only games that have more than 10 rows (games with 10 or fewer are dropped).
steam = steam.groupby("game").filter(lambda x: len(x) > 10)
# Size of the test and dev sets: one tenth of the rows each, for an 8:1:1 split.
size = int(len(steam) / 10)

steam_train, steam_test = train_test_split(
    steam, test_size=size, random_state=1, stratify=steam["game"]
)
steam_train, steam_dev = train_test_split(
    steam_train, test_size=size, random_state=1, stratify=steam_train["game"]
)

print("Zbior trenujacy")
print(steam_train["game"].value_counts(), "\n")

print("Zbior testujacy")
print(steam_test["game"].value_counts(), "\n")

print("Zbior dev")
print(steam_dev["game"].value_counts(), "\n")
|
|
@ -1,139 +1,147 @@
|
|||||||
import tensorflow as tf
|
import tensorflow as tf
|
||||||
import os
|
import os
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import csv
|
import csv
|
||||||
from sklearn.model_selection import train_test_split
|
from sklearn.model_selection import train_test_split
|
||||||
import sys
|
import sys
|
||||||
import mlflow
|
from sacred.observers import MongoObserver
|
||||||
|
from sacred.observers import FileStorageObserver
|
||||||
# os.system("kaggle datasets download -d tamber/steam-video-games")
|
from sacred import Experiment
|
||||||
# os.system("unzip -o steam-video-games.zip")
|
|
||||||
|
ex = Experiment("444386 sacred_scopes", interactive=True, save_git_info=False)
|
||||||
with mlflow.start_run():
|
ex.observers.append(MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017',db_name='sacred'))
|
||||||
epoch = int(sys.argv[1])
|
ex.observers.append(FileStorageObserver('training'))
|
||||||
|
epochs = int(sys.argv[1])
|
||||||
steam=pd.read_csv('data.csv',usecols=[0,1,2,3],names=['userId','game','behavior','hoursPlayed'])
|
|
||||||
steam.isnull().values.any()
|
@ex.config
|
||||||
steam['userId'] = steam.userId.astype(str)
|
def my_config():
|
||||||
purchaseCount = steam[steam["behavior"] != "play"]["game"].value_counts()
|
epoch = epochs
|
||||||
playCount = steam[steam["behavior"] != "purchase"]["game"].value_counts()
|
layerDenseRelu = 256
|
||||||
|
layerDropout = 0.01
|
||||||
playerPurchaseCount = steam[steam["behavior"] != "play"]["userId"].value_counts()
|
layerDenseSoftMax = 1000.0
|
||||||
playerPlayCount = steam[steam["behavior"] != "purchase"]["userId"].value_counts()
|
|
||||||
|
#ex.add_config("config.json")
|
||||||
steam = steam[steam['behavior'] != 'purchase']
|
|
||||||
steam = steam.groupby("game").filter(lambda x: len(x)>10)
|
@ex.capture
|
||||||
size=int(len(steam)/10)
|
def prepare_data():
|
||||||
|
steam=pd.read_csv('data.csv',usecols=[0,1,2,3],names=['userId','game','behavior','hoursPlayed'])
|
||||||
meanGame = steam[steam["behavior"] != "purchase"].groupby("game").mean()
|
steam.isnull().values.any()
|
||||||
meanGame = meanGame.to_dict()
|
steam['userId'] = steam.userId.astype(str)
|
||||||
meanGame = meanGame['hoursPlayed']
|
purchaseCount = steam[steam["behavior"] != "play"]["game"].value_counts()
|
||||||
|
playCount = steam[steam["behavior"] != "purchase"]["game"].value_counts()
|
||||||
purchaseCount = purchaseCount.to_dict()
|
|
||||||
playCount = playCount.to_dict()
|
playerPurchaseCount = steam[steam["behavior"] != "play"]["userId"].value_counts()
|
||||||
playerPurchaseCount = playerPurchaseCount.to_dict()
|
playerPlayCount = steam[steam["behavior"] != "purchase"]["userId"].value_counts()
|
||||||
playerPlayCount = playerPlayCount.to_dict()
|
|
||||||
|
steam = steam[steam['behavior'] != 'purchase']
|
||||||
steam['meanTime'] = 0;
|
steam = steam.groupby("game").filter(lambda x: len(x)>10)
|
||||||
steam['purchaseCount'] = 0;
|
size=int(len(steam)/10)
|
||||||
steam['playCount'] = 0;
|
|
||||||
steam['playerPurchaseCount'] =0;
|
meanGame = steam[steam["behavior"] != "purchase"].groupby("game").mean()
|
||||||
steam['playerPlayCount'] =0;
|
meanGame = meanGame.to_dict()
|
||||||
steam['playPercent'] =0;
|
meanGame = meanGame['hoursPlayed']
|
||||||
|
|
||||||
for i in steam.index:
|
purchaseCount = purchaseCount.to_dict()
|
||||||
steam.at[i,'meanTime'] = meanGame[steam.at[i,'game']]
|
playCount = playCount.to_dict()
|
||||||
steam.at[i,'purchaseCount'] = purchaseCount[steam.at[i,'game']]
|
playerPurchaseCount = playerPurchaseCount.to_dict()
|
||||||
steam.at[i,'playCount'] = playCount[steam.at[i,'game']]
|
playerPlayCount = playerPlayCount.to_dict()
|
||||||
steam.at[i,'playerPurchaseCount'] = playerPurchaseCount[steam.at[i,'userId']]
|
|
||||||
steam.at[i,'playerPlayCount'] = playerPlayCount[steam.at[i,'userId']]
|
steam['meanTime'] = 0;
|
||||||
steam.at[i,'playPercent'] = playerPlayCount[steam.at[i,'userId']]/playerPurchaseCount[steam.at[i,'userId']]
|
steam['purchaseCount'] = 0;
|
||||||
|
steam['playCount'] = 0;
|
||||||
|
steam['playerPurchaseCount'] =0;
|
||||||
steam_train, steam_test = train_test_split(steam, test_size=size, random_state=1, stratify=steam["game"])
|
steam['playerPlayCount'] =0;
|
||||||
steam_train, steam_dev = train_test_split(steam_train, test_size=size, random_state=1, stratify=steam_train["game"])
|
steam['playPercent'] =0;
|
||||||
|
|
||||||
print(steam)
|
for i in steam.index:
|
||||||
|
steam.at[i,'meanTime'] = meanGame[steam.at[i,'game']]
|
||||||
games = {}
|
steam.at[i,'purchaseCount'] = purchaseCount[steam.at[i,'game']]
|
||||||
for i in steam['game']:
|
steam.at[i,'playCount'] = playCount[steam.at[i,'game']]
|
||||||
games[i] = 0
|
steam.at[i,'playerPurchaseCount'] = playerPurchaseCount[steam.at[i,'userId']]
|
||||||
|
steam.at[i,'playerPlayCount'] = playerPlayCount[steam.at[i,'userId']]
|
||||||
j=0
|
steam.at[i,'playPercent'] = playerPlayCount[steam.at[i,'userId']]/playerPurchaseCount[steam.at[i,'userId']]
|
||||||
for key,game in games.items():
|
|
||||||
games[key]=j
|
steam_train, steam_test = train_test_split(steam, test_size=size, random_state=1, stratify=steam["game"])
|
||||||
j=j+1
|
steam_train, steam_dev = train_test_split(steam_train, test_size=size, random_state=1, stratify=steam_train["game"])
|
||||||
|
|
||||||
for i in steam['game']:
|
games = {}
|
||||||
i = games[i]
|
for i in steam['game']:
|
||||||
|
games[i] = 0
|
||||||
invGames = {v: k for k, v in games.items()}
|
|
||||||
|
j=0
|
||||||
x_train = steam_train[['hoursPlayed','purchaseCount','playCount','playerPlayCount','playerPurchaseCount']]
|
for key,game in games.items():
|
||||||
y_train = steam_train['game']
|
games[key]=j
|
||||||
|
j=j+1
|
||||||
x_test = steam_test[['hoursPlayed','purchaseCount','playCount','playerPlayCount','playerPurchaseCount']]
|
|
||||||
y_test = steam_test['game']
|
for i in steam['game']:
|
||||||
|
i = games[i]
|
||||||
|
|
||||||
x_train = np.array(x_train)
|
invGames = {v: k for k, v in games.items()}
|
||||||
y_train = np.array(y_train)
|
|
||||||
x_test = np.array(x_test)
|
x_train = steam_train[['hoursPlayed','purchaseCount','playCount','playerPlayCount','playerPurchaseCount']]
|
||||||
y_test = np.array(y_test)
|
y_train = steam_train['game']
|
||||||
|
|
||||||
with open('xtest.csv','w',encoding='UTF-8',newline='') as xtest:
|
x_test = steam_test[['hoursPlayed','purchaseCount','playCount','playerPlayCount','playerPurchaseCount']]
|
||||||
writer = csv.writer(xtest)
|
y_test = steam_test['game']
|
||||||
for i in x_test:
|
|
||||||
writer.writerow(i)
|
|
||||||
|
x_train = np.array(x_train)
|
||||||
for i,j in enumerate(y_train):
|
y_train = np.array(y_train)
|
||||||
y_train[i] = games[j]
|
x_test = np.array(x_test)
|
||||||
|
y_test = np.array(y_test)
|
||||||
for i,j in enumerate(y_test):
|
|
||||||
y_test[i] = games[j]
|
with open('xtest.csv','w',encoding='UTF-8',newline='') as xtest:
|
||||||
|
writer = csv.writer(xtest)
|
||||||
np.savetxt("ytest.csv",y_test,delimiter=",",fmt='%d')
|
for i in x_test:
|
||||||
|
writer.writerow(i)
|
||||||
|
|
||||||
model = tf.keras.models.Sequential([
|
for i,j in enumerate(y_train):
|
||||||
tf.keras.layers.Flatten(input_shape=(5,1)),
|
y_train[i] = games[j]
|
||||||
tf.keras.layers.Dense(256, activation='relu'),
|
|
||||||
tf.keras.layers.Dropout(0.01),
|
for i,j in enumerate(y_test):
|
||||||
tf.keras.layers.Dense(1000, activation='softmax')
|
y_test[i] = games[j]
|
||||||
])
|
y_train = np.array(y_train).astype(np.float32)
|
||||||
mlflow.log_param("layers dense relu",256)
|
y_test = np.array(y_test).astype(np.float32)
|
||||||
mlflow.log_param("layers droout",0.01)
|
np.savetxt("ytest.csv",y_test,delimiter=",",fmt='%d')
|
||||||
mlflow.log_param("layers dense softmax",1000)
|
return x_train, y_train, x_test, y_test, invGames
|
||||||
mlflow.log_param("iterations",epoch)
|
|
||||||
|
@ex.main
|
||||||
|
def my_main(epoch,layerDenseRelu,layerDropout,layerDenseSoftMax,_run):
|
||||||
model.compile(optimizer='adam',
|
x_train, y_train, x_test, y_test, invGames = prepare_data()
|
||||||
loss='sparse_categorical_crossentropy',
|
model = tf.keras.models.Sequential([
|
||||||
metrics=['accuracy'])
|
tf.keras.layers.Flatten(input_shape=(5,1)),
|
||||||
|
tf.keras.layers.Dense(layerDenseRelu, activation='relu'),
|
||||||
y_train = np.array(y_train).astype(np.float32)
|
tf.keras.layers.Dropout(layerDropout),
|
||||||
y_test = np.array(y_test).astype(np.float32)
|
tf.keras.layers.Dense(layerDenseSoftMax, activation='softmax')
|
||||||
|
])
|
||||||
|
|
||||||
|
model.compile(optimizer='adam',
|
||||||
model.fit(x_train, y_train, epochs=epoch)
|
loss='sparse_categorical_crossentropy',
|
||||||
eval = model.evaluate(x_test, y_test)
|
metrics=['accuracy'])
|
||||||
|
|
||||||
mlflow.log_metric("accuraccy", eval[1])
|
|
||||||
|
model.fit(x_train, y_train, epochs=epoch)
|
||||||
prediction = model.predict(x_test)
|
evaluation = model.evaluate(x_test, y_test)
|
||||||
classes_x=np.argmax(prediction,axis=1)
|
_run.log_scalar("training.loss", evaluation[0])
|
||||||
|
_run.log_scalar("training.accuracy", evaluation[1])
|
||||||
rows = []
|
|
||||||
|
prediction = model.predict(x_test)
|
||||||
for j,i in enumerate(classes_x):
|
classes_x=np.argmax(prediction,axis=1)
|
||||||
row = [invGames[i],invGames[y_test[j]]]
|
|
||||||
rows.append(row)
|
rows = []
|
||||||
with open('results.csv','w',encoding='UTF-8',newline='') as f:
|
|
||||||
writer = csv.writer(f)
|
for j,i in enumerate(classes_x):
|
||||||
writer.writerow(["predicted", "expected"])
|
row = [invGames[i],invGames[y_test[j]]]
|
||||||
for row in rows:
|
rows.append(row)
|
||||||
writer.writerow(row)
|
with open('results.csv','w',encoding='UTF-8',newline='') as f:
|
||||||
|
writer = csv.writer(f)
|
||||||
model.save('./model')
|
writer.writerow(["predicted", "expected"])
|
||||||
|
for row in rows:
|
||||||
|
writer.writerow(row)
|
||||||
|
|
||||||
|
model.save('./model')
|
||||||
|
ex.add_artifact('./model/saved_model.pb')
|
||||||
|
|
||||||
|
|
||||||
|
ex.run()
|
200000
steam-200k.csv
200000
steam-200k.csv
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user