Commit 1055cde3af by s434695, 2021-05-23 23:04:55 +02:00 (parent a51ee6d1f9)
14 changed files with 0 additions and 17229 deletions


@ -1,76 +0,0 @@
#! /usr/bin/python3
from datetime import datetime

import numpy as np
import pandas as pd
import requests
from sacred import Experiment
from sacred.observers import FileStorageObserver
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

ex = Experiment("ium_s434695", interactive=False)
ex.observers.append(FileStorageObserver('ium_s434695/my_runs'))


@ex.config
def my_config():
    train_size_param = 0.8
    test_size_param = 0.2


@ex.capture
def prepare_model(train_size_param, test_size_param, _run):
    _run.info["prepare_model_ts"] = str(datetime.now())

    # Download the dataset and load it into a DataFrame.
    url = 'https://git.wmi.amu.edu.pl/s434695/ium_434695/raw/commit/2301fb86e434734376f73503307a8f3255a75cc6/vgsales.csv'
    r = requests.get(url, allow_redirects=True)
    open('vgsales.csv', 'wb').write(r.content)
    df = pd.read_csv('vgsales.csv')

    def regression_model():
        model = Sequential()
        model.add(Dense(32, activation="relu", input_shape=(x_train.shape[1],)))
        model.add(Dense(64, activation="relu"))
        model.add(Dense(1, activation="relu"))
        model.compile(optimizer="adam", loss="mean_squared_error")
        return model

    # Binary target: 1 if the publisher is Nintendo, 0 otherwise.
    df['Nintendo'] = df['Publisher'].apply(lambda x: 1 if x == 'Nintendo' else 0)
    df = df.drop(['Rank', 'Name', 'Platform', 'Year', 'Genre', 'Publisher'], axis=1)
    y = df.Nintendo

    # Min-max normalisation of the remaining numeric columns.
    df = (df - df.min()) / (df.max() - df.min())
    x = df.drop(['Nintendo'], axis=1)
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size_param, train_size=train_size_param, random_state=21)

    model = regression_model()
    model.fit(x_train, y_train, epochs=600, verbose=1)

    y_pred = model.predict(x_test)
    y_pred = np.around(y_pred, decimals=0)

    # Save in the TensorFlow SavedModel format so that the artifact path
    # registered below (vgsales_model/saved_model/saved_model.pb) exists.
    model.save('vgsales_model/saved_model')

    return classification_report(y_test, y_pred)


@ex.main
def my_main(train_size_param, test_size_param):
    print(prepare_model())


ex.run()
ex.add_artifact("vgsales_model/saved_model/saved_model.pb")
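
A minimal sketch (an illustration added here, not part of the deleted file) of how the files written by the FileStorageObserver configured above could be inspected after a run; the run id `1` is a hypothetical example.

# Assumption: at least one run has completed, so a numbered directory such as
# ium_s434695/my_runs/1 exists with the JSON files Sacred's FileStorageObserver writes.
import json
from pathlib import Path

run_dir = Path('ium_s434695/my_runs/1')   # hypothetical run id

config = json.loads((run_dir / 'config.json').read_text())
run_meta = json.loads((run_dir / 'run.json').read_text())

print(config)               # the train_size_param / test_size_param used for this run
print(run_meta['status'])   # e.g. COMPLETED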


@ -1,78 +0,0 @@
#! /usr/bin/python3
from datetime import datetime

import numpy as np
import pandas as pd
import requests
from sacred import Experiment
from sacred.observers import MongoObserver
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

ex = Experiment("ium_s434695", interactive=False)
ex.observers.append(MongoObserver(url='mongodb://mongo_user:mongo_password_IUM_2021@172.17.0.1:27017',
                                  db_name='sacred'))


@ex.config
def my_config():
    train_size_param = 0.8
    test_size_param = 0.2


@ex.capture
def prepare_model(train_size_param, test_size_param, _run):
    _run.info["prepare_model_ts"] = str(datetime.now())

    # Download the dataset and load it into a DataFrame.
    url = 'https://git.wmi.amu.edu.pl/s434695/ium_434695/raw/commit/2301fb86e434734376f73503307a8f3255a75cc6/vgsales.csv'
    r = requests.get(url, allow_redirects=True)
    open('vgsales.csv', 'wb').write(r.content)
    df = pd.read_csv('vgsales.csv')

    def regression_model():
        model = Sequential()
        model.add(Dense(32, activation="relu", input_shape=(x_train.shape[1],)))
        model.add(Dense(64, activation="relu"))
        model.add(Dense(1, activation="relu"))
        model.compile(optimizer="adam", loss="mean_squared_error")
        return model

    # Binary target: 1 if the publisher is Nintendo, 0 otherwise.
    df['Nintendo'] = df['Publisher'].apply(lambda x: 1 if x == 'Nintendo' else 0)
    df = df.drop(['Rank', 'Name', 'Platform', 'Year', 'Genre', 'Publisher'], axis=1)
    y = df.Nintendo

    # Min-max normalisation of the remaining numeric columns.
    df = (df - df.min()) / (df.max() - df.min())
    x = df.drop(['Nintendo'], axis=1)
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size_param, train_size=train_size_param, random_state=21)

    model = regression_model()
    model.fit(x_train, y_train, epochs=600, verbose=1)

    y_pred = model.predict(x_test)
    y_pred = np.around(y_pred, decimals=0)

    # Save in the TensorFlow SavedModel format so that the artifact path
    # registered below (vgsales_model/saved_model/saved_model.pb) exists.
    model.save('vgsales_model/saved_model')

    return classification_report(y_test, y_pred)


@ex.main
def my_main(train_size_param, test_size_param):
    print(prepare_model())


ex.run()
ex.add_artifact("vgsales_model/saved_model/saved_model.pb")
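
For the Mongo-backed variant above, a rough sketch of pulling the latest run back out of MongoDB with pymongo (installed by the project's Dockerfile); the connection string is the one hard-coded in the script and is assumed to be reachable.

# Assumption: the MongoDB instance from the observer URL above is reachable.
from pymongo import MongoClient

client = MongoClient('mongodb://mongo_user:mongo_password_IUM_2021@172.17.0.1:27017')
runs = client['sacred']['runs']                  # collection used by Sacred's MongoObserver

last_run = runs.find_one(sort=[('_id', -1)])     # newest run document
print(last_run['config'])                        # resolved configuration for that run
print(last_run['status'])                        # e.g. COMPLETED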


@ -1,42 +0,0 @@
#! /usr/bin/python3
import pandas as pd
import requests
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

# Download the dataset and load it into a DataFrame.
url = 'https://git.wmi.amu.edu.pl/s434695/ium_434695/raw/commit/2301fb86e434734376f73503307a8f3255a75cc6/vgsales.csv'
r = requests.get(url, allow_redirects=True)
open('vgsales.csv', 'wb').write(r.content)
df = pd.read_csv('vgsales.csv')


def regression_model():
    model = Sequential()
    model.add(Dense(16, activation="relu", input_shape=(x_train.shape[1],)))
    model.add(Dense(32, activation="relu"))
    model.add(Dense(1, activation="relu"))
    model.compile(optimizer="adam", loss="mean_squared_error")
    return model


# Binary target: 1 if the publisher is Nintendo, 0 otherwise.
df['Nintendo'] = df['Publisher'].apply(lambda x: 1 if x == 'Nintendo' else 0)
df = df.drop(['Rank', 'Name', 'Platform', 'Year', 'Genre', 'Publisher'], axis=1)
y = df.Nintendo

# Min-max normalisation of the remaining numeric columns.
df = (df - df.min()) / (df.max() - df.min())
x = df.drop(['Nintendo'], axis=1)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, train_size=0.8, random_state=21)

model = regression_model()
model.fit(x_train, y_train, epochs=600, verbose=1)

y_pred = model.predict(x_test)

# Save the trained model in the TensorFlow SavedModel format.
model.save('model1')
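
A short sketch (added for illustration) of reloading the model saved above; it assumes the script has already been run in the current working directory, so both `model1` and `vgsales.csv` exist.

# Assumption: train.py above has been run and wrote the 'model1' directory
# and downloaded vgsales.csv.
import pandas as pd
from tensorflow.keras.models import load_model

reloaded = load_model('model1')

# Re-create the normalised feature matrix the same way the script does, then predict.
df = pd.read_csv('vgsales.csv')
df['Nintendo'] = df['Publisher'].apply(lambda x: 1 if x == 'Nintendo' else 0)
df = df.drop(['Rank', 'Name', 'Platform', 'Year', 'Genre', 'Publisher'], axis=1)
df = (df - df.min()) / (df.max() - df.min())
x = df.drop(['Nintendo'], axis=1)

print(reloaded.predict(x.head()))   # predictions for the first five rows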


@ -1,23 +0,0 @@
# Our image inherits from the Ubuntu image in its latest version
FROM ubuntu:latest
# Install the required system dependencies. Note the "-y" flag (assume yes).
RUN apt update && apt install -y figlet
RUN apt install -y git
RUN apt install -y python3-pip
# Python dependencies used by the training and evaluation scripts.
RUN pip3 install setuptools
RUN pip3 install kaggle
RUN pip3 install pandas
RUN pip3 install numpy
RUN pip3 install seaborn
# "scikit-learn" is the package name on PyPI (the bare "sklearn" name is a deprecated stub).
RUN pip3 install scikit-learn
RUN pip3 install matplotlib
RUN pip3 install tensorflow
RUN pip3 install sacred
RUN pip3 install wget
RUN pip3 install keras
RUN pip3 install GitPython
RUN pip3 install pymongo
RUN pip3 install mlflow


@ -1,49 +0,0 @@
pipeline {
    agent {
        dockerfile true
    }
    parameters {
        buildSelector(
            defaultSelector: lastSuccessful(),
            description: 'Which build to use for copying data artifacts',
            name: 'WHICH_BUILD_DATA'
        )
        buildSelector(
            defaultSelector: lastSuccessful(),
            description: 'Which build to use for copying train artifacts',
            name: 'WHICH_BUILD_TRAIN'
        )
        buildSelector(
            defaultSelector: lastSuccessful(),
            description: 'Which build to use for copying current project artifacts',
            name: 'WHICH_BUILD_THIS'
        )
    }
    stages {
        stage('copy artifacts') {
            steps {
                copyArtifacts(fingerprintArtifacts: true, projectName: 's434695-create-dataset', selector: buildParameter('WHICH_BUILD_DATA'))
                copyArtifacts(fingerprintArtifacts: true, projectName: 's434695-training', selector: buildParameter('WHICH_BUILD_TRAIN'))
                copyArtifacts(fingerprintArtifacts: true, optional: true, projectName: 's434695-evaluation', selector: buildParameter('WHICH_BUILD_THIS'))
            }
        }
        stage('evaluate') {
            steps {
                catchError {
                    sh 'python3 evaluate.py'
                }
            }
        }
        stage('send email') {
            steps {
                emailext body: currentBuild.result ?: 'SUCCESS',
                         subject: 's434695 - evaluation',
                         to: '26ab8f35.uam.onmicrosoft.com@emea.teams.ms'
            }
        }
    }
}


@ -1,59 +0,0 @@
pipeline {
    agent any
    parameters {
        buildSelector(
            defaultSelector: lastSuccessful(),
            description: 'Which build to use for copying artifacts',
            name: 'WHICH_BUILD'
        )
        string(
            defaultValue: '16',
            description: 'batch size',
            name: 'BATCH_SIZE'
        )
        string(
            defaultValue: '15',
            description: 'epochs',
            name: 'EPOCHS'
        )
    }
    stages {
        stage('checkout') {
            steps {
                copyArtifacts fingerprintArtifacts: true, projectName: 's434695-create-dataset', selector: buildParameter('WHICH_BUILD')
            }
        }
        stage('docker-training') {
            steps {
                script {
                    def img = docker.build('shroomy/ium2:1')
                    img.inside {
                        sh "python3 train.py"
                        sh "python3 sacred1.py"
                        sh "python3 sacred2.py"
                    }
                }
            }
        }
        stage('archiveArtifacts') {
            steps {
                archiveArtifacts 'ium_s434695/**'
                archiveArtifacts 'model1'
            }
        }
    }
    post {
        success {
            build job: 's434695-evaluation/master'
            mail body: 'SUCCESS TRAINING', subject: 's434695', to: '26ab8f35.uam.onmicrosoft.com@emea.teams.ms'
        }
        failure {
            mail body: 'FAILURE TRAINING', subject: 's434695', to: '26ab8f35.uam.onmicrosoft.com@emea.teams.ms'
        }
    }
}


@ -1 +0,0 @@
print('test')


@ -1,76 +0,0 @@
#! /usr/bin/python3
from datetime import datetime

import numpy as np
import pandas as pd
import requests
from sacred import Experiment
from sacred.observers import FileStorageObserver
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

ex = Experiment("ium_s434695", interactive=False)
ex.observers.append(FileStorageObserver('ium_s434695/my_runs'))


@ex.config
def my_config():
    train_size_param = 0.8
    test_size_param = 0.2


@ex.capture
def prepare_model(train_size_param, test_size_param, _run):
    _run.info["prepare_model_ts"] = str(datetime.now())

    # Download the dataset and load it into a DataFrame.
    url = 'https://git.wmi.amu.edu.pl/s434695/ium_434695/raw/commit/2301fb86e434734376f73503307a8f3255a75cc6/vgsales.csv'
    r = requests.get(url, allow_redirects=True)
    open('vgsales.csv', 'wb').write(r.content)
    df = pd.read_csv('vgsales.csv')

    def regression_model():
        model = Sequential()
        model.add(Dense(32, activation="relu", input_shape=(x_train.shape[1],)))
        model.add(Dense(64, activation="relu"))
        model.add(Dense(1, activation="relu"))
        model.compile(optimizer="adam", loss="mean_squared_error")
        return model

    # Binary target: 1 if the publisher is Nintendo, 0 otherwise.
    df['Nintendo'] = df['Publisher'].apply(lambda x: 1 if x == 'Nintendo' else 0)
    df = df.drop(['Rank', 'Name', 'Platform', 'Year', 'Genre', 'Publisher'], axis=1)
    y = df.Nintendo

    # Min-max normalisation of the remaining numeric columns.
    df = (df - df.min()) / (df.max() - df.min())
    x = df.drop(['Nintendo'], axis=1)
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size_param, train_size=train_size_param, random_state=21)

    model = regression_model()
    model.fit(x_train, y_train, epochs=600, verbose=1)

    y_pred = model.predict(x_test)
    y_pred = np.around(y_pred, decimals=0)

    # Save in the TensorFlow SavedModel format so that the artifact path
    # registered below (vgsales_model/saved_model/saved_model.pb) exists.
    model.save('vgsales_model/saved_model')

    return classification_report(y_test, y_pred)


@ex.main
def my_main(train_size_param, test_size_param):
    print(prepare_model())


ex.run()
ex.add_artifact("vgsales_model/saved_model/saved_model.pb")


@ -1,78 +0,0 @@
#! /usr/bin/python3
from datetime import datetime

import numpy as np
import pandas as pd
import requests
from sacred import Experiment
from sacred.observers import MongoObserver
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

ex = Experiment("ium_s434695", interactive=False)
ex.observers.append(MongoObserver(url='mongodb://mongo_user:mongo_password_IUM_2021@172.17.0.1:27017',
                                  db_name='sacred'))


@ex.config
def my_config():
    train_size_param = 0.8
    test_size_param = 0.2


@ex.capture
def prepare_model(train_size_param, test_size_param, _run):
    _run.info["prepare_model_ts"] = str(datetime.now())

    # Download the dataset and load it into a DataFrame.
    url = 'https://git.wmi.amu.edu.pl/s434695/ium_434695/raw/commit/2301fb86e434734376f73503307a8f3255a75cc6/vgsales.csv'
    r = requests.get(url, allow_redirects=True)
    open('vgsales.csv', 'wb').write(r.content)
    df = pd.read_csv('vgsales.csv')

    def regression_model():
        model = Sequential()
        model.add(Dense(32, activation="relu", input_shape=(x_train.shape[1],)))
        model.add(Dense(64, activation="relu"))
        model.add(Dense(1, activation="relu"))
        model.compile(optimizer="adam", loss="mean_squared_error")
        return model

    # Binary target: 1 if the publisher is Nintendo, 0 otherwise.
    df['Nintendo'] = df['Publisher'].apply(lambda x: 1 if x == 'Nintendo' else 0)
    df = df.drop(['Rank', 'Name', 'Platform', 'Year', 'Genre', 'Publisher'], axis=1)
    y = df.Nintendo

    # Min-max normalisation of the remaining numeric columns.
    df = (df - df.min()) / (df.max() - df.min())
    x = df.drop(['Nintendo'], axis=1)
    x_train, x_test, y_train, y_test = train_test_split(
        x, y, test_size=test_size_param, train_size=train_size_param, random_state=21)

    model = regression_model()
    model.fit(x_train, y_train, epochs=600, verbose=1)

    y_pred = model.predict(x_test)
    y_pred = np.around(y_pred, decimals=0)

    # Save in the TensorFlow SavedModel format so that the artifact path
    # registered below (vgsales_model/saved_model/saved_model.pb) exists.
    model.save('vgsales_model/saved_model')

    return classification_report(y_test, y_pred)


@ex.main
def my_main(train_size_param, test_size_param):
    print(prepare_model())


ex.run()
ex.add_artifact("vgsales_model/saved_model/saved_model.pb")


@ -1,19 +0,0 @@
# Download the .csv file
curl -OL https://git.wmi.amu.edu.pl/s434695/ium_434695/raw/branch/master/vgsales.csv
# Split the csv file into test/dev/train sets
head -n 1 vgsales.csv > header.csv
tail -n +2 vgsales.csv | shuf > data.shuffled
head -n 3320 data.shuffled > games.data.test
head -n 6640 data.shuffled | tail -n 3320 > games.data.dev
tail -n +6641 data.shuffled > games.data.train
cat header.csv games.data.test > test.csv
cat header.csv games.data.dev > dev.csv
cat header.csv games.data.train > train.csv
# Cut the data down to the number of rows given as $1
head -n $1 data.shuffled > obcietedane.data
cat header.csv obcietedane.data > obcietedane.csv
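
For reference, a rough pandas equivalent of the shuffle-and-split above (the 3320-row test and dev sizes are carried over from the shell script; `random_state=0` is an arbitrary seed added here, whereas `shuf` above is unseeded):

# Sketch of the same header-preserving shuffle and split done in pandas
# (assumes vgsales.csv has already been downloaded as in the script above).
import pandas as pd

df = pd.read_csv('vgsales.csv').sample(frac=1, random_state=0)  # shuffle all rows

test = df.iloc[:3320]
dev = df.iloc[3320:6640]
train = df.iloc[6640:]

test.to_csv('test.csv', index=False)
dev.to_csv('dev.csv', index=False)
train.to_csv('train.csv', index=False)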


@ -1,42 +0,0 @@
#! /usr/bin/python3
import pandas as pd
import requests
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

# Download the dataset and load it into a DataFrame.
url = 'https://git.wmi.amu.edu.pl/s434695/ium_434695/raw/commit/2301fb86e434734376f73503307a8f3255a75cc6/vgsales.csv'
r = requests.get(url, allow_redirects=True)
open('vgsales.csv', 'wb').write(r.content)
df = pd.read_csv('vgsales.csv')


def regression_model():
    model = Sequential()
    model.add(Dense(16, activation="relu", input_shape=(x_train.shape[1],)))
    model.add(Dense(32, activation="relu"))
    model.add(Dense(1, activation="relu"))
    model.compile(optimizer="adam", loss="mean_squared_error")
    return model


# Binary target: 1 if the publisher is Nintendo, 0 otherwise.
df['Nintendo'] = df['Publisher'].apply(lambda x: 1 if x == 'Nintendo' else 0)
df = df.drop(['Rank', 'Name', 'Platform', 'Year', 'Genre', 'Publisher'], axis=1)
y = df.Nintendo

# Min-max normalisation of the remaining numeric columns.
df = (df - df.min()) / (df.max() - df.min())
x = df.drop(['Nintendo'], axis=1)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, train_size=0.8, random_state=21)

model = regression_model()
model.fit(x_train, y_train, epochs=600, verbose=1)

y_pred = model.predict(x_test)

# Save the trained model in the TensorFlow SavedModel format.
model.save('model1')

File diff suppressed because it is too large


@ -1,34 +0,0 @@
#! /usr/bin/python3
import pandas as pd
import requests
import seaborn as sns
from sklearn.model_selection import train_test_split

# Download the dataset.
url = 'https://git.wmi.amu.edu.pl/s434695/ium_434695/raw/commit/2301fb86e434734376f73503307a8f3255a75cc6/vgsales.csv'
r = requests.get(url, allow_redirects=True)
open('vgsales.csv', 'wb').write(r.content)

vgsales = pd.read_csv('vgsales.csv')

# Basic exploration of the dataset (converted from a notebook, hence the prints).
print(vgsales)
print(vgsales.describe(include='all'))
print(vgsales["Publisher"].value_counts())
print(vgsales["Platform"].value_counts())

# Plots from the notebook version; when run as a script the figures are created
# but not displayed unless shown or saved explicitly.
vgsales["Platform"].value_counts().plot(kind="bar")
vgsales[["Platform", "JP_Sales"]].groupby("Platform").mean().plot(kind="bar")
sns.set_theme()
sns.relplot(data=vgsales, x="JP_Sales", y="NA_Sales", hue="Genre")

# Train/test split and a quick look at the platform distribution in each part.
vgsales_train, vgsales_test = train_test_split(vgsales, test_size=0.6, random_state=1)
print(vgsales_train["Platform"].value_counts())
print(vgsales_test["Platform"].value_counts())
print(vgsales_train["Platform"])
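
Since the last few lines compare the `Platform` distribution across the two parts, one option (an addition for illustration, not something the deleted file did) is a stratified split, which keeps those proportions aligned; platforms with fewer than two rows have to be filtered out first, otherwise scikit-learn refuses to stratify on them.

# Sketch: stratify the split on Platform so train/test keep similar proportions.
# Assumes `vgsales` is the DataFrame loaded in the script above.
from sklearn.model_selection import train_test_split

counts = vgsales["Platform"].value_counts()
common = vgsales[vgsales["Platform"].isin(counts[counts >= 2].index)]

strat_train, strat_test = train_test_split(
    common, test_size=0.6, random_state=1, stratify=common["Platform"])

print(strat_train["Platform"].value_counts(normalize=True).head())
print(strat_test["Platform"].value_counts(normalize=True).head())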


@ -1,53 +0,0 @@
#! /usr/bin/python3
import numpy as np
import pandas as pd
import requests
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Dense
from tensorflow.keras.models import Sequential

# Download the dataset and load it into a DataFrame.
url = 'https://git.wmi.amu.edu.pl/s434695/ium_434695/raw/commit/2301fb86e434734376f73503307a8f3255a75cc6/vgsales.csv'
r = requests.get(url, allow_redirects=True)
open('vgsales.csv', 'wb').write(r.content)
df = pd.read_csv('vgsales.csv')


def regression_model():
    model = Sequential()
    model.add(Dense(16, activation="relu", input_shape=(x_train.shape[1],)))
    model.add(Dense(32, activation="relu"))
    model.add(Dense(1, activation="relu"))
    model.compile(optimizer="adam", loss="mean_squared_error")
    return model


# Binary target: 1 if the publisher is Nintendo, 0 otherwise.
df['Nintendo'] = df['Publisher'].apply(lambda x: 1 if x == 'Nintendo' else 0)
df = df.drop(['Rank', 'Name', 'Platform', 'Year', 'Genre', 'Publisher'], axis=1)
y = df.Nintendo

# Min-max normalisation of the remaining numeric columns.
df = (df - df.min()) / (df.max() - df.min())
x = df.drop(['Nintendo'], axis=1)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, train_size=0.8, random_state=21)

model = regression_model()
model.fit(x_train, y_train, epochs=600, verbose=1)

# Round the predictions to 0/1, report the metrics and dump the predictions.
y_pred = model.predict(x_test)
y_pred = np.around(y_pred, decimals=0)
print(accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))
pd.DataFrame(y_pred).to_csv("preds.csv")