training changes

This commit is contained in:
s434695 2021-05-15 19:14:37 +02:00
parent 2d0e53d1eb
commit 18172eadf9
3 changed files with 86 additions and 32 deletions

2
Jenkinsfile vendored
View File

@ -29,7 +29,7 @@ pipeline {
} }
stage('archiveArtifacts') { stage('archiveArtifacts') {
steps{ steps{
archiveArtifacts 'model1.h5' archiveArtifacts 'vgsales_model.h5'
} }
} }
} }

View File

@ -1,42 +1,48 @@
#! /usr/bin/python3 import sys
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense
from sklearn.metrics import accuracy_score, classification_report
import pandas as pd import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np import numpy as np
import requests from sklearn import preprocessing
url = 'https://git.wmi.amu.edu.pl/s434695/ium_434695/raw/commit/2301fb86e434734376f73503307a8f3255a75cc6/vgsales.csv' from sklearn.linear_model import LinearRegression
r = requests.get(url, allow_redirects=True) from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Dense, Activation,Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
open('vgsales.csv', 'wb').write(r.content) # odczytanie danych z plików
df = pd.read_csv('vgsales.csv') vgsales_train = pd.read_csv('train.csv')
vgsales_test = pd.read_csv('test.csv')
vgsales_dev = pd.read_csv('dev.csv')
vgsales_train['Nintendo'] = vgsales_train['Publisher'].apply(lambda x: 1 if x=='Nintendo' else 0)
vgsales_test['Nintendo'] = vgsales_test['Publisher'].apply(lambda x: 1 if x=='Nintendo' else 0)
vgsales_dev['Nintendo'] = vgsales_dev['Publisher'].apply(lambda x: 1 if x=='Nintendo' else 0)
# podzial na X i y
X_train = vgsales_train.drop(['Rank','Name','Platform','Year','Genre','Publisher'],axis = 1)
y_train = vgsales_train[['Nintendo']]
X_test = vgsales_test.drop(['Rank','Name','Platform','Year','Genre','Publisher'],axis = 1)
y_test = vgsales_test[['Nintendo']]
def regression_model(): print(X_train.shape[1])
model = Sequential() # keras model
model.add(Dense(16,activation = "relu", input_shape = (x_train.shape[1],))) model = Sequential()
model.add(Dense(32,activation = "relu")) model.add(Dense(9, input_dim = X_train.shape[1], kernel_initializer='normal', activation='relu'))
model.add(Dense(1,activation = "relu")) model.add(Dense(1,kernel_initializer='normal', activation='sigmoid'))
model.compile(optimizer = "adam", loss = "mean_squared_error")
return model
df['Nintendo'] = df['Publisher'].apply(lambda x: 1 if x=='Nintendo' else 0) early_stop = EarlyStopping(monitor="val_loss", mode="min", verbose=1, patience=10)
df = df.drop(['Rank','Name','Platform','Year','Genre','Publisher'],axis = 1)
df
y = df.Nintendo # kompilacja
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
df=((df-df.min())/(df.max()-df.min())) # model fit
epochs = int(sys.argv[1])
batch_size = int(sys.argv[2])
x = df.drop(['Nintendo'],axis = 1) # trenowanie modelu
model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test))
x_train, x_test, y_train, y_test = train_test_split(x,y , test_size=0.2,train_size=0.8, random_state=21) # zapisanie modelu
model.save('vgsales_model.h5')
model = regression_model()
model.fit(x_train, y_train, epochs = 600, verbose = 1)
y_pred = model.predict(x_test)
model.save('model1.h5')

48
train2.py Normal file
View File

@ -0,0 +1,48 @@
import sys
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Input, Dense, Activation,Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.models import Sequential
# Read the pre-split train / test / dev datasets from CSV files.
vgsales_train = pd.read_csv('train.csv')
vgsales_test = pd.read_csv('test.csv')
vgsales_dev = pd.read_csv('dev.csv')

# Binary target: 1 when the publisher is Nintendo, 0 otherwise.
# A vectorised comparison replaces the per-row lambda; the result is the same.
for frame in (vgsales_train, vgsales_test, vgsales_dev):
    frame['Nintendo'] = frame['Publisher'].eq('Nintendo').astype(int)

# Split into features (X) and target (y).
# The target column 'Nintendo' is dropped from the features together with the
# other non-feature columns: leaving it in would let the network read the
# label straight from its input (label leakage).
# NOTE(review): any script that loads vgsales_model.h5 must build its input
# frame with this same column list, otherwise the input width will not match.
non_feature_columns = [
    'Rank', 'Name', 'Platform', 'Year', 'Genre', 'Publisher', 'Nintendo',
]
X_train = vgsales_train.drop(non_feature_columns, axis=1)
y_train = vgsales_train[['Nintendo']]
# The test split is prepared here but deliberately not used during training.
X_test = vgsales_test.drop(non_feature_columns, axis=1)
y_test = vgsales_test[['Nintendo']]
# The dev split is what the model is validated on while it trains.
X_dev = vgsales_dev.drop(non_feature_columns, axis=1)
y_dev = vgsales_dev[['Nintendo']]
print(X_train.shape[1])

# Keras model: one hidden ReLU layer and a sigmoid output unit.
model = Sequential()
model.add(Dense(9, input_dim=X_train.shape[1], kernel_initializer='normal', activation='relu'))
model.add(Dense(1, kernel_initializer='normal', activation='sigmoid'))
# Stop once the validation loss has not improved for 10 consecutive epochs.
early_stop = EarlyStopping(monitor="val_loss", mode="min", verbose=1, patience=10)

# Compile the model.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Training hyper-parameters come from the command line: EPOCHS BATCH_SIZE.
if len(sys.argv) < 3:
    # Exit with a readable message instead of a bare IndexError traceback.
    sys.exit("usage: {} EPOCHS BATCH_SIZE".format(sys.argv[0]))
epochs = int(sys.argv[1])
batch_size = int(sys.argv[2])

# Train the model. Validation runs on the dev split so the test split stays
# unseen, and the early-stopping callback is actually handed to fit().
model.fit(
    X_train,
    y_train,
    epochs=epochs,
    batch_size=batch_size,
    validation_data=(X_dev, y_dev),
    callbacks=[early_stop],
)

# Save the trained model.
model.save('vgsales_model.h5')