diff --git a/Jenkinsfile b/Jenkinsfile
index 37019a1..ba066a8 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -26,7 +26,7 @@ pipeline {
         {
             steps
             {
-                copyArtifacts(fingerprintArtifacts: true, projectName: 's434788-create-dataset', selector: buildParameter('WHICH_BUILD'))
+                copyArtifacts(fingerprintArtifacts: true, projectName: 's434695-create-dataset', selector: buildParameter('WHICH_BUILD'))
             }
         }
         stage('train')
@@ -34,14 +34,16 @@ pipeline {
             steps
             {
                 catchError {
-                    sh 'python3.8 Zadanie_06_and_07_training.py ${BATCH_SIZE} ${EPOCHS}'
+                    sh 'python3.8 train.py ${BATCH_SIZE} ${EPOCHS}'
+                    sh 'python3.8 sacred1.py ${BATCH_SIZE} ${EPOCHS}'
+                    sh 'python3.8 sacred2.py ${BATCH_SIZE} ${EPOCHS}'
                 }
             }
         }
         stage('Archive artifacts')
         {
             steps{
-                archiveArtifacts 'wine_model.h5'
+                archiveArtifacts 'vgsales_model.h5'
                 archiveArtifacts 'my_runs/**'
             }
         }
diff --git a/sacred1.py b/sacred1.py
index 1140565..665e7d9 100755
--- a/sacred1.py
+++ b/sacred1.py
@@ -1,12 +1,16 @@
 #! /usr/bin/python3
-from tensorflow.keras.models import Sequential, load_model
-from tensorflow.keras.layers import Dense
-from sklearn.metrics import accuracy_score, classification_report
+import sys
 import pandas as pd
-from sklearn.model_selection import train_test_split
-import wget
 import numpy as np
-import requests
+from sklearn import preprocessing
+from sklearn.linear_model import LinearRegression
+from sklearn.metrics import mean_squared_error
+import tensorflow as tf
+from tensorflow import keras
+from tensorflow.keras.layers import Input, Dense, Activation,Dropout
+from tensorflow.keras.models import Model
+from tensorflow.keras.callbacks import EarlyStopping
+from tensorflow.keras.models import Sequential
 from sacred.observers import FileStorageObserver
 from sacred import Experiment
 from datetime import datetime
diff --git a/sacred2.py b/sacred2.py
index 36c4d51..db9f17d 100755
--- a/sacred2.py
+++ b/sacred2.py
@@ -1,12 +1,16 @@
 #! /usr/bin/python3
-from tensorflow.keras.models import Sequential, load_model
-from tensorflow.keras.layers import Dense
-from sklearn.metrics import accuracy_score, classification_report
+import sys
 import pandas as pd
-from sklearn.model_selection import train_test_split
-import wget
 import numpy as np
-import requests
+from sklearn import preprocessing
+from sklearn.linear_model import LinearRegression
+from sklearn.metrics import mean_squared_error
+import tensorflow as tf
+from tensorflow import keras
+from tensorflow.keras.layers import Input, Dense, Activation,Dropout
+from tensorflow.keras.models import Model
+from tensorflow.keras.callbacks import EarlyStopping
+from tensorflow.keras.models import Sequential
 from sacred.observers import FileStorageObserver
 from sacred import Experiment
 from datetime import datetime
diff --git a/train.py b/train.py
index c579a74..9598ce2 100755
--- a/train.py
+++ b/train.py
@@ -10,39 +10,73 @@ from tensorflow.keras.layers import Input, Dense, Activation,Dropout
 from tensorflow.keras.models import Model
 from tensorflow.keras.callbacks import EarlyStopping
 from tensorflow.keras.models import Sequential
+from sacred import Experiment
+from datetime import datetime
+from sacred.observers import FileStorageObserver
+from sacred.observers import MongoObserver
+import pymongo
 
-# odczytanie danych z plików
-vgsales_train = pd.read_csv('train.csv')
-vgsales_test = pd.read_csv('test.csv')
-vgsales_dev = pd.read_csv('dev.csv')
+ex = Experiment("434695-mongo", interactive=False, save_git_info=False)
+# NOTE(review): MongoDB credentials are hard-coded in this URL; consider loading them from the environment.
+ex.observers.append(MongoObserver(url='mongodb://mongo_user:mongo_password_IUM_2021@172.17.0.1:27017', db_name='sacred'))
+ex.observers.append(FileStorageObserver('my_runs'))
 
-vgsales_train['Nintendo'] = vgsales_train['Publisher'].apply(lambda x: 1 if x=='Nintendo' else 0)
-vgsales_test['Nintendo'] = vgsales_test['Publisher'].apply(lambda x: 1 if x=='Nintendo' else 0)
-vgsales_dev['Nintendo'] = vgsales_dev['Publisher'].apply(lambda x: 1 if x=='Nintendo' else 0)
 
-# podzial na X i y
-X_train = vgsales_train.drop(['Rank','Name','Platform','Year','Genre','Publisher'],axis = 1)
-y_train = vgsales_train[['Nintendo']]
-X_test = vgsales_test.drop(['Rank','Name','Platform','Year','Genre','Publisher'],axis = 1)
-y_test = vgsales_test[['Nintendo']]
+@ex.config
+def my_config():
+    """Sacred config: batch size from argv[1], epoch count from argv[2]."""
+    batch_param = int(sys.argv[1])
+    epoch_param = int(sys.argv[2])
 
-print(X_train.shape[1])
-# keras model
-model = Sequential()
-model.add(Dense(9, input_dim = X_train.shape[1], kernel_initializer='normal', activation='relu'))
-model.add(Dense(1,kernel_initializer='normal', activation='sigmoid'))
+@ex.capture
+def regression_model(epoch_param, batch_param, _run):
+    """Train the Nintendo-publisher classifier and save it as vgsales_model.h5.
+
+    Reads train.csv, test.csv and dev.csv, fits a small Keras network and
+    attaches the saved model file to the current Sacred run.
+
+    epoch_param, batch_param and _run are injected by Sacred through @ex.capture.
+    """
+    _run.info["prepare_model_ts"] = str(datetime.now())
+    # read the data from files
+    vgsales_train = pd.read_csv('train.csv')
+    vgsales_test = pd.read_csv('test.csv')
+    vgsales_dev = pd.read_csv('dev.csv')
 
-early_stop = EarlyStopping(monitor="val_loss", mode="min", verbose=1, patience=10)
+    vgsales_train['Nintendo'] = vgsales_train['Publisher'].apply(lambda x: 1 if x=='Nintendo' else 0)
+    vgsales_test['Nintendo'] = vgsales_test['Publisher'].apply(lambda x: 1 if x=='Nintendo' else 0)
+    vgsales_dev['Nintendo'] = vgsales_dev['Publisher'].apply(lambda x: 1 if x=='Nintendo' else 0)
 
-# kompilacja
-model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
+    # split into X and y
+    # NOTE(review): the 'Nintendo' label column is not in the drop list, so it stays in X_train/X_test.
+    X_train = vgsales_train.drop(['Rank','Name','Platform','Year','Genre','Publisher'],axis = 1)
+    y_train = vgsales_train[['Nintendo']]
+    X_test = vgsales_test.drop(['Rank','Name','Platform','Year','Genre','Publisher'],axis = 1)
+    y_test = vgsales_test[['Nintendo']]
 
-# model fit
-epochs = int(sys.argv[1])
-batch_size = int(sys.argv[2])
+    print(X_train.shape[1])
+    # keras model
+    model = Sequential()
+    model.add(Dense(9, input_dim = X_train.shape[1], kernel_initializer='normal', activation='relu'))
+    model.add(Dense(1,kernel_initializer='normal', activation='sigmoid'))
 
-# trenowanie modelu
-model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test))
+    # NOTE(review): early_stop is created but never passed to model.fit.
+    early_stop = EarlyStopping(monitor="val_loss", mode="min", verbose=1, patience=10)
 
-# zapisanie modelu
-model.save('vgsales_model.h5')
+    # compile
+    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
+
+    # train with the captured config values so epochs/batch size match the argv order used in my_config
+    model.fit(X_train, y_train, epochs=epoch_param, batch_size=batch_param, validation_data=(X_test, y_test))
+
+    # save the model
+    model.save('vgsales_model.h5')
+    # artifacts can only be attached while the run is active, so do it here rather than after ex.run()
+    ex.add_artifact("vgsales_model.h5")
+
+@ex.main
+def my_main(epoch_param, batch_param):
+    """Sacred entry point: run the captured training function and print its return value (None)."""
+    print(regression_model())
+
+r = ex.run()
\ No newline at end of file