From 2b8fba580b1e0ca4374649a04cb4f5fd8519aae3 Mon Sep 17 00:00:00 2001
From: AdamOsiowy123
Date: Mon, 2 May 2022 22:51:16 +0200
Subject: [PATCH] parametrized

Expose the training settings as a single Jenkins parameter
(TRAIN_PARAMS: data_path,num_words,epochs,batch_size,pad_length)
and have train_neural_network.py read them from argv instead of
using hard-coded values.

---
 Jenkins/Jenkinsfile.training    | 20 ++++++--------------
 Scripts/train_neural_network.py | 39 ++++++++++++++++++++++++++++-----------
 2 files changed, 34 insertions(+), 25 deletions(-)

diff --git a/Jenkins/Jenkinsfile.training b/Jenkins/Jenkinsfile.training
index 6cc366f..dc4620a 100644
--- a/Jenkins/Jenkinsfile.training
+++ b/Jenkins/Jenkinsfile.training
@@ -7,9 +7,9 @@ node {
         pipelineTriggers([upstream(threshold: hudson.model.Result.SUCCESS, upstreamProjects: "s444452-create-dataset")]),
         parameters([
             string(
-                defaultValue: ".",
-                description: 'Arguments for model training: arg1,arg2,arg3',
-                name: 'TRAIN_ARGS'
+                defaultValue: ".,14000,1,50,100",
+                description: 'Train params: data_path,num_words,epochs,batch_size,pad_length',
+                name: 'TRAIN_PARAMS'
             )
         ])
     ])
@@ -20,8 +20,8 @@ node {
         copyArtifacts filter: 'dev_data.csv', fingerprintArtifacts: true, projectName: 's444452-create-dataset'
     }
     stage('Run script') {
-        withEnv(["TRAIN_ARGS=${params.TRAIN_ARGS}"]) {
-            sh "python3 Scripts/train_neural_network.py $TRAIN_ARGS"
+        withEnv(["TRAIN_PARAMS=${params.TRAIN_PARAMS}"]) {
+            sh "python3 Scripts/train_neural_network.py $TRAIN_PARAMS"
         }
     }
     stage('Archive artifacts') {
@@ -39,16 +39,8 @@ def notifyBuild(String buildStatus = 'STARTED') {
     buildStatus = buildStatus ?: 'SUCCESS'
 
     def subject = "Job: ${env.JOB_NAME}"
-    def details = "Build nr: ${env.BUILD_NUMBER}, status: ${buildStatus} \n url: ${env.BUILD_URL}"
+    def details = "Build nr: ${env.BUILD_NUMBER}, status: ${buildStatus} \n url: ${env.BUILD_URL} \n build params: ${params.TRAIN_PARAMS}"
 
-    // Override default values based on build status
-    if (buildStatus == 'SUCCESS') {
-        color = 'GREEN'
-        colorCode = '#00FF00'
-    } else {
-        color = 'RED'
-        colorCode = '#FF0000'
-    }
     emailext (
         subject: subject,
         body: details,
diff --git a/Scripts/train_neural_network.py b/Scripts/train_neural_network.py
index efdff62..155323d 100644
--- a/Scripts/train_neural_network.py
+++ b/Scripts/train_neural_network.py
@@ -13,16 +13,23 @@ import logging
 
 logging.getLogger("tensorflow").setLevel(logging.ERROR)
 
+data_path = ''
+num_words = 0
+epochs = 0
+batch_size = 0
+pad_length = 0
 
-def tokenize(x, x_train, x_test, max_len):
-    tokenizer = Tokenizer(num_words=14000)
+
+def tokenize(x, x_train, x_test):
+    global pad_length, num_words
+    tokenizer = Tokenizer(num_words=num_words)
     tokenizer.fit_on_texts(x)
     train_x = tokenizer.texts_to_sequences(x_train)
     test_x = tokenizer.texts_to_sequences(x_test)
     vocabulary_length = len(tokenizer.word_index) + 1
-    train_x = pad_sequences(train_x, padding='post', maxlen=max_len)
-    test_x = pad_sequences(test_x, padding='post', maxlen=max_len)
+    train_x = pad_sequences(train_x, padding='post', maxlen=pad_length)
+    test_x = pad_sequences(test_x, padding='post', maxlen=pad_length)
     return train_x, test_x, vocabulary_length
 
 
@@ -47,14 +54,16 @@ def save_model(model):
 
 
 def train_model(model, x_train, y_train):
-    model.fit(x_train, y_train, epochs=1, verbose=False, batch_size=50)
+    global epochs, batch_size
+    model.fit(x_train, y_train, epochs=epochs, verbose=False, batch_size=batch_size)
 
 
-def get_model(output_dim, vocabulary_length):
+def get_model(vocabulary_length):
+    global pad_length, batch_size
     model = Sequential()
     model.add(layers.Embedding(input_dim=vocabulary_length,
-                               output_dim=output_dim,
-                               input_length=100))
+                               output_dim=batch_size,
+                               input_length=pad_length))
     model.add(layers.Flatten())
     model.add(layers.Dense(10, activation='relu'))
     model.add(layers.Dense(1, activation='sigmoid'))
@@ -74,15 +83,23 @@ def load_data(data_path, filename) -> pd.DataFrame:
     return pd.read_csv(os.path.join(data_path, filename))
 
 
+def read_params():
+    global data_path, num_words, epochs, batch_size, pad_length
+    data_path, num_words, epochs, batch_size, pad_length = sys.argv[1].split(',')
+    # split() yields strings; cast the numeric parameters before use
+    num_words, epochs, batch_size, pad_length = map(int, (num_words, epochs, batch_size, pad_length))
+
+
 def main():
-    data_path = sys.argv[1]
+    global data_path
+    read_params()
     abs_data_path = os.path.abspath(data_path)
     train_data = load_data(abs_data_path, 'train_data.csv')
     test_data = load_data(abs_data_path, 'test_data.csv')
     x_train, y_train = split_data(train_data)
     x_test, y_test = split_data(test_data)
-    x_train, x_test, vocab_size = tokenize(pd.concat([x_train, x_test]), x_train, x_test, 100)
-    model = get_model(50, vocab_size)
+    x_train, x_test, vocab_size = tokenize(pd.concat([x_train, x_test]), x_train, x_test)
+    model = get_model(vocab_size)
     train_model(model, x_train, y_train)
     save_model(model)
     evaluate_and_save(model, x_test, y_test, abs_data_path)
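
For reference, an illustrative invocation matching the TRAIN_PARAMS default
(this assumes train_data.csv and test_data.csv from the create-dataset job
sit in the working directory, as the Jenkinsfile arranges):

    # data_path,num_words,epochs,batch_size,pad_length
    python3 Scripts/train_neural_network.py .,14000,1,50,100

One behavior carried over from the patch: get_model() reuses batch_size as
the Embedding output_dim (both happened to be 50 before), so changing the
batch size now also changes the embedding width.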