From 5a805021643431c38a57c7672f8ae07025f6aa57 Mon Sep 17 00:00:00 2001 From: patrycjalazna Date: Fri, 7 May 2021 18:26:53 +0200 Subject: [PATCH] changed artifacts in jenkinsfile and splitted main file to avocado-preprocessing and avocado-training --- Jenkinsfile | 12 +++++----- avocado-preprocessing.py | 49 +++++++++------------------------------- avocado-training.py | 49 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 66 insertions(+), 44 deletions(-) create mode 100644 avocado-training.py diff --git a/Jenkinsfile b/Jenkinsfile index 6bd0450..eaa5331 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -41,10 +41,10 @@ pipeline { script { def img = docker.build('patlaz/ium:1.0') img.inside { - sh 'chmod +x avocado-preprocessing.sh' - sh 'chmod +x ./avocado-preprocessing.sh' + sh 'chmod +x avocado-preprocessing.py' + // sh 'chmod +x ./avocado-preprocessing.sh' sh 'echo ${CUTOFF}' - sh './avocado-preprocessing.sh ${CUTOFF}' + sh 'python3 avocado-preprocessing.py ${CUTOFF}' } } } @@ -53,9 +53,9 @@ pipeline { stage('archiveArtifacts') { steps { - archiveArtifacts 'test.csv' - archiveArtifacts 'dev.csv' - archiveArtifacts 'train.csv' + archiveArtifacts 'avocado_test.csv' + archiveArtifacts 'avocado_validate.csv' + archiveArtifacts 'avocado_train.csv' } } } diff --git a/avocado-preprocessing.py b/avocado-preprocessing.py index f7fd244..2c4e282 100644 --- a/avocado-preprocessing.py +++ b/avocado-preprocessing.py @@ -5,11 +5,7 @@ import numpy as np from sklearn import preprocessing from sklearn.linear_model import LinearRegression from sklearn.metrics import mean_squared_error -import tensorflow as tf -from tensorflow.keras.layers import Input, Dense, Activation,Dropout -from tensorflow.keras.models import Model -from tensorflow.keras.callbacks import EarlyStopping -from keras.models import Sequential + device = 'cpu' @@ -44,6 +40,10 @@ for col in avocado.columns: avocado_normalized['type'] = avocado['type'] avocado_normalized['geography'] = avocado['geography'] +# parametr CUTOFF +cutoff_param = int(sys.argv[1]) +avocado_normalized = avocado_normalized.head(cutoff_param) + # podział na train/dev/test avocado_train, avocado_validate, avocado_test = np.split(avocado_normalized.sample(frac=1), [int(.6*len(avocado_normalized)), int(.8*len(avocado_normalized))]) @@ -72,39 +72,12 @@ avocado_train['average_price'].hist() avocado_validate['average_price'].hist() avocado_test['average_price'].hist() +# zapis do plików +avocado_train.to_csv('avocado_train.csv') +avocado_validate.to_csv('avocado_validate.csv') +avocado_test.to_csv('avocado_test.csv') + # print(avocado_train[:10]) # print(avocado_test[:10]) -print(avocado_normalized) - -# podzial na X i y -X_train = avocado_train[['average_price', 'total_volume', '4046', '4225', '4770', 'total_bags', 'small_bags', 'large_bags', 'xlarge_bags']] -y_train = avocado_train[['type']] -X_test = avocado_test[['average_price', 'total_volume', '4046', '4225', '4770', 'total_bags', 'small_bags', 'large_bags', 'xlarge_bags']] -y_test = avocado_test[['type']] - -print(X_train.shape[1]) -# keras model -model = Sequential() -model.add(Dense(9, input_dim = X_train.shape[1], kernel_initializer='normal', activation='relu')) -model.add(Dense(1,kernel_initializer='normal', activation='sigmoid')) - -early_stop = EarlyStopping(monitor="val_loss", mode="min", verbose=1, patience=10) - -# kompilacja -model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) - -# model fit -epochs = int(sys.argv[1]) -batch_size = int(sys.argv[2]) - -model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test)) -model.save('avocado-model.h5') - -# predict -predictions = model.predict(X_test) -pd.DataFrame(predictions).to_csv('prediction_results.csv') - -# ewaluacja -error = mean_squared_error(y_test, predictions) -print('Error: ', error) +#print(avocado_normalized) diff --git a/avocado-training.py b/avocado-training.py new file mode 100644 index 0000000..014bd23 --- /dev/null +++ b/avocado-training.py @@ -0,0 +1,49 @@ +import sys +import kaggle +import pandas as pd +import numpy as np +from sklearn import preprocessing +from sklearn.linear_model import LinearRegression +from sklearn.metrics import mean_squared_error +import tensorflow as tf +from tensorflow.keras.layers import Input, Dense, Activation,Dropout +from tensorflow.keras.models import Model +from tensorflow.keras.callbacks import EarlyStopping +from keras.models import Sequential + +avocado_train = pd.read_csv('avocado_train.csv') +avocado_test = pd.read_csv('avocado_test.csv') +avocado_validate = pd.read_csv('avocado_validate.csv') + + +# podzial na X i y +X_train = avocado_train[['average_price', 'total_volume', '4046', '4225', '4770', 'total_bags', 'small_bags', 'large_bags', 'xlarge_bags']] +y_train = avocado_train[['type']] +X_test = avocado_test[['average_price', 'total_volume', '4046', '4225', '4770', 'total_bags', 'small_bags', 'large_bags', 'xlarge_bags']] +y_test = avocado_test[['type']] + +print(X_train.shape[1]) +# keras model +model = Sequential() +model.add(Dense(9, input_dim = X_train.shape[1], kernel_initializer='normal', activation='relu')) +model.add(Dense(1,kernel_initializer='normal', activation='sigmoid')) + +early_stop = EarlyStopping(monitor="val_loss", mode="min", verbose=1, patience=10) + +# kompilacja +model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) + +# model fit +epochs = int(sys.argv[1]) +batch_size = int(sys.argv[2]) + +model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size, validation_data=(X_test, y_test)) +model.save('avocado-model.h5') + +# predict +predictions = model.predict(X_test) +pd.DataFrame(predictions).to_csv('prediction_results.csv') + +# ewaluacja +error = mean_squared_error(y_test, predictions) +print('Error: ', error)