lab7
Some checks failed
s444417-training/pipeline/head There was a failure building this commit

This commit is contained in:
s444417 2022-05-07 15:37:17 +02:00
parent 0c5537f26d
commit 3853b0ff00
8 changed files with 136 additions and 18 deletions

View File

@ -2,4 +2,7 @@ kaggle.json
venv
.vscode
.idea
Participants_Data_HPP
Participants_Data_HPP
my_runs
saved_model

5
.gitignore vendored
View File

@ -220,4 +220,7 @@ venv/*
training_1
Participants_Data_HPP/
Participants_Data_HPP/
my_runs
saved_model

View File

@ -12,6 +12,7 @@ RUN apt-get install wget
# RUN python3 -m pip install kaggle
RUN python3 -m pip install pandas
RUN pip3 install matplotlib
RUN pip3 install sacred
# RUN ln -s ~/.local/bin/kaggle /usr/bin/kaggle
WORKDIR /app

View File

@ -15,7 +15,7 @@ pipeline {
copyArtifacts projectName: 's444417-create-dataset'
sh 'ls -la'
sh 'echo $EPOCH_NUMBER'
sh 'python3 ./src/trainScript.py $EPOCH_NUMBER'
sh 'python3 ./lab7/trainScript.py $EPOCH_NUMBER'
}
}
stage('Archive') {

View File

@ -1,6 +1,11 @@
## Projekt na przedmiot inżynieria oprogramowania
IUM_6 opis sposobu rozwiązania zadań i podpunktów
---
Aktualne wyniki zadania IUM_6 dostępne są:
- [s444417-create-dataset](https://tzietkiewicz.vm.wmi.amu.edu.pl:8080/job/s444417-create-dataset/): build #244
- [s444417-training](https://tzietkiewicz.vm.wmi.amu.edu.pl:8080/job/s444417-training/job/master/): build #96
- [s444417-evaluation](https://tzietkiewicz.vm.wmi.amu.edu.pl:8080/job/s444417-evaluation/job/master/): build #43
Zadanie 1
1. stworzono job [s444417-training](https://tzietkiewicz.vm.wmi.amu.edu.pl:8080/job/s444417-training/)
2. s444417-training uruchamia się automatycznie po zakończeniu joba s444417-create-dataset, plik Jenkinsfile, przy pomocy build job. Kopiuje zbiór danych przy pomocy copyArtifact w pliku Jenkinsfile3
@ -12,8 +17,8 @@ Zadanie 2
1. stworzono job [s444417-evaluation](https://tzietkiewicz.vm.wmi.amu.edu.pl:8080/job/s444417-evaluation/)
2. ewaluacja modelu metodą evaluate wywołaną na modelu w pliku trainScript.py. Zapisanie wyniku do pliku trainResults.csv, w Jenkinsfile.eval archiveArtifact
3. Jenkinsfile.eval w stagu "Copy prev build artifact" kopiuje trainResults.csv a jeśli go nie ma to catch łapie error, skrypt trainScript.py też obsługuje brak takiego pliku, bo otwiera go w trybie "a+"
4. skrypt trainScript.py tworzy plota z wczytanych wartości odczytanych z pliku trainResults.csv, natomiast nie ma jak tego podejrzeć w Jenkins
5. projekt odpala się po zakończeniu trenowania jenkinsfile3 build job oraz kopiuje sobie model copyArtifacts z uwzględnieniem brancha master
4. skrypt trainScript.py tworzy plota z wczytanych wartości odczytanych z pliku trainResults.csv i zapisuje wykres do pliku metric.py
5. projekt odpala się po zakończeniu trenowania jenkinsfile3 build job oraz kopiuje sobie model copyArtifacts z uwzględnieniem brancha master
6. copyArtifacts z s444417-create-dataset
7. parametr BRANCH do wyboru konkretnej gałęzi, buildselector do wybrania builda w Jenkins.eval
8. powiadomienie mail wysyłane w pliku Jenkinsfile.eval post emailext
8. powiadomienie mail wraz z metryką loss wysyłane w pliku Jenkinsfile.eval post emailext

116
lab7/trainScript.py Normal file
View File

@ -0,0 +1,116 @@
import os
import sys
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers
from sacred import Experiment
from sacred.observers import FileStorageObserver
from sacred.observers import MongoObserver
# Sacred experiment object; the observers registered below record every run.
ex = Experiment("sacred_scopes", interactive=True)
# File observer: run records are stored under the "my_runs" directory.
ex.observers.append(FileStorageObserver('my_runs'))
# Mongo observer. The connection URL embeds credentials, so both the URL and
# the database name can be overridden through the environment; the values
# used so far remain the defaults to keep existing setups working.
ex.observers.append(MongoObserver(
    url=os.environ.get(
        'SACRED_MONGO_URL',
        'mongodb://mongo_user:mongo_password_IUM_2021@localhost:27017'),
    db_name=os.environ.get('SACRED_MONGO_DB', 'sacred')))
# train params
def parse_epoch_count(argv, default=3):
    """Return the epoch count given as the first command-line argument.

    Args:
        argv: argument list shaped like ``sys.argv`` (program name first).
        default: value returned when the argument is missing or is not a
            valid integer.

    Returns:
        The parsed integer, or ``default`` when it cannot be obtained.
    """
    try:
        return int(argv[1])
    except (IndexError, ValueError):
        # Missing argument or non-numeric text: fall back to the default
        # instead of swallowing every possible exception.
        return default


numberOfEpochParam = parse_epoch_count(sys.argv)
@ex.config
def my_config():
    # Number of training epochs; the default is taken from numberOfEpochParam.
    numberOfEpoch = numberOfEpochParam
@ex.capture
def train(numberOfEpoch, _run):
    """Train (or continue training) the linear house-price model and record it.

    Reads ``Train.csv`` and ``Test.csv`` from ``../Participants_Data_HPP``
    relative to the script location, fits the model, saves it under
    ``saved_model/MyModel_tf``, attaches ``saved_model.pb`` to the Sacred run,
    logs the final training loss and the test loss, and writes a few sample
    predictions next to their expected values into ``../result.txt``.

    Args:
        numberOfEpoch: number of training epochs (provided by the Sacred config).
        _run: the current Sacred run, used for logging metrics and artifacts.
    """
    cwd = os.path.abspath(os.path.dirname(sys.argv[0]))
    dataDir = os.path.join(cwd, "..", "Participants_Data_HPP")
    target = 'TARGET(PRICE_IN_LACS)'
    features = ["UNDER_CONSTRUCTION", "RERA", "BHK_NO.", "SQUARE_FT",
                "READY_TO_MOVE", "RESALE", "LONGITUDE", "LATITUDE", target]

    # get train and test datasets, restricted to the columns used by the model
    house_price_train = pd.read_csv(os.path.join(dataDir, "Train.csv"))[features]
    house_price_test = pd.read_csv(os.path.join(dataDir, "Test.csv"))[features]

    # split the training data into inputs and labels
    house_price_features = house_price_train.copy()
    house_price_labels = house_price_features.pop(target)

    # small random sample of the test set used for the prediction report;
    # capped at the test-set size so tiny datasets do not raise
    sample_size = min(10, len(house_price_test))
    feature_test_sample = house_price_test.sample(sample_size)
    labels_test_sample = feature_test_sample.pop(target)

    # split the full test data into inputs and expected values
    house_price_test_features = house_price_test.copy()
    house_price_test_expected = house_price_test_features.pop(target)

    # load model if exists or create new
    modelPath = 'saved_model/MyModel_tf'
    try:
        linear_model = tf.keras.models.load_model(modelPath)
        print("open existing model")
    except Exception as exception:
        # Deliberate best-effort: any failure to load means "start fresh".
        print(exception)
        # The normalization layer is only needed for a freshly built model.
        normalize = layers.Normalization()
        normalize.adapt(house_price_features)
        linear_model = tf.keras.Sequential([
            normalize,
            layers.Dense(1)
        ])
        linear_model.compile(loss=tf.losses.MeanSquaredError(),
                             optimizer=tf.optimizers.Adam(1))
        print("creating new model")

    # train model
    history = linear_model.fit(
        np.array(house_price_features),
        house_price_labels,
        epochs=int(numberOfEpoch),
        validation_split=0.33,
        verbose=1)

    # save model
    linear_model.save(modelPath, save_format='tf')
    # save model as artifact of the current run
    _run.add_artifact(os.path.join(modelPath, "saved_model.pb"))

    # final loss; the history is empty when zero epochs were requested,
    # so only log when at least one epoch actually ran
    training_losses = history.history.get('loss', [])
    if training_losses:
        _run.log_scalar('final.training.loss', training_losses[-1])

    # evaluate on the whole test set and record the result instead of
    # discarding it; evaluate() may return a scalar or a list whose first
    # entry is the loss, hence the ravel
    test_loss = linear_model.evaluate(
        house_price_test_features, house_price_test_expected, verbose=0)
    _run.log_scalar('final.test.loss', float(np.ravel(test_loss)[0]))

    # predictions for the sample, flattened from shape (n, 1) to a flat list
    pred = np.array(linear_model.predict(feature_test_sample))
    flatten_pred = [item for sublist in pred for item in sublist]

    resultPath = os.path.join(cwd, "..", "result.txt")
    with open(resultPath, "w", encoding="utf-8") as resultFile:
        resultFile.write("predictions: " + str(flatten_pred) + '\n')
        resultFile.write("expected: " + str(labels_test_sample.to_numpy()))
@ex.main
def main():
    """Sacred main function: run a single training session."""
    train()


if __name__ == "__main__":
    # Start the experiment only when executed as a script, so importing this
    # module does not trigger a training run as a side effect.
    ex.run()

View File

@ -1,2 +1,2 @@
predictions: [26.87796, 42.875183, 75.51122, 184.03447, 283.11658, 132.76123, 187.1964, 54.623642, 48.12828, 120.18621]
expected: [ 17. 85. 27. 110. 370. 57.9 870. 32.5 76. 38. ]
predictions: [185.41609, 41.248466, -66.347305, 112.55022, 106.2057, 11.261917, 75.81361, 184.90059, -3.6325989, 85.295105]
expected: [ 96. 51. 8. 63. 25. 11. 80. 110. 85. 41.]

View File

@ -7,15 +7,6 @@ import tensorflow as tf
from tensorflow.keras import layers
import matplotlib.pyplot as plt
def plot_loss(history):
    """Plot the training and validation loss curves and display the figure.

    Args:
        history: object whose ``history`` mapping holds the 'loss' and
            'val_loss' series (as returned by ``linear_model.fit``).
    """
    curves = history.history
    for series_name in ('loss', 'val_loss'):
        plt.plot(curves[series_name], label=series_name)
    plt.xlabel('Epoch')
    # NOTE(review): the '[MPG]' unit looks carried over from another example;
    # confirm the intended axis label for this dataset.
    plt.ylabel('Error [MPG]')
    plt.legend()
    plt.grid(True)
    plt.show()
#train params
numberOfEpoch = sys.argv[1]
@ -85,7 +76,6 @@ history = linear_model.fit(
verbose=1)
#callbacks=[cp_callback])
plot_loss(history)
# save model
linear_model.save(modelPath, save_format='tf')