split everything into per-lab repos
This commit is contained in:
parent
42b20cfa84
commit
8e3e206cca
8
lab5/Jenkinsfile
vendored
8
lab5/Jenkinsfile
vendored
@ -12,14 +12,14 @@ pipeline {
|
|||||||
stages {
|
stages {
|
||||||
stage('Stage 1') {
|
stage('Stage 1') {
|
||||||
steps {
|
steps {
|
||||||
sh 'chmod u+x ./process_dataset.py ./simple_regression.py'
|
sh 'chmod u+x ./lab5/process_dataset.py ./lab5/simple_regression.py'
|
||||||
echo 'Processing dataset...'
|
echo 'Processing dataset...'
|
||||||
sh 'python3 process_dataset.py'
|
sh 'python3 lab5/process_dataset.py'
|
||||||
echo 'Dataset processed'
|
echo 'Dataset processed'
|
||||||
echo 'Conducting simple regression model test'
|
echo 'Conducting simple regression model test'
|
||||||
sh 'python3 simple_regression.py $EPOCHS_NUM'
|
sh 'python3 lab5/simple_regression.py $EPOCHS_NUM'
|
||||||
echo 'Model predictions saved'
|
echo 'Model predictions saved'
|
||||||
sh 'head lego_reg_results.csv'
|
sh 'head lab5/lego_reg_results.csv'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -5,12 +5,12 @@ pipeline {
|
|||||||
stages {
|
stages {
|
||||||
stage('Stage 1') {
|
stage('Stage 1') {
|
||||||
steps {
|
steps {
|
||||||
sh 'chmod u+x ./process_dataset.py ./simple_regression.py'
|
sh 'chmod u+x ./lab5/process_dataset.py ./lab5/simple_regression.py'
|
||||||
echo 'Processing dataset...'
|
echo 'Processing dataset...'
|
||||||
sh 'python3 process_dataset.py'
|
sh 'python3 lab5/process_dataset.py'
|
||||||
echo 'Dataset processed'
|
echo 'Dataset processed'
|
||||||
echo 'Conducting simple regression model test'
|
echo 'Conducting simple regression model test'
|
||||||
sh 'python3 simple_regression.py'
|
sh 'python3 lab5/simple_regression.py'
|
||||||
echo 'Model predictions saved'
|
echo 'Model predictions saved'
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
30
lab5/process_dataset.py
Normal file
30
lab5/process_dataset.py
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
|
||||||
|
# Load the raw dataset; rows containing any empty field are dropped right away.
lego = pd.read_csv('lego_sets.csv', encoding='utf-8').dropna()

# Prices are kept to at most two decimal places.
lego['list_price'] = lego['list_price'].round(2)

# Review counts, piece counts and product ids are stored as whole numbers.
for column in ('num_reviews', 'piece_count', 'prod_id'):
    lego[column] = lego[column].apply(np.int64)

# Quick look at the cleaned data and its summary statistics.
print(lego)
print(lego.describe(include='all'))

# Two-step split: 80% goes to the training set, then the remainder is
# halved into the validation and test sets.
lego_train, lego_rem = train_test_split(lego, train_size=0.8)
lego_valid, lego_test = train_test_split(lego_rem, test_size=0.5)

# Write the full cleaned set and every split to its own CSV file.
outputs = {
    'lego_sets_clean.csv': lego,
    'lego_sets_clean_train.csv': lego_train,
    'lego_sets_clean_valid.csv': lego_valid,
    'lego_sets_clean_test.csv': lego_test,
}
for path, frame in outputs.items():
    frame.to_csv(path, index=None, header=True)
|
69
lab5/simple_regression.py
Normal file
69
lab5/simple_regression.py
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
import tensorflow as tf
|
||||||
|
from keras import layers
|
||||||
|
from keras.models import save_model
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Number of training epochs, taken from the first command-line argument.
# When the script is started without arguments it falls back to a default
# instead of crashing with an IndexError.
DEFAULT_EPOCHS_NUM = 100
EPOCHS_NUM = int(sys.argv[1]) if len(sys.argv) > 1 else DEFAULT_EPOCHS_NUM

# Load the train and test splits from the current working directory.
data_train = pd.read_csv('lego_sets_clean_train.csv')
data_test = pd.read_csv('lego_sets_clean_test.csv')

# The model predicts a set's price from the number of pieces it contains.
train_piece_counts = np.array(data_train['piece_count'])
train_prices = np.array(data_train['list_price'])
test_piece_counts = np.array(data_test['piece_count'])
test_prices = np.array(data_test['list_price'])

# Normalization layer fitted on the training piece counts.
normalizer = layers.Normalization(input_shape=[1, ], axis=None)
normalizer.adapt(train_piece_counts)

# Model: the normalization layer followed by a single dense unit.
model = tf.keras.Sequential([
    normalizer,
    layers.Dense(units=1)
])

# Compile with the Adam optimizer and a mean-absolute-error loss.
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.1),
    loss='mean_absolute_error'
)

# Train; 20% of the training data is held out for validation.
history = model.fit(
    train_piece_counts,
    train_prices,
    epochs=EPOCHS_NUM,
    verbose=0,
    validation_split=0.2
)

# Predict prices for the test set.
y_pred = model.predict(test_piece_counts)

# Save the predictions (rounded to two decimal places) next to their inputs.
results = pd.DataFrame({
    'test_set_piece_count': test_piece_counts.tolist(),
    'predicted_price': [round(a[0], 2) for a in y_pred.tolist()],
})
results.to_csv('lego_reg_results.csv', index=False, header=True)

# Save the trained model.
model.save('lego_reg_model')

# Optional statistics and plot, kept disabled:
#
#   hist = pd.DataFrame(history.history)
#   hist['epoch'] = history.epoch
#   print(hist.tail())
#
#   plt.scatter(train_piece_counts, train_prices, label='Data')
#   plt.plot(test_piece_counts, y_pred, color='k', label='Predictions')
#   plt.xlabel('pieces')
#   plt.ylabel('price')
#   plt.legend()
#   plt.show()
|
@ -5,12 +5,12 @@ pipeline {
|
|||||||
stages {
|
stages {
|
||||||
stage('Stage 1') {
|
stage('Stage 1') {
|
||||||
steps {
|
steps {
|
||||||
sh 'chmod u+x ./process_dataset.py'
|
sh 'chmod u+x ./lab6/process_dataset.py'
|
||||||
echo 'Processing dataset...'
|
echo 'Processing dataset...'
|
||||||
sh 'python3 process_dataset.py'
|
sh 'python3 lab6/process_dataset.py'
|
||||||
echo 'Dataset processed'
|
echo 'Dataset processed'
|
||||||
echo 'Archiving clean train and test datasets...'
|
echo 'Archiving clean train and test datasets...'
|
||||||
archiveArtifacts artifacts: 'lego_sets_clean_train.csv, lego_sets_clean_test.csv'
|
archiveArtifacts artifacts: 'lab6/lego_sets_clean_train.csv, lab6/lego_sets_clean_test.csv'
|
||||||
echo 'Datasets archived'
|
echo 'Datasets archived'
|
||||||
echo 'Launching the s449288-training job...'
|
echo 'Launching the s449288-training job...'
|
||||||
build job: 's449288-training/master/'
|
build job: 's449288-training/master/'
|
@ -16,28 +16,28 @@ pipeline {
|
|||||||
stage('Stage 1') {
|
stage('Stage 1') {
|
||||||
steps {
|
steps {
|
||||||
git branch: "${params.BRANCH}", url: 'https://git.wmi.amu.edu.pl/s449288/ium_s449288.git'
|
git branch: "${params.BRANCH}", url: 'https://git.wmi.amu.edu.pl/s449288/ium_s449288.git'
|
||||||
sh 'chmod u+x ./evaluate.py'
|
sh 'chmod u+x ./lab6/evaluate.py'
|
||||||
echo 'Copying datasets from the create-dataset job...'
|
echo 'Copying datasets from the create-dataset job...'
|
||||||
copyArtifacts filter: 'lego_sets_clean_test.csv', projectName: 's449288-create-dataset'
|
copyArtifacts filter: 'lab6/lego_sets_clean_test.csv', projectName: 's449288-create-dataset'
|
||||||
echo 'Datasets copied'
|
echo 'Datasets copied'
|
||||||
echo 'Copying model from the training job...'
|
echo 'Copying model from the training job...'
|
||||||
copyArtifacts filter: 'lego_reg_model.tar.gz', projectName: "s449288-training/${BRANCH}/", selector: buildParameter('BUILD_SELECTOR')
|
copyArtifacts filter: 'lab6/lego_reg_model.tar.gz', projectName: "s449288-training/${BRANCH}/", selector: buildParameter('BUILD_SELECTOR')
|
||||||
echo 'Model copied'
|
echo 'Model copied'
|
||||||
sh 'tar xvzf lego_reg_model.tar.gz'
|
sh 'tar xvzf lab6/lego_reg_model.tar.gz'
|
||||||
echo 'Optional copying of the metrics file from previous build...'
|
echo 'Optional copying of the metrics file from previous build...'
|
||||||
copyArtifacts filter: 'eval_results.txt', projectName: 's449288-evaluation/master/', optional: true
|
copyArtifacts filter: 'lab6/eval_results.txt', projectName: 's449288-evaluation/master/', optional: true
|
||||||
echo 'Metrics file copied if it did not exist'
|
echo 'Metrics file copied if it did not exist'
|
||||||
echo 'Evaluating model...'
|
echo 'Evaluating model...'
|
||||||
sh 'python3 evaluate.py'
|
sh 'python3 lab6/evaluate.py'
|
||||||
echo 'Model evaluated. Metrics saved. Plot saved.'
|
echo 'Model evaluated. Metrics saved. Plot saved.'
|
||||||
sh 'head eval_results.txt'
|
sh 'head lab6/eval_results.txt'
|
||||||
sh 'file error_plot.jpg'
|
sh 'file lab6/error_plot.jpg'
|
||||||
echo 'Archiving metrics file...'
|
echo 'Archiving metrics file...'
|
||||||
archiveArtifacts 'eval_results.txt'
|
archiveArtifacts 'lab6/eval_results.txt'
|
||||||
echo 'File archived'
|
echo 'File archived'
|
||||||
script {
|
script {
|
||||||
LAST_MAE = sh (
|
LAST_MAE = sh (
|
||||||
script: 'tail -1 eval_results.txt',
|
script: 'tail -1 lab6/eval_results.txt',
|
||||||
returnStdout: true
|
returnStdout: true
|
||||||
).trim()
|
).trim()
|
||||||
}
|
}
|
||||||
|
@ -14,18 +14,18 @@ pipeline {
|
|||||||
stages {
|
stages {
|
||||||
stage('Stage 1') {
|
stage('Stage 1') {
|
||||||
steps {
|
steps {
|
||||||
sh 'chmod u+x ./simple_regression.py'
|
sh 'chmod u+x ./lab6/simple_regression.py'
|
||||||
echo 'Copying datasets from create-dataset...'
|
echo 'Copying datasets from create-dataset...'
|
||||||
copyArtifacts filter: '*', projectName: 's449288-create-dataset'
|
copyArtifacts filter: '*', projectName: 's449288-create-dataset'
|
||||||
echo 'Datasets copied'
|
echo 'Datasets copied'
|
||||||
echo 'Conducting simple regression model test'
|
echo 'Conducting simple regression model test'
|
||||||
sh 'python3 simple_regression.py $EPOCHS_NUM'
|
sh 'python3 lab6/simple_regression.py $EPOCHS_NUM'
|
||||||
echo 'Model and predictions saved'
|
echo 'Model and predictions saved'
|
||||||
sh 'head lego_reg_results.csv'
|
sh 'head lab6/lego_reg_results.csv'
|
||||||
sh 'ls -lh lego_reg_model'
|
sh 'ls -lh lab6/lego_reg_model'
|
||||||
echo 'Archiving model...'
|
echo 'Archiving model...'
|
||||||
sh 'tar -czf lego_reg_model.tar.gz lego_reg_model/'
|
sh 'tar -czf lab6/lego_reg_model.tar.gz lab6/lego_reg_model/'
|
||||||
archiveArtifacts 'lego_reg_model.tar.gz'
|
archiveArtifacts 'lab6/lego_reg_model.tar.gz'
|
||||||
echo 'Model archived'
|
echo 'Model archived'
|
||||||
echo 'Launching the s449288-evaluation job...'
|
echo 'Launching the s449288-evaluation job...'
|
||||||
build job: 's449288-evaluation/master/'
|
build job: 's449288-evaluation/master/'
|
||||||
|
37
lab6/evaluate.py
Normal file
37
lab6/evaluate.py
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
import tensorflow as tf
|
||||||
|
from tensorflow import keras
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
from matplotlib.ticker import MaxNLocator
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
# Load the trained model from disk.
model = keras.models.load_model('lego_reg_model')

# Load the test split.
data_test = pd.read_csv('lego_sets_clean_test.csv')
test_piece_counts = np.array(data_test['piece_count'])
test_prices = np.array(data_test['list_price'])

# Simple evaluation on the test set (the training scripts compile the model
# with a mean-absolute-error loss).
test_results = model.evaluate(
    test_piece_counts,
    test_prices, verbose=0)

# Append this run's metric value to the results file, one value per line.
with open('eval_results.txt', 'a+') as f:
    f.write(str(test_results) + '\n')

# Read every recorded value back and plot it against its position in the file.
with open('eval_results.txt') as f:
    # Lines read from a file keep their trailing newline, so a bare truthiness
    # check never filters anything; strip first so that blank lines are
    # skipped instead of making float() raise.
    scores = [float(line) for line in f if line.strip()]
builds = list(range(1, len(scores) + 1))

plt.plot(builds, scores)
plt.xlabel('Build number')
plt.xticks(builds)
plt.ylabel('Mean absolute error')
plt.title('Model error by build')
plt.savefig('error_plot.jpg')
plt.show()
|
||||||
|
|
30
lab6/process_dataset.py
Normal file
30
lab6/process_dataset.py
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
|
||||||
|
# Load the raw dataset; rows containing any empty field are dropped right away.
lego = pd.read_csv('lego_sets.csv', encoding='utf-8').dropna()

# Prices are kept to at most two decimal places.
lego['list_price'] = lego['list_price'].round(2)

# Review counts, piece counts and product ids are stored as whole numbers.
for column in ('num_reviews', 'piece_count', 'prod_id'):
    lego[column] = lego[column].apply(np.int64)

# Quick look at the cleaned data and its summary statistics.
print(lego)
print(lego.describe(include='all'))

# Two-step split: 80% goes to the training set, then the remainder is
# halved into the validation and test sets.
lego_train, lego_rem = train_test_split(lego, train_size=0.8)
lego_valid, lego_test = train_test_split(lego_rem, test_size=0.5)

# Write the full cleaned set and every split to its own CSV file.
outputs = {
    'lego_sets_clean.csv': lego,
    'lego_sets_clean_train.csv': lego_train,
    'lego_sets_clean_valid.csv': lego_valid,
    'lego_sets_clean_test.csv': lego_test,
}
for path, frame in outputs.items():
    frame.to_csv(path, index=None, header=True)
|
69
lab6/simple_regression.py
Normal file
69
lab6/simple_regression.py
Normal file
@ -0,0 +1,69 @@
|
|||||||
|
import tensorflow as tf
|
||||||
|
from keras import layers
|
||||||
|
from keras.models import save_model
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Number of training epochs, taken from the first command-line argument.
# When the script is started without arguments it falls back to a default
# instead of crashing with an IndexError.
DEFAULT_EPOCHS_NUM = 100
EPOCHS_NUM = int(sys.argv[1]) if len(sys.argv) > 1 else DEFAULT_EPOCHS_NUM

# Load the train and test splits from the current working directory.
data_train = pd.read_csv('lego_sets_clean_train.csv')
data_test = pd.read_csv('lego_sets_clean_test.csv')

# The model predicts a set's price from the number of pieces it contains.
train_piece_counts = np.array(data_train['piece_count'])
train_prices = np.array(data_train['list_price'])
test_piece_counts = np.array(data_test['piece_count'])
test_prices = np.array(data_test['list_price'])

# Normalization layer fitted on the training piece counts.
normalizer = layers.Normalization(input_shape=[1, ], axis=None)
normalizer.adapt(train_piece_counts)

# Model: the normalization layer followed by a single dense unit.
model = tf.keras.Sequential([
    normalizer,
    layers.Dense(units=1)
])

# Compile with the Adam optimizer and a mean-absolute-error loss.
model.compile(
    optimizer=tf.optimizers.Adam(learning_rate=0.1),
    loss='mean_absolute_error'
)

# Train; 20% of the training data is held out for validation.
history = model.fit(
    train_piece_counts,
    train_prices,
    epochs=EPOCHS_NUM,
    verbose=0,
    validation_split=0.2
)

# Predict prices for the test set.
y_pred = model.predict(test_piece_counts)

# Save the predictions (rounded to two decimal places) next to their inputs.
results = pd.DataFrame({
    'test_set_piece_count': test_piece_counts.tolist(),
    'predicted_price': [round(a[0], 2) for a in y_pred.tolist()],
})
results.to_csv('lego_reg_results.csv', index=False, header=True)

# Save the trained model.
model.save('lego_reg_model')

# Optional statistics and plot, kept disabled:
#
#   hist = pd.DataFrame(history.history)
#   hist['epoch'] = history.epoch
#   print(hist.tail())
#
#   plt.scatter(train_piece_counts, train_prices, label='Data')
#   plt.plot(test_piece_counts, y_pred, color='k', label='Predictions')
#   plt.xlabel('pieces')
#   plt.ylabel('price')
#   plt.legend()
#   plt.show()
|
@ -16,28 +16,28 @@ pipeline {
|
|||||||
stage('Stage 1') {
|
stage('Stage 1') {
|
||||||
steps {
|
steps {
|
||||||
git branch: "${params.BRANCH}", url: 'https://git.wmi.amu.edu.pl/s449288/ium_s449288.git'
|
git branch: "${params.BRANCH}", url: 'https://git.wmi.amu.edu.pl/s449288/ium_s449288.git'
|
||||||
sh 'chmod u+x ./evaluate.py'
|
sh 'chmod u+x ./lab7/evaluate.py'
|
||||||
echo 'Copying datasets from the create-dataset job...'
|
echo 'Copying datasets from the create-dataset job...'
|
||||||
copyArtifacts filter: 'lego_sets_clean_test.csv', projectName: 's449288-create-dataset'
|
copyArtifacts filter: 'lab7/lego_sets_clean_test.csv', projectName: 's449288-create-dataset'
|
||||||
echo 'Datasets copied'
|
echo 'Datasets copied'
|
||||||
echo 'Copying model from the training job...'
|
echo 'Copying model from the training job...'
|
||||||
copyArtifacts filter: 'lego_reg_model.tar.gz', projectName: "s449288-training/${BRANCH}/", selector: buildParameter('BUILD_SELECTOR')
|
copyArtifacts filter: 'lab7/lego_reg_model.tar.gz', projectName: "s449288-training/${BRANCH}/", selector: buildParameter('BUILD_SELECTOR')
|
||||||
echo 'Model copied'
|
echo 'Model copied'
|
||||||
sh 'tar xvzf lego_reg_model.tar.gz'
|
sh 'tar xvzf lab7/lego_reg_model.tar.gz'
|
||||||
echo 'Optional copying of the metrics file from previous build...'
|
echo 'Optional copying of the metrics file from previous build...'
|
||||||
copyArtifacts filter: 'eval_results.txt', projectName: 's449288-evaluation/master/', optional: true
|
copyArtifacts filter: 'lab7/eval_results.txt', projectName: 's449288-evaluation/master/', optional: true
|
||||||
echo 'Metrics file copied if it did not exist'
|
echo 'Metrics file copied if it did not exist'
|
||||||
echo 'Evaluating model...'
|
echo 'Evaluating model...'
|
||||||
sh 'python3 evaluate.py'
|
sh 'python3 lab7/evaluate.py'
|
||||||
echo 'Model evaluated. Metrics saved. Plot saved.'
|
echo 'Model evaluated. Metrics saved. Plot saved.'
|
||||||
sh 'head eval_results.txt'
|
sh 'head lab7/eval_results.txt'
|
||||||
sh 'file error_plot.jpg'
|
sh 'file lab7/error_plot.jpg'
|
||||||
echo 'Archiving metrics file...'
|
echo 'Archiving metrics file...'
|
||||||
archiveArtifacts 'eval_results.txt'
|
archiveArtifacts 'lab7/eval_results.txt'
|
||||||
echo 'File archived'
|
echo 'File archived'
|
||||||
script {
|
script {
|
||||||
LAST_MAE = sh (
|
LAST_MAE = sh (
|
||||||
script: 'tail -1 eval_results.txt',
|
script: 'tail -1 lab7/eval_results.txt',
|
||||||
returnStdout: true
|
returnStdout: true
|
||||||
).trim()
|
).trim()
|
||||||
}
|
}
|
||||||
|
@ -7,23 +7,23 @@ pipeline {
|
|||||||
stages {
|
stages {
|
||||||
stage('Stage 1') {
|
stage('Stage 1') {
|
||||||
steps {
|
steps {
|
||||||
sh 'chmod u+x ./simple_regression_lab7.py'
|
sh 'chmod u+x ./lab7/simple_regression_lab7.py'
|
||||||
echo 'Copying datasets from create-dataset...'
|
echo 'Copying datasets from create-dataset...'
|
||||||
copyArtifacts filter: '*', projectName: 's449288-create-dataset'
|
copyArtifacts filter: '*', projectName: 's449288-create-dataset'
|
||||||
echo 'Datasets copied'
|
echo 'Datasets copied'
|
||||||
echo 'Conducting simple regression model test'
|
echo 'Conducting simple regression model test'
|
||||||
sh 'python3 simple_regression_lab7.py'
|
sh 'python3 lab7/simple_regression_lab7.py'
|
||||||
echo 'Model and predictions saved'
|
echo 'Model and predictions saved'
|
||||||
sh 'head lego_reg_results.csv'
|
sh 'head lab7/lego_reg_results.csv'
|
||||||
echo 'Archiving model...'
|
echo 'Archiving model...'
|
||||||
sh 'ls -lh lego_reg_model'
|
sh 'ls -lh lab7/lego_reg_model'
|
||||||
sh 'tar -czf lego_reg_model.tar.gz lego_reg_model/'
|
sh 'tar -czf lab7/lego_reg_model.tar.gz lab7/lego_reg_model/'
|
||||||
archiveArtifacts 'lego_reg_model.tar.gz'
|
archiveArtifacts 'lab7/lego_reg_model.tar.gz'
|
||||||
echo 'Model archived'
|
echo 'Model archived'
|
||||||
echo 'Archiving Sacreds output repo...'
|
echo 'Archiving Sacreds output repo...'
|
||||||
sh 'ls -lh runs/*/'
|
sh 'ls -lh lab7/runs/*/'
|
||||||
sh 'tar -czf sacred_runs.tar.gz runs/'
|
sh 'tar -czf lab7/sacred_runs.tar.gz lab7/runs/'
|
||||||
archiveArtifacts 'sacred_runs.tar.gz'
|
archiveArtifacts 'lab7/sacred_runs.tar.gz'
|
||||||
echo 'Sacreds repo archived'
|
echo 'Sacreds repo archived'
|
||||||
echo 'Launching the s449288-evaluation job...'
|
echo 'Launching the s449288-evaluation job...'
|
||||||
build job: 's449288-evaluation/master/'
|
build job: 's449288-evaluation/master/'
|
||||||
|
37
lab7/evaluate.py
Normal file
37
lab7/evaluate.py
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
import tensorflow as tf
|
||||||
|
from tensorflow import keras
|
||||||
|
from matplotlib import pyplot as plt
|
||||||
|
from matplotlib.ticker import MaxNLocator
|
||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
# Load the trained model from disk.
model = keras.models.load_model('lego_reg_model')

# Load the test split.
data_test = pd.read_csv('lego_sets_clean_test.csv')
test_piece_counts = np.array(data_test['piece_count'])
test_prices = np.array(data_test['list_price'])

# Simple evaluation on the test set (the training scripts compile the model
# with a mean-absolute-error loss).
test_results = model.evaluate(
    test_piece_counts,
    test_prices, verbose=0)

# Append this run's metric value to the results file, one value per line.
with open('eval_results.txt', 'a+') as f:
    f.write(str(test_results) + '\n')

# Read every recorded value back and plot it against its position in the file.
with open('eval_results.txt') as f:
    # Lines read from a file keep their trailing newline, so a bare truthiness
    # check never filters anything; strip first so that blank lines are
    # skipped instead of making float() raise.
    scores = [float(line) for line in f if line.strip()]
builds = list(range(1, len(scores) + 1))

plt.plot(builds, scores)
plt.xlabel('Build number')
plt.xticks(builds)
plt.ylabel('Mean absolute error')
plt.title('Model error by build')
plt.savefig('error_plot.jpg')
plt.show()
|
||||||
|
|
30
lab7/process_dataset.py
Normal file
30
lab7/process_dataset.py
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
|
||||||
|
# Load the raw dataset; rows containing any empty field are dropped right away.
lego = pd.read_csv('lego_sets.csv', encoding='utf-8').dropna()

# Prices are kept to at most two decimal places.
lego['list_price'] = lego['list_price'].round(2)

# Review counts, piece counts and product ids are stored as whole numbers.
for column in ('num_reviews', 'piece_count', 'prod_id'):
    lego[column] = lego[column].apply(np.int64)

# Quick look at the cleaned data and its summary statistics.
print(lego)
print(lego.describe(include='all'))

# Two-step split: 80% goes to the training set, then the remainder is
# halved into the validation and test sets.
lego_train, lego_rem = train_test_split(lego, train_size=0.8)
lego_valid, lego_test = train_test_split(lego_rem, test_size=0.5)

# Write the full cleaned set and every split to its own CSV file.
outputs = {
    'lego_sets_clean.csv': lego,
    'lego_sets_clean_train.csv': lego_train,
    'lego_sets_clean_valid.csv': lego_valid,
    'lego_sets_clean_test.csv': lego_test,
}
for path, frame in outputs.items():
    frame.to_csv(path, index=None, header=True)
|
86
lab7/simple_regression_lab7.py
Normal file
86
lab7/simple_regression_lab7.py
Normal file
@ -0,0 +1,86 @@
|
|||||||
|
import tensorflow as tf
|
||||||
|
from keras import layers
|
||||||
|
from keras.models import save_model
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from sacred import Experiment
|
||||||
|
from sacred.observers import FileStorageObserver
|
||||||
|
from sacred.observers import MongoObserver
|
||||||
|
|
||||||
|
# Sacred experiment object used to track the regression run.
ex = Experiment(save_git_info=False)

# Observers: one stores runs as files under 'runs', the other reports to MongoDB.
# NOTE(review): the MongoDB URL embeds credentials in source; consider moving
# them to configuration.
file_observer = FileStorageObserver('runs')
mongo_observer = MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017', db_name='sacred')
ex.observers.extend([file_observer, mongo_observer])
|
||||||
|
|
||||||
|
# Example training configuration that can be modified through Sacred.
# The local variable names are the configuration keys that train() receives,
# so they must not be renamed.
@ex.config
def config():
    epochs = 100  # number of training epochs
    units = 1  # number of units of the dense layer
    learning_rate = 0.1  # learning rate of the Adam optimizer
|
||||||
|
|
||||||
|
|
||||||
|
# The rest of the code lives in a decorated function called through Sacred,
# so that the configuration values are captured.
@ex.capture
def train(epochs, units, learning_rate, _run):
    """Train the piece-count -> price regression model and save its outputs.

    Reads the train and test CSV splits from the working directory, fits the
    model, writes the test-set predictions to 'lego_reg_results.csv', saves
    the model to 'lego_reg_model', adds the saved model file as a Sacred
    artifact and logs the loss of the last epoch.

    Args:
        epochs: number of training epochs.
        units: number of units of the dense layer.
        learning_rate: learning rate of the Adam optimizer.
        _run: Sacred run object used to log the final training loss.
    """
    # Load the data.
    data_train = pd.read_csv('lego_sets_clean_train.csv')
    data_test = pd.read_csv('lego_sets_clean_test.csv')

    # The model predicts a set's price from the number of pieces it contains.
    train_piece_counts = np.array(data_train['piece_count'])
    train_prices = np.array(data_train['list_price'])
    test_piece_counts = np.array(data_test['piece_count'])

    # Normalization layer fitted on the training piece counts.
    normalizer = layers.Normalization(input_shape=[1, ], axis=None)
    normalizer.adapt(train_piece_counts)

    # Model: the normalization layer followed by a dense layer.
    model = tf.keras.Sequential([
        normalizer,
        layers.Dense(units=units)
    ])

    model.compile(
        optimizer=tf.optimizers.Adam(learning_rate=learning_rate),
        loss='mean_absolute_error'
    )

    # Train; 20% of the training data is held out for validation.
    history = model.fit(
        train_piece_counts,
        train_prices,
        epochs=epochs,
        verbose=0,
        validation_split=0.2
    )

    # Predict prices for the test set and save them next to their inputs.
    y_pred = model.predict(test_piece_counts)
    results = pd.DataFrame({
        'test_set_piece_count': test_piece_counts.tolist(),
        'predicted_price': [round(a[0], 2) for a in y_pred.tolist()],
    })
    results.to_csv('lego_reg_results.csv', index=False, header=True)

    # Save the model with Keras and attach the saved file to the Sacred run.
    model.save('lego_reg_model')
    ex.add_artifact('lego_reg_model/saved_model.pb')

    # Log the loss of the last epoch so it shows up in the observers' output.
    final_loss = float(history.history['loss'][-1])
    _run.log_scalar('final.training.loss', final_loss)
|
||||||
|
|
||||||
|
@ex.automain
def main(units, learning_rate):
    """Entry point of the Sacred experiment; delegates to train()."""
    # train() is a captured function, so it is called without arguments.
    train()
|
30
lab8/process_dataset.py
Normal file
30
lab8/process_dataset.py
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from sklearn.model_selection import train_test_split
|
||||||
|
|
||||||
|
# Load the raw dataset; rows containing any empty field are dropped right away.
lego = pd.read_csv('lego_sets.csv', encoding='utf-8').dropna()

# Prices are kept to at most two decimal places.
lego['list_price'] = lego['list_price'].round(2)

# Review counts, piece counts and product ids are stored as whole numbers.
for column in ('num_reviews', 'piece_count', 'prod_id'):
    lego[column] = lego[column].apply(np.int64)

# Quick look at the cleaned data and its summary statistics.
print(lego)
print(lego.describe(include='all'))

# Two-step split: 80% goes to the training set, then the remainder is
# halved into the validation and test sets.
lego_train, lego_rem = train_test_split(lego, train_size=0.8)
lego_valid, lego_test = train_test_split(lego_rem, test_size=0.5)

# Write the full cleaned set and every split to its own CSV file.
outputs = {
    'lego_sets_clean.csv': lego,
    'lego_sets_clean_train.csv': lego_train,
    'lego_sets_clean_valid.csv': lego_valid,
    'lego_sets_clean_test.csv': lego_test,
}
for path, frame in outputs.items():
    frame.to_csv(path, index=None, header=True)
|
118
lab8/simple_regression_lab8.py
Normal file
118
lab8/simple_regression_lab8.py
Normal file
@ -0,0 +1,118 @@
|
|||||||
|
import tensorflow as tf
|
||||||
|
from keras import layers
|
||||||
|
from keras.models import save_model
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
from sacred import Experiment
|
||||||
|
from sacred.observers import FileStorageObserver
|
||||||
|
from sacred.observers import MongoObserver
|
||||||
|
import mlflow
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
|
# MLflow configuration: tracking server and experiment name.
# NOTE(review): the URI ends with '/#/', which looks like the web UI address;
# confirm the tracking client accepts it or whether the bare server address
# is required.
MLFLOW_TRACKING_URI = 'http://tzietkiewicz.vm.wmi.amu.edu.pl:5000/#/'
MLFLOW_EXPERIMENT = 's449288'
mlflow.set_tracking_uri(MLFLOW_TRACKING_URI)
mlflow.set_experiment(MLFLOW_EXPERIMENT)

# Sacred experiment object used to track the regression run.
ex = Experiment(save_git_info=False)

# Observers: one stores runs as files under 'runs', the other reports to MongoDB.
# NOTE(review): the MongoDB URL embeds credentials in source; consider moving
# them to configuration.
file_observer = FileStorageObserver('runs')
mongo_observer = MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017', db_name='sacred')
ex.observers.extend([file_observer, mongo_observer])
|
||||||
|
|
||||||
|
# Example training configuration that can be modified through Sacred.
# The local variable names are the configuration keys that train() and main()
# receive, so they must not be renamed.
@ex.config
def config():
    epochs = 100  # number of training epochs
    units = 1  # number of units of the dense layer
    learning_rate = 0.1  # learning rate of the Adam optimizer
|
||||||
|
|
||||||
|
|
||||||
|
# The rest of the code lives in a decorated function called through Sacred,
# so that the configuration values are captured.
@ex.capture
def train(epochs, units, learning_rate, _run):
    """Train the regression model and record the run in Sacred and MLflow.

    Reads the train and test CSV splits from the working directory, fits the
    model, writes the test-set predictions to 'lego_reg_results.csv', saves
    the model to 'lego_reg_model', adds the saved model file as a Sacred
    artifact, and logs the parameters, the test-set metric and the model to
    MLflow.

    Args:
        epochs: number of training epochs.
        units: number of units of the dense layer.
        learning_rate: learning rate of the Adam optimizer.
        _run: Sacred run object used to log the final training loss.
    """
    # Attach the training to an MLflow run.
    with mlflow.start_run() as run:
        print('MLflow run experiment_id: {0}'.format(run.info.experiment_id))
        print('MLflow run artifact_uri: {0}'.format(run.info.artifact_uri))

        # Load the data.
        data_train = pd.read_csv('lego_sets_clean_train.csv')
        data_test = pd.read_csv('lego_sets_clean_test.csv')

        # The model predicts a set's price from the number of pieces it contains.
        train_piece_counts = np.array(data_train['piece_count'])
        train_prices = np.array(data_train['list_price'])
        test_piece_counts = np.array(data_test['piece_count'])
        test_prices = np.array(data_test['list_price'])

        # Normalization layer fitted on the training piece counts.
        normalizer = layers.Normalization(input_shape=[1, ], axis=None)
        normalizer.adapt(train_piece_counts)

        # Model: the normalization layer followed by a dense layer.
        model = tf.keras.Sequential([
            normalizer,
            layers.Dense(units=units)
        ])

        model.compile(
            optimizer=tf.optimizers.Adam(learning_rate=learning_rate),
            loss='mean_absolute_error'
        )

        # Train; 20% of the training data is held out for validation.
        history = model.fit(
            train_piece_counts,
            train_prices,
            epochs=epochs,
            verbose=0,
            validation_split=0.2
        )

        # Predict prices for the test set and save them next to their inputs.
        y_pred = model.predict(test_piece_counts)
        results = pd.DataFrame({
            'test_set_piece_count': test_piece_counts.tolist(),
            'predicted_price': [round(a[0], 2) for a in y_pred.tolist()],
        })
        results.to_csv('lego_reg_results.csv', index=False, header=True)

        # Save the model with Keras and attach the saved file to the Sacred run.
        model.save('lego_reg_model')
        ex.add_artifact('lego_reg_model/saved_model.pb')

        # Log the loss of the last epoch so it shows up in the observers' output.
        final_loss = float(history.history['loss'][-1])
        _run.log_scalar('final.training.loss', final_loss)

        # Evaluate on the test set for MLflow (the model is compiled with a
        # mean-absolute-error loss, which is what evaluate() reports here).
        mae = model.evaluate(
            test_piece_counts,
            test_prices, verbose=0)

        # Record the parameters and the metric in MLflow.
        mlflow.log_param('epochs', epochs)
        mlflow.log_param('units', units)
        mlflow.log_param('learning_rate', learning_rate)
        mlflow.log_metric("mae", mae)

        # Log the model to MLflow; a registered model name is passed only when
        # the tracking URI scheme is not 'file'.
        signature = mlflow.models.signature.infer_signature(
            train_piece_counts, model.predict(train_piece_counts))
        tracking_url_type_store = urlparse(mlflow.get_tracking_uri()).scheme
        if tracking_url_type_store != 'file':
            mlflow.keras.log_model(model, 'lego-model', registered_model_name='TFLegoModel',
                                   signature=signature)
        else:
            mlflow.keras.log_model(model, 'model', signature=signature, input_example=500)
|
||||||
|
|
||||||
|
|
||||||
|
@ex.automain
def main(epochs, units, learning_rate):
    """Entry point of the Sacred experiment; delegates to train()."""
    # train() is a captured function, so it is called without arguments.
    train()
|
Loading…
Reference in New Issue
Block a user