split everything into per-lab repos
Some checks failed
s449288-evaluation/pipeline/head There was a failure building this commit
s449288-training/pipeline/head There was a failure building this commit

This commit is contained in:
Kacper 2022-05-12 22:30:38 +02:00
parent 42b20cfa84
commit 8e3e206cca
17 changed files with 587 additions and 51 deletions

10
lab5/Jenkinsfile vendored
View File

@ -12,15 +12,15 @@ pipeline {
stages {
stage('Stage 1') {
steps {
sh 'chmod u+x ./process_dataset.py ./simple_regression.py'
sh 'chmod u+x ./lab5/process_dataset.py ./lab5/simple_regression.py'
echo 'Processing dataset...'
sh 'python3 process_dataset.py'
sh 'python3 lab5/process_dataset.py'
echo 'Dataset processed'
echo 'Conducting simple regression model test'
sh 'python3 simple_regression.py $EPOCHS_NUM'
sh 'python3 lab5/simple_regression.py $EPOCHS_NUM'
echo 'Model predictions saved'
sh 'head lego_reg_results.csv'
sh 'head lab5/lego_reg_results.csv'
}
}
}
}
}

View File

@ -5,12 +5,12 @@ pipeline {
stages {
stage('Stage 1') {
steps {
sh 'chmod u+x ./process_dataset.py ./simple_regression.py'
sh 'chmod u+x ./lab5/process_dataset.py ./lab5/simple_regression.py'
echo 'Processing dataset...'
sh 'python3 process_dataset.py'
sh 'python3 lab5/process_dataset.py'
echo 'Dataset processed'
echo 'Conducting simple regression model test'
sh 'python3 simple_regression.py'
sh 'python3 lab5/simple_regression.py'
echo 'Model predictions saved'
}
}

30
lab5/process_dataset.py Normal file
View File

@ -0,0 +1,30 @@
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
# Fixed seed so repeated runs yield the same train/validation/test partition.
SPLIT_SEED = 42

# Load the raw data set, dropping every row that has an empty field.
lego = pd.read_csv('lego_sets.csv', encoding='utf-8').dropna()
# list_price needs at most two decimal places.
lego['list_price'] = lego['list_price'].round(2)
# num_reviews, piece_count and prod_id are whole numbers; cast each column in
# one vectorized step instead of converting value by value.
for column in ('num_reviews', 'piece_count', 'prod_id'):
    lego[column] = lego[column].astype(np.int64)
# Print the cleaned frame and its summary statistics.
print(lego)
print(lego.describe(include='all'))
# First split: carve out the training set (80% of the rows).
lego_train, lego_rem = train_test_split(
    lego, train_size=0.8, random_state=SPLIT_SEED)
# Second split: halve the remainder into validation and test sets.
lego_valid, lego_test = train_test_split(
    lego_rem, test_size=0.5, random_state=SPLIT_SEED)
# Write the cleaned data set and its three partitions, without the index column.
outputs = {
    'lego_sets_clean.csv': lego,
    'lego_sets_clean_train.csv': lego_train,
    'lego_sets_clean_valid.csv': lego_valid,
    'lego_sets_clean_test.csv': lego_test,
}
for path, frame in outputs.items():
    frame.to_csv(path, index=False, header=True)

69
lab5/simple_regression.py Normal file
View File

@ -0,0 +1,69 @@
import tensorflow as tf
from keras import layers
from keras.models import save_model
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys
# Number of epochs used when the command line does not supply one.
DEFAULT_EPOCHS = 100


def parse_epochs(argv, default=DEFAULT_EPOCHS):
    """Return the number of training epochs requested on the command line.

    Args:
        argv: Argument vector in ``sys.argv`` layout (program name first).
        default: Value returned when ``argv[1]`` is missing or blank.

    Returns:
        ``int(argv[1])`` when a non-blank first argument is present,
        otherwise ``default``.

    Raises:
        ValueError: If the first argument is present but is not an integer.
    """
    # An unset shell variable expands to nothing, so the argument may be absent.
    if len(argv) < 2 or not argv[1].strip():
        return default
    return int(argv[1])


def main():
    """Train the piece-count -> price regression and save its outputs.

    Reads 'lego_sets_clean_train.csv' and 'lego_sets_clean_test.csv' from the
    working directory, writes the test-set predictions to
    'lego_reg_results.csv' and saves the model under 'lego_reg_model'.
    """
    epochs_num = parse_epochs(sys.argv)
    # Load the data.
    data_train = pd.read_csv('lego_sets_clean_train.csv')
    data_test = pd.read_csv('lego_sets_clean_test.csv')
    # Predict a set's price from the number of pieces it contains.
    train_piece_counts = np.array(data_train['piece_count'])
    train_prices = np.array(data_train['list_price'])
    test_piece_counts = np.array(data_test['piece_count'])
    # Normalization layer fitted on the training inputs only.
    normalizer = layers.Normalization(input_shape=[1, ], axis=None)
    normalizer.adapt(train_piece_counts)
    # Model: normalization followed by a single Dense unit.
    model = tf.keras.Sequential([
        normalizer,
        layers.Dense(units=1),
    ])
    model.compile(
        optimizer=tf.optimizers.Adam(learning_rate=0.1),
        loss='mean_absolute_error',
    )
    # Training; 20% of the training data is held out for validation.
    model.fit(
        train_piece_counts,
        train_prices,
        epochs=epochs_num,
        verbose=0,
        validation_split=0.2,
    )
    # Predict on the test set.
    y_pred = model.predict(test_piece_counts)
    # Save the predictions, rounded to two decimal places.
    results = pd.DataFrame({
        'test_set_piece_count': test_piece_counts.tolist(),
        'predicted_price': [round(a[0], 2) for a in y_pred.tolist()],
    })
    results.to_csv('lego_reg_results.csv', index=False, header=True)
    # Save the model.
    model.save('lego_reg_model')


if __name__ == '__main__':
    main()

View File

@ -5,16 +5,16 @@ pipeline {
stages {
stage('Stage 1') {
steps {
sh 'chmod u+x ./process_dataset.py'
sh 'chmod u+x ./lab6/process_dataset.py'
echo 'Processing dataset...'
sh 'python3 process_dataset.py'
sh 'python3 lab6/process_dataset.py'
echo 'Dataset processed'
echo 'Archiving clean train and test datasets...'
archiveArtifacts artifacts: 'lego_sets_clean_train.csv, lego_sets_clean_test.csv'
archiveArtifacts artifacts: 'lab6/lego_sets_clean_train.csv, lab6/lego_sets_clean_test.csv'
echo 'Datasets archived'
echo 'Launching the s449288-training job...'
build job: 's449288-training/master/'
}
}
}
}
}

View File

@ -16,28 +16,28 @@ pipeline {
stage('Stage 1') {
steps {
git branch: "${params.BRANCH}", url: 'https://git.wmi.amu.edu.pl/s449288/ium_s449288.git'
sh 'chmod u+x ./evaluate.py'
sh 'chmod u+x ./lab6/evaluate.py'
echo 'Copying datasets from the create-dataset job...'
copyArtifacts filter: 'lego_sets_clean_test.csv', projectName: 's449288-create-dataset'
copyArtifacts filter: 'lab6/lego_sets_clean_test.csv', projectName: 's449288-create-dataset'
echo 'Datasets copied'
echo 'Copying model from the training job...'
copyArtifacts filter: 'lego_reg_model.tar.gz', projectName: "s449288-training/${BRANCH}/", selector: buildParameter('BUILD_SELECTOR')
copyArtifacts filter: 'lab6/lego_reg_model.tar.gz', projectName: "s449288-training/${BRANCH}/", selector: buildParameter('BUILD_SELECTOR')
echo 'Model copied'
sh 'tar xvzf lego_reg_model.tar.gz'
sh 'tar xvzf lab6/lego_reg_model.tar.gz'
echo 'Optional copying of the metrics file from previous build...'
copyArtifacts filter: 'eval_results.txt', projectName: 's449288-evaluation/master/', optional: true
copyArtifacts filter: 'lab6/eval_results.txt', projectName: 's449288-evaluation/master/', optional: true
echo 'Metrics file copied if it did not exist'
echo 'Evaluating model...'
sh 'python3 evaluate.py'
sh 'python3 lab6/evaluate.py'
echo 'Model evaluated. Metrics saved. Plot saved.'
sh 'head eval_results.txt'
sh 'file error_plot.jpg'
sh 'head lab6/eval_results.txt'
sh 'file lab6/error_plot.jpg'
echo 'Archiving metrics file...'
archiveArtifacts 'eval_results.txt'
archiveArtifacts 'lab6/eval_results.txt'
echo 'File archived'
script {
LAST_MAE = sh (
script: 'tail -1 eval_results.txt',
script: 'tail -1 lab6/eval_results.txt',
returnStdout: true
).trim()
}
@ -58,4 +58,4 @@ pipeline {
emailext body: "CHANGED - ${LAST_MAE} MAE", subject: 's449288-evaluation build status', to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
}
}
}
}

View File

@ -14,18 +14,18 @@ pipeline {
stages {
stage('Stage 1') {
steps {
sh 'chmod u+x ./simple_regression.py'
sh 'chmod u+x ./lab6/simple_regression.py'
echo 'Copying datasets from create-dataset...'
copyArtifacts filter: '*', projectName: 's449288-create-dataset'
echo 'Datasets copied'
echo 'Conducting simple regression model test'
sh 'python3 simple_regression.py $EPOCHS_NUM'
sh 'python3 lab6/simple_regression.py $EPOCHS_NUM'
echo 'Model and predictions saved'
sh 'head lego_reg_results.csv'
sh 'ls -lh lego_reg_model'
sh 'head lab6/lego_reg_results.csv'
sh 'ls -lh lab6/lego_reg_model'
echo 'Archiving model...'
sh 'tar -czf lego_reg_model.tar.gz lego_reg_model/'
archiveArtifacts 'lego_reg_model.tar.gz'
sh 'tar -czf lab6/lego_reg_model.tar.gz lab6/lego_reg_model/'
archiveArtifacts 'lab6/lego_reg_model.tar.gz'
echo 'Model archived'
echo 'Launching the s449288-evaluation job...'
build job: 's449288-evaluation/master/'
@ -46,4 +46,4 @@ pipeline {
emailext body: 'CHANGED', subject: 's449288-training build status', to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
}
}
}
}

37
lab6/evaluate.py Normal file
View File

@ -0,0 +1,37 @@
import tensorflow as tf
from tensorflow import keras
from matplotlib import pyplot as plt
from matplotlib.ticker import MaxNLocator
import numpy as np
import pandas as pd
def parse_scores(lines):
    """Convert text lines into a list of float scores.

    Args:
        lines: Iterable of strings, e.g. an open text file.

    Returns:
        One float per non-blank line, in input order. Blank and
        whitespace-only lines are skipped; a line read from a file always
        contains at least its newline, so a plain truthiness check would
        not filter them out.

    Raises:
        ValueError: If a non-blank line is not a valid float.
    """
    return [float(line) for line in lines if line.strip()]


def main():
    """Evaluate the saved model and refresh the per-build error plot.

    Appends the evaluation result to 'eval_results.txt' and saves a plot of
    all recorded results to 'error_plot.jpg'.
    """
    # Load the model from disk.
    model = keras.models.load_model('lego_reg_model')
    # Load the test set.
    data_test = pd.read_csv('lego_sets_clean_test.csv')
    test_piece_counts = np.array(data_test['piece_count'])
    test_prices = np.array(data_test['list_price'])
    # Simple evaluation; the result is the model's loss, which is expected to
    # be the mean absolute error.
    test_results = model.evaluate(
        test_piece_counts,
        test_prices, verbose=0)
    # Append the metric value to the history file.
    with open('eval_results.txt', 'a+') as f:
        f.write(str(test_results) + '\n')
    # Re-read the full history and plot it, one point per recorded result.
    with open('eval_results.txt') as f:
        scores = parse_scores(f)
    builds = list(range(1, len(scores) + 1))
    plt.plot(builds, scores)
    plt.xlabel('Build number')
    plt.xticks(builds)
    plt.ylabel('Mean absolute error')
    plt.title('Model error by build')
    # Save before show(): showing may clear the current figure.
    plt.savefig('error_plot.jpg')
    plt.show()


if __name__ == '__main__':
    main()

30
lab6/process_dataset.py Normal file
View File

@ -0,0 +1,30 @@
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
# Fixed seed so repeated runs yield the same train/validation/test partition.
SPLIT_SEED = 42

# Load the raw data set, dropping every row that has an empty field.
lego = pd.read_csv('lego_sets.csv', encoding='utf-8').dropna()
# list_price needs at most two decimal places.
lego['list_price'] = lego['list_price'].round(2)
# num_reviews, piece_count and prod_id are whole numbers; cast each column in
# one vectorized step instead of converting value by value.
for column in ('num_reviews', 'piece_count', 'prod_id'):
    lego[column] = lego[column].astype(np.int64)
# Print the cleaned frame and its summary statistics.
print(lego)
print(lego.describe(include='all'))
# First split: carve out the training set (80% of the rows).
lego_train, lego_rem = train_test_split(
    lego, train_size=0.8, random_state=SPLIT_SEED)
# Second split: halve the remainder into validation and test sets.
lego_valid, lego_test = train_test_split(
    lego_rem, test_size=0.5, random_state=SPLIT_SEED)
# Write the cleaned data set and its three partitions, without the index column.
outputs = {
    'lego_sets_clean.csv': lego,
    'lego_sets_clean_train.csv': lego_train,
    'lego_sets_clean_valid.csv': lego_valid,
    'lego_sets_clean_test.csv': lego_test,
}
for path, frame in outputs.items():
    frame.to_csv(path, index=False, header=True)

69
lab6/simple_regression.py Normal file
View File

@ -0,0 +1,69 @@
import tensorflow as tf
from keras import layers
from keras.models import save_model
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys
# Number of epochs used when the command line does not supply one.
DEFAULT_EPOCHS = 100


def parse_epochs(argv, default=DEFAULT_EPOCHS):
    """Return the number of training epochs requested on the command line.

    Args:
        argv: Argument vector in ``sys.argv`` layout (program name first).
        default: Value returned when ``argv[1]`` is missing or blank.

    Returns:
        ``int(argv[1])`` when a non-blank first argument is present,
        otherwise ``default``.

    Raises:
        ValueError: If the first argument is present but is not an integer.
    """
    # An unset shell variable expands to nothing, so the argument may be absent.
    if len(argv) < 2 or not argv[1].strip():
        return default
    return int(argv[1])


def main():
    """Train the piece-count -> price regression and save its outputs.

    Reads 'lego_sets_clean_train.csv' and 'lego_sets_clean_test.csv' from the
    working directory, writes the test-set predictions to
    'lego_reg_results.csv' and saves the model under 'lego_reg_model'.
    """
    epochs_num = parse_epochs(sys.argv)
    # Load the data.
    data_train = pd.read_csv('lego_sets_clean_train.csv')
    data_test = pd.read_csv('lego_sets_clean_test.csv')
    # Predict a set's price from the number of pieces it contains.
    train_piece_counts = np.array(data_train['piece_count'])
    train_prices = np.array(data_train['list_price'])
    test_piece_counts = np.array(data_test['piece_count'])
    # Normalization layer fitted on the training inputs only.
    normalizer = layers.Normalization(input_shape=[1, ], axis=None)
    normalizer.adapt(train_piece_counts)
    # Model: normalization followed by a single Dense unit.
    model = tf.keras.Sequential([
        normalizer,
        layers.Dense(units=1),
    ])
    model.compile(
        optimizer=tf.optimizers.Adam(learning_rate=0.1),
        loss='mean_absolute_error',
    )
    # Training; 20% of the training data is held out for validation.
    model.fit(
        train_piece_counts,
        train_prices,
        epochs=epochs_num,
        verbose=0,
        validation_split=0.2,
    )
    # Predict on the test set.
    y_pred = model.predict(test_piece_counts)
    # Save the predictions, rounded to two decimal places.
    results = pd.DataFrame({
        'test_set_piece_count': test_piece_counts.tolist(),
        'predicted_price': [round(a[0], 2) for a in y_pred.tolist()],
    })
    results.to_csv('lego_reg_results.csv', index=False, header=True)
    # Save the model.
    model.save('lego_reg_model')


if __name__ == '__main__':
    main()

View File

@ -16,28 +16,28 @@ pipeline {
stage('Stage 1') {
steps {
git branch: "${params.BRANCH}", url: 'https://git.wmi.amu.edu.pl/s449288/ium_s449288.git'
sh 'chmod u+x ./evaluate.py'
sh 'chmod u+x ./lab7/evaluate.py'
echo 'Copying datasets from the create-dataset job...'
copyArtifacts filter: 'lego_sets_clean_test.csv', projectName: 's449288-create-dataset'
copyArtifacts filter: 'lab7/lego_sets_clean_test.csv', projectName: 's449288-create-dataset'
echo 'Datasets copied'
echo 'Copying model from the training job...'
copyArtifacts filter: 'lego_reg_model.tar.gz', projectName: "s449288-training/${BRANCH}/", selector: buildParameter('BUILD_SELECTOR')
copyArtifacts filter: 'lab7/lego_reg_model.tar.gz', projectName: "s449288-training/${BRANCH}/", selector: buildParameter('BUILD_SELECTOR')
echo 'Model copied'
sh 'tar xvzf lego_reg_model.tar.gz'
sh 'tar xvzf lab7/lego_reg_model.tar.gz'
echo 'Optional copying of the metrics file from previous build...'
copyArtifacts filter: 'eval_results.txt', projectName: 's449288-evaluation/master/', optional: true
copyArtifacts filter: 'lab7/eval_results.txt', projectName: 's449288-evaluation/master/', optional: true
echo 'Metrics file copied if it did not exist'
echo 'Evaluating model...'
sh 'python3 evaluate.py'
sh 'python3 lab7/evaluate.py'
echo 'Model evaluated. Metrics saved. Plot saved.'
sh 'head eval_results.txt'
sh 'file error_plot.jpg'
sh 'head lab7/eval_results.txt'
sh 'file lab7/error_plot.jpg'
echo 'Archiving metrics file...'
archiveArtifacts 'eval_results.txt'
archiveArtifacts 'lab7/eval_results.txt'
echo 'File archived'
script {
LAST_MAE = sh (
script: 'tail -1 eval_results.txt',
script: 'tail -1 lab7/eval_results.txt',
returnStdout: true
).trim()
}
@ -58,4 +58,4 @@ pipeline {
emailext body: "CHANGED - ${LAST_MAE} MAE", subject: 's449288-evaluation build status', to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
}
}
}
}

View File

@ -7,23 +7,23 @@ pipeline {
stages {
stage('Stage 1') {
steps {
sh 'chmod u+x ./simple_regression_lab7.py'
sh 'chmod u+x ./lab7/simple_regression_lab7.py'
echo 'Copying datasets from create-dataset...'
copyArtifacts filter: '*', projectName: 's449288-create-dataset'
echo 'Datasets copied'
echo 'Conducting simple regression model test'
sh 'python3 simple_regression_lab7.py'
sh 'python3 lab7/simple_regression_lab7.py'
echo 'Model and predictions saved'
sh 'head lego_reg_results.csv'
sh 'head lab7/lego_reg_results.csv'
echo 'Archiving model...'
sh 'ls -lh lego_reg_model'
sh 'tar -czf lego_reg_model.tar.gz lego_reg_model/'
archiveArtifacts 'lego_reg_model.tar.gz'
sh 'ls -lh lab7/lego_reg_model'
sh 'tar -czf lab7/lego_reg_model.tar.gz lab7/lego_reg_model/'
archiveArtifacts 'lab7/lego_reg_model.tar.gz'
echo 'Model archived'
echo 'Archiving Sacreds output repo...'
sh 'ls -lh runs/*/'
sh 'tar -czf sacred_runs.tar.gz runs/'
archiveArtifacts 'sacred_runs.tar.gz'
sh 'ls -lh lab7/runs/*/'
sh 'tar -czf lab7/sacred_runs.tar.gz lab7/runs/'
archiveArtifacts 'lab7/sacred_runs.tar.gz'
echo 'Sacreds repo archived'
echo 'Launching the s449288-evaluation job...'
build job: 's449288-evaluation/master/'
@ -44,4 +44,4 @@ pipeline {
emailext body: 'CHANGED', subject: 's449288-training build status', to: 'e19191c5.uam.onmicrosoft.com@emea.teams.ms'
}
}
}
}

37
lab7/evaluate.py Normal file
View File

@ -0,0 +1,37 @@
import tensorflow as tf
from tensorflow import keras
from matplotlib import pyplot as plt
from matplotlib.ticker import MaxNLocator
import numpy as np
import pandas as pd
def parse_scores(lines):
    """Convert text lines into a list of float scores.

    Args:
        lines: Iterable of strings, e.g. an open text file.

    Returns:
        One float per non-blank line, in input order. Blank and
        whitespace-only lines are skipped; a line read from a file always
        contains at least its newline, so a plain truthiness check would
        not filter them out.

    Raises:
        ValueError: If a non-blank line is not a valid float.
    """
    return [float(line) for line in lines if line.strip()]


def main():
    """Evaluate the saved model and refresh the per-build error plot.

    Appends the evaluation result to 'eval_results.txt' and saves a plot of
    all recorded results to 'error_plot.jpg'.
    """
    # Load the model from disk.
    model = keras.models.load_model('lego_reg_model')
    # Load the test set.
    data_test = pd.read_csv('lego_sets_clean_test.csv')
    test_piece_counts = np.array(data_test['piece_count'])
    test_prices = np.array(data_test['list_price'])
    # Simple evaluation; the result is the model's loss, which is expected to
    # be the mean absolute error.
    test_results = model.evaluate(
        test_piece_counts,
        test_prices, verbose=0)
    # Append the metric value to the history file.
    with open('eval_results.txt', 'a+') as f:
        f.write(str(test_results) + '\n')
    # Re-read the full history and plot it, one point per recorded result.
    with open('eval_results.txt') as f:
        scores = parse_scores(f)
    builds = list(range(1, len(scores) + 1))
    plt.plot(builds, scores)
    plt.xlabel('Build number')
    plt.xticks(builds)
    plt.ylabel('Mean absolute error')
    plt.title('Model error by build')
    # Save before show(): showing may clear the current figure.
    plt.savefig('error_plot.jpg')
    plt.show()


if __name__ == '__main__':
    main()

30
lab7/process_dataset.py Normal file
View File

@ -0,0 +1,30 @@
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
# Fixed seed so repeated runs yield the same train/validation/test partition.
SPLIT_SEED = 42

# Load the raw data set, dropping every row that has an empty field.
lego = pd.read_csv('lego_sets.csv', encoding='utf-8').dropna()
# list_price needs at most two decimal places.
lego['list_price'] = lego['list_price'].round(2)
# num_reviews, piece_count and prod_id are whole numbers; cast each column in
# one vectorized step instead of converting value by value.
for column in ('num_reviews', 'piece_count', 'prod_id'):
    lego[column] = lego[column].astype(np.int64)
# Print the cleaned frame and its summary statistics.
print(lego)
print(lego.describe(include='all'))
# First split: carve out the training set (80% of the rows).
lego_train, lego_rem = train_test_split(
    lego, train_size=0.8, random_state=SPLIT_SEED)
# Second split: halve the remainder into validation and test sets.
lego_valid, lego_test = train_test_split(
    lego_rem, test_size=0.5, random_state=SPLIT_SEED)
# Write the cleaned data set and its three partitions, without the index column.
outputs = {
    'lego_sets_clean.csv': lego,
    'lego_sets_clean_train.csv': lego_train,
    'lego_sets_clean_valid.csv': lego_valid,
    'lego_sets_clean_test.csv': lego_test,
}
for path, frame in outputs.items():
    frame.to_csv(path, index=False, header=True)

View File

@ -0,0 +1,86 @@
import tensorflow as tf
from keras import layers
from keras.models import save_model
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sacred import Experiment
from sacred.observers import FileStorageObserver
from sacred.observers import MongoObserver
# Sacred experiment used to track the regression run; git info is not saved.
ex = Experiment(save_git_info=False)
# Attach both observers: a file store under 'runs' and a MongoDB store.
# NOTE(review): the MongoDB URL embeds credentials in source control -
# consider supplying them through configuration instead.
ex.observers.extend([
    FileStorageObserver('runs'),
    MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017', db_name='sacred'),
])
# Example training parameters, declared in a Sacred config scope so that they
# can be modified through Sacred.
@ex.config
def config():
    epochs = 100  # number of epochs handed to model.fit
    units = 1  # number of units in the Dense layer
    learning_rate = 0.1  # learning rate of the Adam optimizer
# The rest of the pipeline sits in a Sacred-captured function, so that Sacred
# has something to capture and fills in the configuration values.
@ex.capture
def train(epochs, units, learning_rate, _run):
    """Train the piece-count -> price regression and record it with Sacred.

    Args:
        epochs: Number of training epochs.
        units: Number of units in the Dense layer.
        learning_rate: Learning rate of the Adam optimizer.
        _run: Sacred run object used to log the final training loss.

    Writes the test-set predictions to 'lego_reg_results.csv', saves the model
    under 'lego_reg_model' and registers its 'saved_model.pb' as an artifact.
    """
    # Load the data.
    train_frame = pd.read_csv('lego_sets_clean_train.csv')
    test_frame = pd.read_csv('lego_sets_clean_test.csv')
    # Inputs are piece counts, targets are list prices.
    pieces_train = np.array(train_frame['piece_count'])
    prices_train = np.array(train_frame['list_price'])
    pieces_test = np.array(test_frame['piece_count'])
    prices_test = np.array(test_frame['list_price'])
    # Normalization layer fitted on the training inputs.
    normalizer = layers.Normalization(input_shape=[1, ], axis=None)
    normalizer.adapt(pieces_train)
    # Build and compile the model.
    model = tf.keras.Sequential([normalizer, layers.Dense(units=units)])
    optimizer = tf.optimizers.Adam(learning_rate=learning_rate)
    model.compile(optimizer=optimizer, loss='mean_absolute_error')
    # Train, holding out 20% of the training data for validation.
    history = model.fit(
        pieces_train,
        prices_train,
        epochs=epochs,
        verbose=0,
        validation_split=0.2,
    )
    # Predict on the test set and save the rounded predictions.
    predictions = model.predict(pieces_test)
    rounded_prices = [round(row[0], 2) for row in predictions.tolist()]
    results = pd.DataFrame({
        'test_set_piece_count': pieces_test.tolist(),
        'predicted_price': rounded_prices,
    })
    results.to_csv('lego_reg_results.csv', index=False, header=True)
    # Save the model with Keras, then hand the saved graph to Sacred as well.
    model.save('lego_reg_model')
    ex.add_artifact('lego_reg_model/saved_model.pb')
    # Log the last epoch's loss so it shows up in the observers' output.
    hist = pd.DataFrame(history.history)
    hist['epoch'] = history.epoch
    final_loss = hist['loss'].iloc[-1]
    _run.log_scalar('final.training.loss', final_loss)
# Sacred entry point: automain runs the experiment when the file is executed
# as a script.
@ex.automain
def main(units, learning_rate):
    """Run the training; the parameters are not used in the body."""
    # train() is a captured function, so it is called without arguments.
    train()

30
lab8/process_dataset.py Normal file
View File

@ -0,0 +1,30 @@
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
# Fixed seed so repeated runs yield the same train/validation/test partition.
SPLIT_SEED = 42

# Load the raw data set, dropping every row that has an empty field.
lego = pd.read_csv('lego_sets.csv', encoding='utf-8').dropna()
# list_price needs at most two decimal places.
lego['list_price'] = lego['list_price'].round(2)
# num_reviews, piece_count and prod_id are whole numbers; cast each column in
# one vectorized step instead of converting value by value.
for column in ('num_reviews', 'piece_count', 'prod_id'):
    lego[column] = lego[column].astype(np.int64)
# Print the cleaned frame and its summary statistics.
print(lego)
print(lego.describe(include='all'))
# First split: carve out the training set (80% of the rows).
lego_train, lego_rem = train_test_split(
    lego, train_size=0.8, random_state=SPLIT_SEED)
# Second split: halve the remainder into validation and test sets.
lego_valid, lego_test = train_test_split(
    lego_rem, test_size=0.5, random_state=SPLIT_SEED)
# Write the cleaned data set and its three partitions, without the index column.
outputs = {
    'lego_sets_clean.csv': lego,
    'lego_sets_clean_train.csv': lego_train,
    'lego_sets_clean_valid.csv': lego_valid,
    'lego_sets_clean_test.csv': lego_test,
}
for path, frame in outputs.items():
    frame.to_csv(path, index=False, header=True)

View File

@ -0,0 +1,118 @@
import tensorflow as tf
from keras import layers
from keras.models import save_model
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sacred import Experiment
from sacred.observers import FileStorageObserver
from sacred.observers import MongoObserver
import mlflow
from urllib.parse import urlparse
# MLflow: tracking server and experiment name.
# NOTE(review): the URI ends with '/#/', which looks like a web-UI address -
# confirm the server accepts it as the tracking base URL.
mlflow.set_tracking_uri('http://tzietkiewicz.vm.wmi.amu.edu.pl:5000/#/')
mlflow.set_experiment('s449288')
# Sacred experiment used to track the regression run; git info is not saved.
ex = Experiment(save_git_info=False)
# Attach both observers: a file store under 'runs' and a MongoDB store.
# NOTE(review): the MongoDB URL embeds credentials in source control -
# consider supplying them through configuration instead.
ex.observers.extend([
    FileStorageObserver('runs'),
    MongoObserver(url='mongodb://admin:IUM_2021@172.17.0.1:27017', db_name='sacred'),
])
# Example training parameters, declared in a Sacred config scope so that they
# can be modified through Sacred.
@ex.config
def config():
    epochs = 100  # number of epochs handed to model.fit
    units = 1  # number of units in the Dense layer
    learning_rate = 0.1  # learning rate of the Adam optimizer
# The rest of the pipeline sits in a Sacred-captured function, so that Sacred
# has something to capture and fills in the configuration values.
@ex.capture
def train(epochs, units, learning_rate, _run):
    """Train the regression and record the run with both Sacred and MLflow.

    Args:
        epochs: Number of training epochs.
        units: Number of units in the Dense layer.
        learning_rate: Learning rate of the Adam optimizer.
        _run: Sacred run object used to log the final training loss.

    Writes the test-set predictions to 'lego_reg_results.csv', saves the model
    under 'lego_reg_model', and logs parameters, the test MAE and the model to
    MLflow.
    """
    # Attach the training to MLflow.
    # NOTE(review): the original indentation was lost; this assumes every step
    # below runs inside the MLflow run context - confirm.
    with mlflow.start_run() as run:
        print('MLflow run experiment_id: {0}'.format(run.info.experiment_id))
        print('MLflow run artifact_uri: {0}'.format(run.info.artifact_uri))
        # Load the data.
        train_frame = pd.read_csv('lego_sets_clean_train.csv')
        test_frame = pd.read_csv('lego_sets_clean_test.csv')
        # Inputs are piece counts, targets are list prices.
        pieces_train = np.array(train_frame['piece_count'])
        prices_train = np.array(train_frame['list_price'])
        pieces_test = np.array(test_frame['piece_count'])
        prices_test = np.array(test_frame['list_price'])
        # Normalization layer fitted on the training inputs.
        normalizer = layers.Normalization(input_shape=[1, ], axis=None)
        normalizer.adapt(pieces_train)
        # Build and compile the model.
        model = tf.keras.Sequential([normalizer, layers.Dense(units=units)])
        optimizer = tf.optimizers.Adam(learning_rate=learning_rate)
        model.compile(optimizer=optimizer, loss='mean_absolute_error')
        # Train, holding out 20% of the training data for validation.
        history = model.fit(
            pieces_train,
            prices_train,
            epochs=epochs,
            verbose=0,
            validation_split=0.2,
        )
        # Predict on the test set and save the rounded predictions.
        predictions = model.predict(pieces_test)
        rounded_prices = [round(row[0], 2) for row in predictions.tolist()]
        results = pd.DataFrame({
            'test_set_piece_count': pieces_test.tolist(),
            'predicted_price': rounded_prices,
        })
        results.to_csv('lego_reg_results.csv', index=False, header=True)
        # Save the model with Keras, then hand the saved graph to Sacred.
        model.save('lego_reg_model')
        ex.add_artifact('lego_reg_model/saved_model.pb')
        # Log the last epoch's loss so it shows up in the observers' output.
        hist = pd.DataFrame(history.history)
        hist['epoch'] = history.epoch
        final_loss = hist['loss'].iloc[-1]
        _run.log_scalar('final.training.loss', final_loss)
        # Test-set MAE for MLflow (same evaluation as in evaluate.py).
        mae = model.evaluate(pieces_test, prices_test, verbose=0)
        # Record the parameters and the metric in MLflow.
        logged_params = (
            ('epochs', epochs),
            ('units', units),
            ('learning_rate', learning_rate),
        )
        for param_name, param_value in logged_params:
            mlflow.log_param(param_name, param_value)
        mlflow.log_metric("mae", mae)
        # Log the model; it is registered by name unless the tracking store
        # is a plain file store.
        signature = mlflow.models.signature.infer_signature(
            pieces_train, model.predict(pieces_train))
        store_scheme = urlparse(mlflow.get_tracking_uri()).scheme
        if store_scheme == 'file':
            mlflow.keras.log_model(model, 'model', signature=signature, input_example=500)
        else:
            mlflow.keras.log_model(model, 'lego-model', registered_model_name='TFLegoModel',
                                   signature=signature)
# Sacred entry point: automain runs the experiment when the file is executed
# as a script.
@ex.automain
def main(epochs, units, learning_rate):
    """Run the training; the parameters are not used in the body."""
    # train() is a captured function, so it is called without arguments.
    train()