From 3db952a567c736c08f5573fe5ab3cb6c264657a6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cezary=20Ga=C5=82=C4=85zkiewicz?= Date: Mon, 6 Jun 2022 00:28:02 +0200 Subject: [PATCH] Zad 10. DVC --- .gitignore | 2 ++ evaluate.py | 41 +++++++++++++++++++++++++++ process_dataset.py | 46 +------------------------------ steel_industry_data_test.csv.dvc | 4 +++ steel_industry_data_train.csv.dvc | 4 +++ 5 files changed, 52 insertions(+), 45 deletions(-) create mode 100644 evaluate.py create mode 100644 steel_industry_data_test.csv.dvc create mode 100644 steel_industry_data_train.csv.dvc diff --git a/.gitignore b/.gitignore index 8781153..26d8820 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ /Steel_industry_data.csv +/steel_industry_data_train.csv +/steel_industry_data_test.csv diff --git a/evaluate.py b/evaluate.py new file mode 100644 index 0000000..857eb78 --- /dev/null +++ b/evaluate.py @@ -0,0 +1,41 @@ +import pandas as pd +import numpy as np +from sklearn.model_selection import train_test_split +from sklearn import metrics +import matplotlib.pyplot as plt +import tensorflow as tf +import math + +from tensorflow import keras +from process_dataset import process_data_and_get_x_y + + +def show_result(x, y): + plt.title('Usage kWh Model', fontsize=15, color='g', pad=12) + plt.plot(x, y, 'o', color='r') + + m, b = np.polyfit(x, y, 1) + plt.plot(x, m * x + b, color='darkblue') + plt.xlabel('Actual') + plt.ylabel('Predicted') + plt.show() + + +model = keras.models.load_model('steel_industry_model') + +energy_data_test = pd.read_csv('Steel_industry_data_test.csv') +energy_data_test, x_test, y_test = process_data_and_get_x_y(energy_data_test) + +y_predicted = model.predict(x_test) +test_results = {} +test_results['usage_model'] = model.evaluate( + x_test, + y_test, verbose=0) + +print('Mean Absolute Error : ', metrics.mean_absolute_error(y_test, y_predicted)) +print('Mean Squared Error : ', metrics.mean_squared_error(y_test, y_predicted)) +print('Root Mean Squared Error : ', math.sqrt(metrics.mean_squared_error(y_test, y_predicted))) + +print(test_results['usage_model']) + +show_result(y_test, y_predicted) diff --git a/process_dataset.py b/process_dataset.py index 3869c43..3d8d413 100644 --- a/process_dataset.py +++ b/process_dataset.py @@ -34,28 +34,14 @@ def plot_loss(history): plt.show() -def show_result(x, y): - plt.title('One variable Model', fontsize=15, color='g', pad=12) - plt.plot(x, y, 'o', color='r') - - m, b = np.polyfit(x, y, 1) - plt.plot(x, m * x + b, color='darkblue') - plt.xlabel('Actual') - plt.ylabel('Predicted') - plt.show() - - energy_data_train = pd.read_csv('Steel_industry_data_train.csv') -energy_data_test = pd.read_csv('Steel_industry_data_test.csv') -energy_data_test, x_test, y_test = process_data_and_get_x_y(energy_data_test) energy_data_train, x_train, y_train = process_data_and_get_x_y(energy_data_train) #x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.2, random_state=1) # x_test, x_dev, y_test, y_dev = train_test_split(x_test, y_test, test_size=0.5, random_state=1) # stats -print(energy_data_test.describe(include='all')) print(x_train.describe(include='all')) #print(np.array(x_train).reshape(-1, 1)) @@ -66,10 +52,6 @@ print(normalizer.mean.numpy()) # powinno być niezmienione print(np.array(x_train[:1])) -#usage = np.array(x_train) -#usage_normalizer = keras.layers.Normalization(input_shape=[14, ], axis=1) -#usage_normalizer.adapt(usage) - usage_model = tf.keras.Sequential([ normalizer, keras.layers.Dense(units=10, activation='relu'), @@ -97,30 +79,4 @@ print(hist.tail()) plot_loss(history) -y_predicted = usage_model.predict(x_test) -test_results = {} -test_results['usage_model'] = usage_model.evaluate( - x_test, - y_test, verbose=0) - -print('Mean Absolute Error : ', metrics.mean_absolute_error(y_test, y_predicted)) -print('Mean Squared Error : ', metrics.mean_squared_error(y_test, y_predicted)) -print('Root Mean Squared Error : ', math.sqrt(metrics.mean_squared_error(y_test, y_predicted))) - -print(test_results['usage_model']) - -show_result(y_test, y_predicted) - -#print('Training set size:') -#print(x_train.shape) -#print(y_train.shape) -#print('Testing set size:') -#print(x_test.shape) -#print(y_test.shape) -# print('Dev set size:') -# print(x_dev.shape) -# print(y_dev.shape) - -# print(train_data.describe(include='all')) -# print(test_data.describe(include='all')) -# print(dev_data.describe(include='all')) +usage_model.save('steel_industry_model') diff --git a/steel_industry_data_test.csv.dvc b/steel_industry_data_test.csv.dvc new file mode 100644 index 0000000..9fff34d --- /dev/null +++ b/steel_industry_data_test.csv.dvc @@ -0,0 +1,4 @@ +outs: +- md5: ba702b5ad2647abad7f297449a6ca273 + size: 252454 + path: steel_industry_data_test.csv diff --git a/steel_industry_data_train.csv.dvc b/steel_industry_data_train.csv.dvc new file mode 100644 index 0000000..80c842f --- /dev/null +++ b/steel_industry_data_train.csv.dvc @@ -0,0 +1,4 @@ +outs: +- md5: b9a05e4bc7ecf47bc3fb5ca7d92fd9fa + size: 2021682 + path: steel_industry_data_train.csv