Zad 10. DVC

This commit is contained in:
Cezary Gałązkiewicz 2022-06-06 00:28:02 +02:00
parent 002b3b8d6d
commit 3db952a567
5 changed files with 52 additions and 45 deletions

2
.gitignore vendored
View File

@ -1 +1,3 @@
/Steel_industry_data.csv
/steel_industry_data_train.csv
/steel_industry_data_test.csv

41
evaluate.py Normal file
View File

@ -0,0 +1,41 @@
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import metrics
import matplotlib.pyplot as plt
import tensorflow as tf
import math
from tensorflow import keras
from process_dataset import process_data_and_get_x_y
def show_result(x, y):
    """Plot predicted values against actual ones with a fitted straight line.

    Args:
        x: Values for the horizontal axis (labelled 'Actual').
        y: Values for the vertical axis (labelled 'Predicted').

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    plt.title('Usage kWh Model', fontsize=15, color='g', pad=12)
    plt.xlabel('Actual')
    plt.ylabel('Predicted')

    # Raw points as red dots.
    plt.plot(x, y, 'o', color='r')

    # Degree-1 polynomial fit; coefficients come back highest power first.
    slope, intercept = np.polyfit(x, y, 1)
    trend = slope * x + intercept
    plt.plot(x, trend, color='darkblue')

    plt.show()
# Evaluate the model stored under 'steel_industry_model' on the test CSV.
model = keras.models.load_model('steel_industry_model')

# The file name is lowercase to match the DVC-tracked output
# (steel_industry_data_test.csv); the capitalised spelling cannot be opened
# on case-sensitive filesystems.
energy_data_test = pd.read_csv('steel_industry_data_test.csv')
energy_data_test, x_test, y_test = process_data_and_get_x_y(energy_data_test)

y_predicted = model.predict(x_test)

# evaluate() reports the loss (and any metrics) the model was compiled with.
test_results = {'usage_model': model.evaluate(x_test, y_test, verbose=0)}

print('Mean Absolute Error : ', metrics.mean_absolute_error(y_test, y_predicted))
# Compute the MSE once and derive the RMSE from it instead of recomputing.
mse = metrics.mean_squared_error(y_test, y_predicted)
print('Mean Squared Error : ', mse)
print('Root Mean Squared Error : ', math.sqrt(mse))
print(test_results['usage_model'])

# Opens a plot window of actual vs. predicted values.
show_result(y_test, y_predicted)

View File

@ -34,28 +34,14 @@ def plot_loss(history):
plt.show()
def show_result(x, y):
    """Draw an actual-vs-predicted scatter plot with a linear trend line.

    Args:
        x: Values for the horizontal axis (labelled 'Actual').
        y: Values for the vertical axis (labelled 'Predicted').

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    plt.title('One variable Model', fontsize=15, color='g', pad=12)
    plt.xlabel('Actual')
    plt.ylabel('Predicted')

    # Individual observations as red dots.
    plt.plot(x, y, 'o', color='r')

    # First-degree polynomial fit: (slope, intercept).
    coefficients = np.polyfit(x, y, 1)
    slope, intercept = coefficients
    plt.plot(x, slope * x + intercept, color='darkblue')

    plt.show()
# Load the train and test sets from their separate CSV files, so no
# train_test_split is performed here.
# File names are lowercase to match the DVC-tracked outputs
# (steel_industry_data_train.csv / steel_industry_data_test.csv); the
# capitalised spellings cannot be opened on case-sensitive filesystems.
energy_data_train = pd.read_csv('steel_industry_data_train.csv')
energy_data_test = pd.read_csv('steel_industry_data_test.csv')

energy_data_test, x_test, y_test = process_data_and_get_x_y(energy_data_test)
energy_data_train, x_train, y_train = process_data_and_get_x_y(energy_data_train)

# stats
print(energy_data_test.describe(include='all'))
print(x_train.describe(include='all'))
@ -66,10 +52,6 @@ print(normalizer.mean.numpy())
# Print the first training row as-is; it should be unchanged.
print(np.array(x_train[:1]))
# Disabled alternative: a separate Normalization layer adapted on x_train.
#usage = np.array(x_train)
#usage_normalizer = keras.layers.Normalization(input_shape=[14, ], axis=1)
#usage_normalizer.adapt(usage)
usage_model = tf.keras.Sequential([
normalizer,
keras.layers.Dense(units=10, activation='relu'),
@ -97,30 +79,4 @@ print(hist.tail())
# Show the training loss curves, then score the trained model on the test
# data and finally persist it.
plot_loss(history)

y_predicted = usage_model.predict(x_test)

# evaluate() reports the loss (and any metrics) the model was compiled with.
test_results = {'usage_model': usage_model.evaluate(x_test, y_test, verbose=0)}

print('Mean Absolute Error : ', metrics.mean_absolute_error(y_test, y_predicted))
# Compute the MSE once and derive the RMSE from it instead of recomputing.
mse = metrics.mean_squared_error(y_test, y_predicted)
print('Mean Squared Error : ', mse)
print('Root Mean Squared Error : ', math.sqrt(mse))
print(test_results['usage_model'])

# Opens a plot window of actual vs. predicted values.
show_result(y_test, y_predicted)

# Save under the same name that the evaluation script loads.
usage_model.save('steel_industry_model')

View File

@ -0,0 +1,4 @@
outs:
- md5: ba702b5ad2647abad7f297449a6ca273
size: 252454
path: steel_industry_data_test.csv

View File

@ -0,0 +1,4 @@
outs:
- md5: b9a05e4bc7ecf47bc3fb5ca7d92fd9fa
size: 2021682
path: steel_industry_data_train.csv