Zad 10. DVC

2022-06-06 00:28:02 +02:00 · 2022-06-06 00:28:02 +02:00 · 3db952a567
commit 3db952a567
parent 002b3b8d6d
5 changed files with 52 additions and 45 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1 +1,3 @@
 /Steel_industry_data.csv
 /steel_industry_data_train.csv
 /steel_industry_data_test.csv
--- a/evaluate.py
+++ b/evaluate.py
@ -0,0 +1,41 @@
 import pandas as pd
 import numpy as np
 from sklearn.model_selection import train_test_split
 from sklearn import metrics
 import matplotlib.pyplot as plt
 import tensorflow as tf
 import math
 from tensorflow import keras
 from process_dataset import process_data_and_get_x_y
 def show_result(x, y):
    """Scatter-plot ``y`` against ``x`` and overlay a fitted straight line.

    ``x`` is drawn along the axis labelled 'Actual' and ``y`` along the axis
    labelled 'Predicted'. The function returns nothing; it displays the
    figure with ``plt.show()``.
    """
    plt.title('Usage kWh Model', fontsize=15, color='g', pad=12)
    # Raw points as red circle markers.
    plt.plot(x, y, 'o', color='r')
    # Degree-1 polynomial fit; m and b are used below as slope and intercept.
    m, b = np.polyfit(x, y, 1)
    # Fitted line evaluated at the same x values.
    # NOTE(review): `m * x` assumes x supports element-wise arithmetic
    # (e.g. a NumPy array or pandas Series) - confirm against callers.
    plt.plot(x, m * x + b, color='darkblue')
    plt.xlabel('Actual')
    plt.ylabel('Predicted')
    plt.show()
 # Load the model stored under 'steel_industry_model'.
 model = keras.models.load_model('steel_industry_model')
 # NOTE(review): this name starts with an upper-case 'S', while the DVC file in
 # this commit tracks 'steel_industry_data_test.csv' (lower-case). On a
 # case-sensitive filesystem the read may fail - confirm which name is on disk.
 energy_data_test = pd.read_csv('Steel_industry_data_test.csv')
 # process_data_and_get_x_y is imported from process_dataset; its three results
 # are unpacked here as (data frame, x, y). Presumably x holds the features and
 # y the target - verify against the helper.
 energy_data_test, x_test, y_test = process_data_and_get_x_y(energy_data_test)
 # Model predictions for the test inputs.
 y_predicted = model.predict(x_test)
 # Keep whatever model.evaluate returns under the 'usage_model' key.
 test_results = {}
 test_results['usage_model'] = model.evaluate(
    x_test,
    y_test, verbose=0)
 # Error metrics comparing y_test with y_predicted, computed with sklearn.metrics.
 print('Mean Absolute Error : ', metrics.mean_absolute_error(y_test, y_predicted))
 print('Mean Squared Error : ', metrics.mean_squared_error(y_test, y_predicted))
 # RMSE is the square root of the mean squared error.
 print('Root Mean Squared Error : ', math.sqrt(metrics.mean_squared_error(y_test, y_predicted)))
 print(test_results['usage_model'])
 # Plot y_predicted against y_test with a fitted line.
 show_result(y_test, y_predicted)
--- a/process_dataset.py
+++ b/process_dataset.py
@ -34,28 +34,14 @@ def plot_loss(history):
    plt.show()
 def show_result(x, y):
    """Scatter-plot ``y`` against ``x`` and overlay a fitted straight line.

    ``x`` is drawn along the axis labelled 'Actual' and ``y`` along the axis
    labelled 'Predicted'. The function returns nothing; it displays the
    figure with ``plt.show()``.
    """
    plt.title('One variable Model', fontsize=15, color='g', pad=12)
    # Raw points as red circle markers.
    plt.plot(x, y, 'o', color='r')
    # Degree-1 polynomial fit; m and b are used below as slope and intercept.
    m, b = np.polyfit(x, y, 1)
    # Fitted line evaluated at the same x values.
    plt.plot(x, m * x + b, color='darkblue')
    plt.xlabel('Actual')
    plt.ylabel('Predicted')
    plt.show()
 # Read the train and test CSV files.
 # NOTE(review): both names start with an upper-case 'S', while the DVC files in
 # this commit track lower-case 'steel_industry_data_*.csv' paths. On a
 # case-sensitive filesystem these reads may fail - confirm which name is on disk.
 energy_data_train = pd.read_csv('Steel_industry_data_train.csv')
 energy_data_test = pd.read_csv('Steel_industry_data_test.csv')
 # Each call's three results are unpacked as (data frame, x, y).
 energy_data_test, x_test, y_test = process_data_and_get_x_y(energy_data_test)
 energy_data_train, x_train, y_train = process_data_and_get_x_y(energy_data_train)
 # Disabled in-script splitting with train_test_split, kept for reference.
 #x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.2, random_state=1)
 # x_test, x_dev, y_test, y_dev = train_test_split(x_test, y_test, test_size=0.5, random_state=1)
 # stats
 print(energy_data_test.describe(include='all'))
 print(x_train.describe(include='all'))
 #print(np.array(x_train).reshape(-1, 1))
@ -66,10 +52,6 @@ print(normalizer.mean.numpy())
 # should be unchanged
 print(np.array(x_train[:1]))
 #usage = np.array(x_train)
 #usage_normalizer = keras.layers.Normalization(input_shape=[14, ], axis=1)
 #usage_normalizer.adapt(usage)
 usage_model = tf.keras.Sequential([
    normalizer,
    keras.layers.Dense(units=10, activation='relu'),
@ -97,30 +79,4 @@ print(hist.tail())
 plot_loss(history)
-y_predicted = usage_model.predict(x_test)
+usage_model.save('steel_industry_model')
 test_results = {}
 test_results['usage_model'] = usage_model.evaluate(
    x_test,
    y_test, verbose=0)
 print('Mean Absolute Error : ', metrics.mean_absolute_error(y_test, y_predicted))
 print('Mean Squared Error : ', metrics.mean_squared_error(y_test, y_predicted))
 print('Root Mean Squared Error : ', math.sqrt(metrics.mean_squared_error(y_test, y_predicted)))
 print(test_results['usage_model'])
 show_result(y_test, y_predicted)
 #print('Training set size:')
 #print(x_train.shape)
 #print(y_train.shape)
 #print('Testing set size:')
 #print(x_test.shape)
 #print(y_test.shape)
 # print('Dev set size:')
 # print(x_dev.shape)
 # print(y_dev.shape)
 # print(train_data.describe(include='all'))
 # print(test_data.describe(include='all'))
 # print(dev_data.describe(include='all'))
--- a/steel_industry_data_test.csv.dvc
+++ b/steel_industry_data_test.csv.dvc
@ -0,0 +1,4 @@
 outs:
 - md5: ba702b5ad2647abad7f297449a6ca273
  size: 252454
  path: steel_industry_data_test.csv
--- a/steel_industry_data_train.csv.dvc
+++ b/steel_industry_data_train.csv.dvc
@ -0,0 +1,4 @@
 outs:
 - md5: b9a05e4bc7ecf47bc3fb5ca7d92fd9fa
  size: 2021682
  path: steel_industry_data_train.csv