Zad 10. DVC

2022-06-06 00:28:02 +02:00 · 2022-06-06 00:28:02 +02:00 · 3db952a567
commit 3db952a567
parent 002b3b8d6d
5 changed files with 52 additions and 45 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1 +1,3 @@
 /Steel_industry_data.csv
+/steel_industry_data_train.csv
+/steel_industry_data_test.csv
--- a/evaluate.py
+++ b/evaluate.py
@ -0,0 +1,41 @@
+import pandas as pd
+import numpy as np
+from sklearn.model_selection import train_test_split
+from sklearn import metrics
+import matplotlib.pyplot as plt
+import tensorflow as tf
+import math
+
+from tensorflow import keras
+from process_dataset import process_data_and_get_x_y
+
+
+def show_result(x, y):  # x goes on the 'Actual' axis, y on the 'Predicted' axis
+    plt.title('Usage kWh Model', fontsize=15, color='g', pad=12)
+    plt.plot(x, y, 'o', color='r')  # markers only: one red circle per (x, y) pair
+    # Overlay the degree-1 least-squares fit of y against x as a dark-blue line.
+    m, b = np.polyfit(x, y, 1)
+    plt.plot(x, m * x + b, color='darkblue')
+    plt.xlabel('Actual')
+    plt.ylabel('Predicted')
+    plt.show()
+
+
+# Load the model saved under this name by process_dataset.py (usage_model.save).
+model = keras.models.load_model('steel_industry_model')
+# Lowercase name: must match the DVC-tracked path in steel_industry_data_test.csv.dvc.
+energy_data_test = pd.read_csv('steel_industry_data_test.csv')
+energy_data_test, x_test, y_test = process_data_and_get_x_y(energy_data_test)
+
+y_predicted = model.predict(x_test)
+# Keep Keras' own evaluation result next to the sklearn metrics printed below.
+test_results = {'usage_model': model.evaluate(x_test, y_test, verbose=0)}
+
+# Compute the MSE once and reuse it for the RMSE line.
+mse = metrics.mean_squared_error(y_test, y_predicted)
+print('Mean Absolute Error : ', metrics.mean_absolute_error(y_test, y_predicted))
+print('Mean Squared Error : ', mse)
+print('Root Mean Squared Error : ', math.sqrt(mse))
+
+print(test_results['usage_model'])
+show_result(y_test, y_predicted)
--- a/process_dataset.py
+++ b/process_dataset.py
@ -34,28 +34,14 @@ def plot_loss(history):
    plt.show()


-def show_result(x, y):
-    plt.title('One variable Model', fontsize=15, color='g', pad=12)
-    plt.plot(x, y, 'o', color='r')
-
-    m, b = np.polyfit(x, y, 1)
-    plt.plot(x, m * x + b, color='darkblue')
-    plt.xlabel('Actual')
-    plt.ylabel('Predicted')
-    plt.show()
-
-
 energy_data_train = pd.read_csv('Steel_industry_data_train.csv')
-energy_data_test = pd.read_csv('Steel_industry_data_test.csv')

-energy_data_test, x_test, y_test = process_data_and_get_x_y(energy_data_test)
 energy_data_train, x_train, y_train = process_data_and_get_x_y(energy_data_train)

 #x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.2, random_state=1)
 # x_test, x_dev, y_test, y_dev = train_test_split(x_test, y_test, test_size=0.5, random_state=1)

 # stats
-print(energy_data_test.describe(include='all'))
 print(x_train.describe(include='all'))
 #print(np.array(x_train).reshape(-1, 1))

@ -66,10 +52,6 @@ print(normalizer.mean.numpy())
 # powinno być niezmienione
 print(np.array(x_train[:1]))

-#usage = np.array(x_train)
-#usage_normalizer = keras.layers.Normalization(input_shape=[14, ], axis=1)
-#usage_normalizer.adapt(usage)
-
 usage_model = tf.keras.Sequential([
    normalizer,
    keras.layers.Dense(units=10, activation='relu'),
@ -97,30 +79,4 @@ print(hist.tail())

 plot_loss(history)

-y_predicted = usage_model.predict(x_test)
-test_results = {}
-test_results['usage_model'] = usage_model.evaluate(
-    x_test,
-    y_test, verbose=0)
-
-print('Mean Absolute Error : ', metrics.mean_absolute_error(y_test, y_predicted))
-print('Mean Squared Error : ', metrics.mean_squared_error(y_test, y_predicted))
-print('Root Mean Squared Error : ', math.sqrt(metrics.mean_squared_error(y_test, y_predicted)))
-
-print(test_results['usage_model'])
-
-show_result(y_test, y_predicted)
-
-#print('Training set size:')
-#print(x_train.shape)
-#print(y_train.shape)
-#print('Testing set size:')
-#print(x_test.shape)
-#print(y_test.shape)
-# print('Dev set size:')
-# print(x_dev.shape)
-# print(y_dev.shape)
-
-# print(train_data.describe(include='all'))
-# print(test_data.describe(include='all'))
-# print(dev_data.describe(include='all'))
+usage_model.save('steel_industry_model')
--- a/steel_industry_data_test.csv.dvc
+++ b/steel_industry_data_test.csv.dvc
@ -0,0 +1,4 @@
+outs:
+- md5: ba702b5ad2647abad7f297449a6ca273
+  size: 252454
+  path: steel_industry_data_test.csv
--- a/steel_industry_data_train.csv.dvc
+++ b/steel_industry_data_train.csv.dvc
@ -0,0 +1,4 @@
+outs:
+- md5: b9a05e4bc7ecf47bc3fb5ca7d92fd9fa
+  size: 2021682
+  path: steel_industry_data_train.csv