import math

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn import metrics
from sklearn.model_selection import train_test_split
from tensorflow import keras
#from tensorflow.keras import layers

# Column names applied positionally to the raw CSV (the original note says
# the NSM column is not part of the input files).
_COLUMN_NAMES = [
    "date",
    "Usage_kWh",
    "Lagging_Current_Reactive.Power_kVarh",
    "Leading_Current_Reactive_Power_kVarh",
    "CO2(tCO2)",
    "Lagging_Current_Power_Factor",
    "Leading_Current_Power_Factor",
    "WeekStatus",
    "Day_of_week",
    "Load_Type",
]


def process_data_and_get_x_y(data):
    """Rename, index and one-hot encode a raw frame, then split features/target.

    Args:
        data: DataFrame whose columns match ``_COLUMN_NAMES`` positionally
            (ten columns). The caller's frame is left untouched.

    Returns:
        A ``(data, x, y)`` tuple: the processed frame indexed by ``date``,
        the feature frame (everything except ``Usage_kWh``) and the
        ``Usage_kWh`` target Series.

    Raises:
        ValueError: raised by pandas when the number of columns does not
            match the number of names being assigned.
    """
    # Work on a copy so the rename below does not mutate the caller's frame.
    data = data.copy()
    data.columns = _COLUMN_NAMES
    data = data.set_index('date')
    # dtype=float keeps the dummy columns numeric; with boolean dummies
    # (the default on recent pandas) np.array(x) degrades to an object array
    # that the Keras normalizer/model cannot consume.
    data = pd.get_dummies(data, drop_first=True, dtype=float)
    # NOTE(review): train and test are encoded independently, so this assumes
    # both files contain the same category values - confirm.
    x = data.drop('Usage_kWh', axis=1)
    # Single-feature alternative:
    # x = data['Lagging_Current_Reactive.Power_kVarh']
    y = data['Usage_kWh']
    return data, x, y


def plot_loss(history):
    """Plot training and validation loss per epoch from a Keras History.

    Args:
        history: object exposing ``history['loss']`` and
            ``history['val_loss']`` (as returned by ``Model.fit``).
    """
    plt.plot(history.history['loss'], label='loss')
    plt.plot(history.history['val_loss'], label='val_loss')
    plt.ylim([0, 10])
    plt.xlabel('Epoch')
    plt.ylabel('Error')
    plt.legend()
    plt.grid(True)
    plt.show()


def show_result(x, y):
    """Scatter predicted against actual values with a least-squares line.

    Args:
        x: actual target values (any array-like).
        y: predicted values; a column vector of shape (n, 1) is accepted
            and flattened.
    """
    # Flatten both inputs so np.polyfit returns scalar slope/intercept even
    # when predictions arrive as an (n, 1) array.
    actual = np.asarray(x, dtype=float).ravel()
    predicted = np.asarray(y, dtype=float).ravel()

    plt.title('One variable Model', fontsize=15, color='g', pad=12)
    plt.plot(actual, predicted, 'o', color='r')
    m, b = np.polyfit(actual, predicted, 1)
    plt.plot(actual, m * actual + b, color='darkblue')
    plt.xlabel('Actual')
    plt.ylabel('Predicted')
    plt.show()


energy_data_train = pd.read_csv('Steel_industry_data_train.csv')
energy_data_test = pd.read_csv('Steel_industry_data_test.csv')

energy_data_test, x_test, y_test = process_data_and_get_x_y(energy_data_test)
energy_data_train, x_train, y_train = process_data_and_get_x_y(energy_data_train)

# Alternative: derive the splits from a single data set instead of two files.
#x_train, x_test, y_train, y_test = train_test_split(x_data, y_data, test_size=0.2, random_state=1)
# x_test, x_dev, y_test, y_dev = train_test_split(x_test, y_test, test_size=0.5, random_state=1)

# stats
print(energy_data_test.describe(include='all'))
print(x_train.describe(include='all'))
#print(np.array(x_train).reshape(-1, 1))

# Learn per-feature mean/variance from the training features only.
normalizer = tf.keras.layers.Normalization(axis=1)
normalizer.adapt(np.array(x_train))
print(normalizer.mean.numpy())
# Original note (translated from Polish): "should be unchanged" - presumably
# refers to the raw first training row printed right after this.
print(np.array(x_train[:1]))

# Regression network: adapted normalizer -> one hidden ReLU layer -> one
# linear output unit.
usage_model = tf.keras.Sequential([
    normalizer,
    keras.layers.Dense(units=10, activation='relu'),
    keras.layers.Dense(units=1),
])
# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line.
usage_model.summary()

usage_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.1),
    loss='mean_absolute_error')

history = usage_model.fit(
    x_train,
    y_train,
    epochs=100,
    # Suppress logging.
    verbose=0,
    # Calculate validation results on 20% of the training data.
    validation_split=0.2)

# Tabulate the per-epoch losses and show the last few epochs.
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
print(hist.tail())
plot_loss(history)

# The model has a single output unit; flatten the predictions to 1-D so the
# metrics and the plot receive the same shape as y_test.
y_predicted = np.asarray(usage_model.predict(x_test)).ravel()

test_results = {
    'usage_model': usage_model.evaluate(x_test, y_test, verbose=0),
}

# Compute MSE once and reuse it for the RMSE.
mean_squared_error = metrics.mean_squared_error(y_test, y_predicted)
print('Mean Absolute Error : ', metrics.mean_absolute_error(y_test, y_predicted))
print('Mean Squared Error : ', mean_squared_error)
print('Root Mean Squared Error : ', math.sqrt(mean_squared_error))

print(test_results['usage_model'])

show_result(y_test, y_predicted)