ium_444409/power_plant_data_stats.py

30 lines
809 B
Python
Raw Normal View History

2022-04-01 22:25:05 +02:00
"""Print summary statistics for the Plant 1 generation data and its splits.

Run as a script, this prints ``DataFrame.describe(include='all')`` for the
full dataset and for the dev, test and train splits (in that order), and
then prints the test split with its ``DC_POWER`` column scaled by the
column's maximum absolute value.
"""
import pandas as pd

# One entry per report, in print order:
# (split name, header line printed before the statistics, CSV path).
REPORTS = (
    (
        'all',
        '# statystyki dla pełnego zbioru',
        'data/Plant_1_Generation_Data.csv',
    ),
    (
        'dev',
        '# statystyki dla zbioru dev',
        'data/Plant_1_Generation_Data.csv.dev',
    ),
    (
        'test',
        '# statystyki dla zbioru test',
        'data/Plant_1_Generation_Data.csv.test',
    ),
    (
        'train',
        '# statystyki dla zbioru train',
        'data/Plant_1_Generation_Data.csv.train',
    ),
)

# Column rescaled in the final "normalizacja" section.
NORMALIZED_COLUMN = 'DC_POWER'


def describe_split(header: str, path: str) -> pd.DataFrame:
    """Print *header*, load the CSV at *path* and print its statistics.

    The header is printed before the file is read, so it still appears in
    the output when reading fails.

    Returns:
        The DataFrame loaded from *path*.
    """
    print(header)
    frame = pd.read_csv(path)
    print(frame.describe(include='all'))
    return frame


def normalize_column(frame: pd.DataFrame, column: str) -> pd.DataFrame:
    """Return a copy of *frame* with *column* divided by its max absolute value.

    The input frame is not modified. Signs are preserved, so the scaled
    values lie in [-1, 1]. When the largest absolute value is exactly zero
    the column is left unchanged, because dividing would turn every entry
    into NaN (0 / 0).
    """
    normalized = frame.copy()
    peak = normalized[column].abs().max()
    # A NaN peak (empty or all-NaN column) compares unequal to 0 and falls
    # through to the division, which simply propagates NaN.
    if peak != 0:
        normalized[column] = normalized[column] / peak
    return normalized


def main() -> None:
    """Print the statistics of every split, then the normalized test split."""
    frames = {}
    for name, header, path in REPORTS:
        frames[name] = describe_split(header, path)

    print('# normalizacja')
    print(normalize_column(frames['test'], NORMALIZED_COLUMN))


if __name__ == '__main__':
    main()