"""Print and save summary statistics for the train/dev/test CSV splits."""

import pandas as pd
import numpy as np

# Column headers of the summary table, in the order calculate_stats returns them.
STAT_COLUMNS = ['size', 'minimum', 'maximum', 'standard deviation', 'median']


def calculate_stats(data: pd.DataFrame, col_name: str) -> list:
    """Return summary statistics for one column of ``data``.

    Args:
        data: Frame holding the column to summarise.
        col_name: Name of the column in ``data``.

    Returns:
        ``[row count, minimum, maximum, standard deviation, median]``.
        The row count is ``len(data)``, i.e. all rows of the frame. The
        standard deviation comes from ``np.std`` with its default
        ``ddof=0`` (population standard deviation).

    Raises:
        KeyError: If ``col_name`` is not a column of ``data``.
    """
    col_values = data[col_name]
    return [
        len(data),
        np.min(col_values),
        np.max(col_values),
        np.std(col_values),
        np.median(col_values),
    ]


def calculate_value_counts(data: pd.DataFrame, col_name: str) -> pd.Series:
    """Return how often each distinct value occurs in ``data[col_name]``.

    Args:
        data: Frame holding the column to count.
        col_name: Name of the column in ``data``.

    Returns:
        The result of ``Series.value_counts()`` for that column.

    Raises:
        KeyError: If ``col_name`` is not a column of ``data``.
    """
    return data[col_name].value_counts()


def build_stats_table(datasets: dict, col_name: str) -> pd.DataFrame:
    """Build one row of ``calculate_stats`` output per named dataset.

    Args:
        datasets: Mapping of row label (e.g. ``'train'``) to its frame;
            rows appear in the mapping's iteration order.
        col_name: Column summarised in every frame.

    Returns:
        A frame indexed by the dataset labels with ``STAT_COLUMNS`` as
        columns.
    """
    rows = [calculate_stats(frame, col_name) for frame in datasets.values()]
    # Pass the row lists straight to pandas instead of wrapping them in
    # np.array: a single ndarray would coerce the integer row count to float.
    return pd.DataFrame(data=rows, index=list(datasets), columns=STAT_COLUMNS)


def main() -> None:
    """Load the three splits, print their statistics and save the table."""
    datasets = {
        'train': pd.read_csv('barcelona_weekends.train.csv'),
        'dev': pd.read_csv('barcelona_weekends.dev.csv'),
        'test': pd.read_csv('barcelona_weekends.test.csv'),
    }

    df = build_stats_table(datasets, 'realSum')
    print(df)

    for name, frame in datasets.items():
        print(
            name.capitalize(),
            calculate_value_counts(frame, 'person_capacity'),
            end='\n\n',
        )

    # Keep the index in the file: without it the three rows of stats.csv
    # cannot be attributed to train/dev/test.
    df.to_csv('stats.csv', index_label='dataset')


if __name__ == '__main__':
    main()