ium_444452/Scripts/generate_dataset_stats.py

30 lines
859 B
Python

#!/usr/bin/python
import os
import pprint
import sys
from pandas import read_csv
def save_stats_to_file(data_path, data_name, stats_name):
    """Write summary statistics for one CSV file to a text report.

    Reads ``data_name`` from the ``data_path`` directory with pandas and
    writes three labelled sections to ``stats_name`` in the same directory,
    in this order: ``describe(include='all')``, ``shape`` and ``head()``.
    Labels and values are emitted through ``pprint``, so each label appears
    in the report as a quoted string literal on its own line. An existing
    report with the same name is overwritten.

    Args:
        data_path: Directory that holds the CSV and receives the report.
        data_name: File name of the CSV to summarise.
        stats_name: File name of the text report to create.
    """
    data = read_csv(os.path.join(data_path, data_name))
    # Compute every section before opening the report, so a failure while
    # summarising does not truncate a report that already exists.
    sections = (
        ('Description: ', data.describe(include='all')),
        ('Shape: ', data.shape),
        ('Head: ', data.head()),
    )
    report_path = os.path.join(data_path, stats_name)
    # Explicit UTF-8: without it the encoding follows the platform locale and
    # non-ASCII cell values can fail to encode.
    with open(report_path, "w", encoding="utf-8") as log_file:
        for name, obj in sections:
            pprint.pprint(name, log_file)
            pprint.pprint(obj, log_file)
def main():
    """Generate statistics reports for the train, test and dev CSV files.

    Takes the data directory from the first command-line argument, resolves
    it to an absolute path and, for each of the ``train``, ``test`` and
    ``dev`` splits, calls ``save_stats_to_file`` on ``<split>_data.csv`` to
    produce ``<split>_stats.txt`` in that same directory.

    Raises:
        SystemExit: With a usage message (non-zero exit status) when no
            data directory argument was supplied.
    """
    if len(sys.argv) < 2:
        # Fail with a readable usage line instead of an IndexError traceback.
        prog = sys.argv[0] if sys.argv else 'generate_dataset_stats.py'
        sys.exit('usage: {} <data_dir>'.format(prog))

    abs_data_path = os.path.abspath(sys.argv[1])
    for split in ('train', 'test', 'dev'):
        save_stats_to_file(
            abs_data_path, split + '_data.csv', split + '_stats.txt')
# Run the report generation only when executed as a script, not on import.
if __name__ == "__main__":
    main()