30 lines
859 B
Python
30 lines
859 B
Python
#!/usr/bin/python
|
|
import os
|
|
import pprint
|
|
import sys
|
|
|
|
from pandas import read_csv
|
|
|
|
|
|
def save_stats_to_file(data_path, data_name, stats_name):
    """Write a plain-text statistics report for one CSV file.

    The CSV ``data_name`` is read from the ``data_path`` directory with
    pandas. Three labelled sections are then pretty-printed into
    ``stats_name`` in that same directory: the ``describe(include='all')``
    table, the frame's shape, and its ``head()`` rows. The report file is
    opened in write mode, so an existing report is overwritten.

    Args:
        data_path: Directory that holds the CSV and receives the report.
        data_name: File name of the CSV to read.
        stats_name: File name of the report to write.
    """
    frame = read_csv(os.path.join(data_path, data_name))
    report_path = os.path.join(data_path, stats_name)

    with open(report_path, "w") as log_file:
        # One printer bound to the report file serves every section.
        printer = pprint.PrettyPrinter(stream=log_file)
        sections = (
            ('Description: ', frame.describe(include='all')),
            ('Shape: ', frame.shape),
            ('Head: ', frame.head()),
        )
        for label, value in sections:
            # Labels go through pprint as well, so they are written as
            # quoted string reprs, each on its own line.
            printer.pprint(label)
            printer.pprint(value)
|
|
|
|
|
|
def main():
    """Generate statistics reports for the train, test and dev CSV files.

    The data directory is taken from the first command-line argument and
    resolved to an absolute path. For each of ``train_data.csv``,
    ``test_data.csv`` and ``dev_data.csv`` in that directory, a matching
    ``*_stats.txt`` report is written next to it via
    ``save_stats_to_file``.

    Raises:
        SystemExit: With a usage message (non-zero exit status) when no
            data directory argument was supplied.
    """
    # Fail with a readable usage line instead of a bare IndexError
    # traceback when the script is started without its argument.
    if len(sys.argv) < 2:
        script = sys.argv[0] if sys.argv else 'script'
        sys.exit('usage: %s <data_dir>' % script)

    abs_data_path = os.path.abspath(sys.argv[1])

    # (input CSV, output report) pairs, processed in this order.
    file_pairs = (
        ('train_data.csv', 'train_stats.txt'),
        ('test_data.csv', 'test_stats.txt'),
        ('dev_data.csv', 'dev_stats.txt'),
    )
    for data_name, stats_name in file_pairs:
        save_stats_to_file(abs_data_path, data_name, stats_name)
|
|
|
|
|
|
# Run the report generation only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|