# ium_444463/stare_zadania/stats.py
#
# 33 lines
# 799 B
# Python
# Raw Normal View History
#
# 2022-04-03 12:17:21 +02:00
import subprocess
import pandas as pd
import numpy as np
# 2022-04-03 12:52:57 +02:00
import os
# 2022-04-03 12:17:21 +02:00
# 2022-04-03 12:52:57 +02:00
# Directory containing the input CSV parts; '' resolves to the current
# working directory, so the part names below are used as-is.
path = ''
# Parts are concatenated in this order. pandas treats the first line of the
# merged file as the header row, so 'column_titles.csv' presumably holds the
# column names -- verify against the data set.
all_files = ['column_titles.csv', 'data_train.csv', 'data_dev.csv', 'data_test.csv']

# Merge every part into a single data.csv. The context managers guarantee that
# both handles are closed even if a read or write fails part-way through.
with open("data.csv", "w") as data_file:
    for name in all_files:
        with open(os.path.join(path, name), "r") as f:
            content = f.read()
        data_file.write(content)
        # A part that does not end with a newline would otherwise have its
        # last row fused with the first row of the next part.
        if content and not content.endswith("\n"):
            data_file.write("\n")

# Load the merged file into a DataFrame for the statistics below.
data = pd.read_csv('data.csv')
# 2022-04-03 12:17:21 +02:00
# Replace missing values with empty strings before reporting. Note that any
# numeric column that contained NaN becomes an object column after this step.
data = data.replace(np.nan, '', regex=True)

# Number of rows in the merged data set.
print("="*20)
print('Ilość wierszy w zbiorze: ',len(data))

# Frequency of each value in the "department" column.
print("="*10, ' data["department"].value_counts() ', 10*'=')
print(data["department"].value_counts())

# Median of the numeric columns only: with pandas >= 2.0 the default
# numeric_only=False raises TypeError as soon as the frame holds a
# non-numeric (object) column, which the replacement above can produce.
print("="*10, ' data.median() ', 10*'=')
print(data.median(numeric_only=True))

# Summary statistics for every column; computed once and reused for both the
# console output and the file export.
print("="*10, ' data.describe(include="all") ', 10*'=')
summary = data.describe(include='all')
print(summary)

# Append the summary to stats.txt as tab-separated values without the header
# row or the index column. mode='a' means repeated runs accumulate in the file.
summary.to_csv(r'stats.txt', header=None, index=None, sep='\t', mode='a')