ium_444463/stare_zadania/download_data_and_process.py
Mikołaj Pokrywka d36302317c NN with on value
2022-04-23 13:52:09 +02:00

30 lines
798 B
Python

import subprocess
import pandas as pd
import numpy as np
import subprocess
# Run the helper shell script before loading the CSV. The script itself is not
# visible from this file; presumably it downloads/prepares
# 'fake_job_postings.csv' -- TODO confirm.
rc = subprocess.call("./process_data.sh")
if rc != 0:
    # Do not abort: a previously produced CSV may still be usable. Just make
    # the failure visible instead of silently ignoring the exit status.
    print('process_data.sh exited with status', rc)

# Alternative download path through the Kaggle API, kept for reference:
# import kaggle
# kaggle.api.authenticate()
# kaggle.api.dataset_download_files('shivamb/real-or-fake-fake-jobposting-prediction', path='fake_job_postings.csv', unzip=True)

data = pd.read_csv('fake_job_postings.csv')
# Replace every missing value with an empty string.
data = data.fillna('')

print("=" * 20)
print('Ilość wierszy w zbiorze: ', len(data))

print("=" * 10, ' data["department"].value_counts() ', 10 * '=')
print(data["department"].value_counts())

print("=" * 10, ' data.median() ', 10 * '=')
# After the fill above the frame contains string columns; the median is only
# defined for numeric ones, and recent pandas raises TypeError unless
# non-numeric columns are excluded explicitly.
print(data.median(numeric_only=True))

print("=" * 10, ' data.describe(include="all") ', 10 * '=')
# Compute the summary once and reuse it for both the console and the file.
summary = data.describe(include='all')
print(summary)
# Append (mode='a') the summary rows to stats.txt as tab-separated values,
# without the column header or the index labels.
summary.to_csv(r'stats.txt', header=False, index=False, sep='\t', mode='a')