update
This commit is contained in:
parent
5ef8e1bd74
commit
69dbec64e7
@ -3,7 +3,7 @@ import os
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
CUTOFF = int(os.environ['CUTOFF'])
|
||||
adults = pandas.read_csv('adult.csv', engine='python', encoding='ISO-8859-1', sep=',')
|
||||
adults = pandas.read_csv('adult.csv')
|
||||
adults = adults.dropna()
|
||||
|
||||
adults = adults.sample(CUTOFF)
|
||||
|
12
script.py
12
script.py
@ -19,7 +19,7 @@ def convert_data_to_csv():
|
||||
csv_file = "adult.csv"
|
||||
df = pd.read_csv(data_file, header=None)
|
||||
df.to_csv(csv_file, index=False)
|
||||
delete_data_file()
|
||||
# delete_data_file()
|
||||
return csv_file
|
||||
|
||||
|
||||
@ -113,8 +113,8 @@ def clean(data):
|
||||
|
||||
if __name__ == '__main__':
|
||||
csv_file_name = download_file()
|
||||
check_if_data_set_has_division_into_subsets(csv_file_name)
|
||||
data = pd.read_csv(csv_file_name, dtype={"income": "category"})
|
||||
get_statistics(data)
|
||||
normalization(data)
|
||||
clean(data)
|
||||
# check_if_data_set_has_division_into_subsets(csv_file_name)
|
||||
# data = pd.read_csv(csv_file_name, dtype={"income": "category"})
|
||||
# get_statistics(data)
|
||||
# normalization(data)
|
||||
# clean(data)
|
||||
|
Loading…
Reference in New Issue
Block a user