update
This commit is contained in:
parent
5ef8e1bd74
commit
69dbec64e7
@ -3,7 +3,7 @@ import os
|
|||||||
from sklearn.model_selection import train_test_split
|
from sklearn.model_selection import train_test_split
|
||||||
|
|
||||||
CUTOFF = int(os.environ['CUTOFF'])
|
CUTOFF = int(os.environ['CUTOFF'])
|
||||||
adults = pandas.read_csv('adult.csv', engine='python', encoding='ISO-8859-1', sep=',')
|
adults = pandas.read_csv('adult.csv')
|
||||||
adults = adults.dropna()
|
adults = adults.dropna()
|
||||||
|
|
||||||
adults = adults.sample(CUTOFF)
|
adults = adults.sample(CUTOFF)
|
||||||
|
12
script.py
12
script.py
@ -19,7 +19,7 @@ def convert_data_to_csv():
|
|||||||
csv_file = "adult.csv"
|
csv_file = "adult.csv"
|
||||||
df = pd.read_csv(data_file, header=None)
|
df = pd.read_csv(data_file, header=None)
|
||||||
df.to_csv(csv_file, index=False)
|
df.to_csv(csv_file, index=False)
|
||||||
delete_data_file()
|
# delete_data_file()
|
||||||
return csv_file
|
return csv_file
|
||||||
|
|
||||||
|
|
||||||
@ -113,8 +113,8 @@ def clean(data):
|
|||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
csv_file_name = download_file()
|
csv_file_name = download_file()
|
||||||
check_if_data_set_has_division_into_subsets(csv_file_name)
|
# check_if_data_set_has_division_into_subsets(csv_file_name)
|
||||||
data = pd.read_csv(csv_file_name, dtype={"income": "category"})
|
# data = pd.read_csv(csv_file_name, dtype={"income": "category"})
|
||||||
get_statistics(data)
|
# get_statistics(data)
|
||||||
normalization(data)
|
# normalization(data)
|
||||||
clean(data)
|
# clean(data)
|
||||||
|
Loading…
Reference in New Issue
Block a user