This commit is contained in:
s444439 2023-04-19 20:08:15 +02:00
parent 5ef8e1bd74
commit 69dbec64e7
2 changed files with 7 additions and 7 deletions

View File

@@ -3,7 +3,7 @@ import os
from sklearn.model_selection import train_test_split
CUTOFF = int(os.environ['CUTOFF'])
adults = pandas.read_csv('adult.csv', engine='python', encoding='ISO-8859-1', sep=',')
adults = pandas.read_csv('adult.csv')
adults = adults.dropna()
adults = adults.sample(CUTOFF)

View File

@@ -19,7 +19,7 @@ def convert_data_to_csv():
csv_file = "adult.csv"
df = pd.read_csv(data_file, header=None)
df.to_csv(csv_file, index=False)
delete_data_file()
# delete_data_file()
return csv_file
@@ -113,8 +113,8 @@ def clean(data):
if __name__ == '__main__':
csv_file_name = download_file()
check_if_data_set_has_division_into_subsets(csv_file_name)
data = pd.read_csv(csv_file_name, dtype={"income": "category"})
get_statistics(data)
normalization(data)
clean(data)
# check_if_data_set_has_division_into_subsets(csv_file_name)
# data = pd.read_csv(csv_file_name, dtype={"income": "category"})
# get_statistics(data)
# normalization(data)
# clean(data)