mirror of
https://github.com/andre-wojtowicz/uci-ml-to-r.git
synced 2024-11-03 14:20:28 +01:00
40 lines
1.8 KiB
R
40 lines
1.8 KiB
R
preprocess.dataset = function()
|
|
{
|
|
xls.file = "default of credit card clients.xls"
|
|
|
|
wk = loadWorkbook(file.path(orig.dir, xls.file))
|
|
dataset = readWorksheet(wk, sheet = "Data", startRow = 2, startCol = 2,
|
|
check.names = FALSE)
|
|
|
|
dataset = dataset %>%
|
|
mutate(LIMIT_BAL = as.integer(LIMIT_BAL),
|
|
SEX = factor(SEX),
|
|
EDUCATION = factor(EDUCATION), # can not order due to
|
|
# inconsistency with
|
|
# UCI description
|
|
MARRIAGE = factor(MARRIAGE),
|
|
AGE = as.integer(AGE),
|
|
PAY_0 = as.integer(replace(PAY_0, PAY_0 < 0, 0)),
|
|
PAY_2 = as.integer(replace(PAY_2, PAY_2 < 0, 0)),
|
|
PAY_3 = as.integer(replace(PAY_3, PAY_3 < 0, 0)),
|
|
PAY_4 = as.integer(replace(PAY_4, PAY_4 < 0, 0)),
|
|
PAY_5 = as.integer(replace(PAY_5, PAY_5 < 0, 0)),
|
|
PAY_6 = as.integer(replace(PAY_6, PAY_6 < 0, 0)),
|
|
BILL_AMT1 = as.integer(BILL_AMT1),
|
|
BILL_AMT2 = as.integer(BILL_AMT2),
|
|
BILL_AMT3 = as.integer(BILL_AMT3),
|
|
BILL_AMT4 = as.integer(BILL_AMT4),
|
|
BILL_AMT5 = as.integer(BILL_AMT5),
|
|
BILL_AMT6 = as.integer(BILL_AMT6),
|
|
PAY_AMT1 = as.integer(PAY_AMT1),
|
|
PAY_AMT2 = as.integer(PAY_AMT2),
|
|
PAY_AMT3 = as.integer(PAY_AMT3),
|
|
PAY_AMT4 = as.integer(PAY_AMT4),
|
|
PAY_AMT5 = as.integer(PAY_AMT5),
|
|
PAY_AMT6 = as.integer(PAY_AMT6),
|
|
`default payment next month` =
|
|
factor(`default payment next month`)
|
|
)
|
|
|
|
return(dataset)
|
|
} |