1
0
mirror of https://github.com/andre-wojtowicz/uci-ml-to-r.git synced 2024-11-03 14:20:28 +01:00
uci-ml-to-r/data-collection/credit-card/preprocess.R

48 lines
2.1 KiB
R

# Read the "Data" sheet of the credit-card default workbook and coerce every
# column to its final type.
#
# Takes no arguments; the workbook is looked up inside `orig.dir`, which must
# already be defined in an enclosing environment when this is called.
#
# Returns the worksheet as a data frame in which:
#   * SEX, EDUCATION, MARRIAGE and `default payment next month` are factors,
#   * LIMIT_BAL, AGE, BILL_AMT1..6 and PAY_AMT1..6 are integers,
#   * PAY_0 and PAY_2..6 are integers with negative values replaced by 0.
preprocessDataset = function()
{
    #set.seed(SEED)

    xls.file = "default of credit card clients.xls"
    xls.path = paste0(orig.dir, "/", xls.file)

    # Reading starts at row 2 / column 2 of the sheet (presumably skipping a
    # title row and an ID column -- verify against the workbook).
    dataset = readWorksheet(loadWorkbook(xls.path), sheet="Data",
                            startRow=2, startCol=2, check.names=FALSE)

    # Categorical attributes and the class label become unordered factors.
    # EDUCATION can't be ordered due to inconsistency with the UCI
    # description.
    factor.cols = c("SEX", "EDUCATION", "MARRIAGE",
                    "default payment next month")
    dataset[factor.cols] = lapply(dataset[factor.cols], factor)

    # Plain numeric attributes are stored as integers.
    integer.cols = c("LIMIT_BAL", "AGE",
                     paste0("BILL_AMT", 1:6),
                     paste0("PAY_AMT", 1:6))
    dataset[integer.cols] = lapply(dataset[integer.cols], as.integer)

    # Repayment-status columns (PAY_0, then PAY_2..PAY_6): every negative
    # value is collapsed to 0 before the integer conversion.
    status.cols = paste0("PAY_", c(0, 2:6))
    dataset[status.cols] = lapply(dataset[status.cols], function(status)
    {
        as.integer(replace(status, status < 0, 0))
    })

    # Disabled class balancing: keeps every row of class 1 and draws an
    # equally sized random sample from class 0.
    #dataset.1 = dataset %>% filter(`default payment next month` == 1)
    #dataset.0 = dataset %>% filter(`default payment next month` == 0) %>%
    #                        sample_n(nrow(dataset.1))
    #
    #dataset = rbind(dataset.0, dataset.1)

    return(dataset)
}