2016-07-17 02:35:55 +02:00
|
|
|
# Read the CTG workbook and return a cleaned, typed data frame.
#
# Takes no arguments. The workbook "CTG.xls" is looked up inside `orig.dir`,
# which is not defined here and must exist in the calling environment.
#
# Steps:
#   1. Read the "Raw Data" sheet.
#   2. Keep columns LB through FS plus NSP, without DS and DR.
#   3. Drop every row that has at least one missing value.
#   4. Coerce column types:
#        - the columns in `int.cols` become integer,
#        - Tendency becomes an ordered factor with levels -1 < 0 < 1,
#        - the columns in `fac.cols` become (unordered) factors,
#        - NSP has every value 2 recoded to 3 and is then turned into a factor.
#
# Returns the resulting data frame.
preprocess.dataset = function()
{
  # Columns coerced to integer.
  int.cols = c("LB", "AC", "FM", "UC", "ASTV", "ALTV", "DL", "DP", "Width",
               "Min", "Max", "Nmax", "Nzeros", "Mode", "Mean", "Median",
               "Variance")
  # Columns coerced to plain factors with default levels.
  fac.cols = c("A", "B", "C", "D", "E", "AD", "DE", "LD", "FS")

  workbook = loadWorkbook(file.path(orig.dir, "CTG.xls"))
  raw = readWorksheet(workbook, sheet = "Raw Data")

  kept = select(raw, LB:FS, NSP, -c(DS, DR))
  clean = filter(kept, complete.cases(kept))

  clean[int.cols] = lapply(clean[int.cols], as.integer)
  clean$Tendency = factor(clean$Tendency, levels = c(-1, 0, 1),
                          ordered = TRUE)
  clean[fac.cols] = lapply(clean[fac.cols], factor)

  # Merge class 2 into class 3 before building the factor, so the resulting
  # factor has no level "2".
  clean$NSP = factor(replace(clean$NSP, clean$NSP == 2, 3))

  clean
}
|