1
0
mirror of https://github.com/andre-wojtowicz/uci-ml-to-r.git synced 2024-07-22 07:35:30 +02:00
uci-ml-to-r/data-collection/breast-cancer-wisconsin-original/preprocess.R

21 lines
781 B
R

# Load the raw "breast-cancer-wisconsin.data" file and return a cleaned
# data frame.
#
# The file is read as a headerless CSV from `orig.dir`. `orig.dir` is not
# defined inside this function, so it must be visible in the calling
# environment. The pipeline also relies on `%>%`, select(), filter() and
# mutate() already being attached by the caller (presumably dplyr -- confirm
# against the script that sources this file).
#
# Steps:
#   * name the 11 columns,
#   * drop the "Sample code number" column,
#   * remove rows whose "Bare Nuclei" value is "?" (presumably the marker
#     for a missing value -- confirm against the dataset description),
#   * convert "Class" to a factor and "Bare Nuclei" to integer,
#   * drop unused factor levels.
#
# Returns: the preprocessed data frame.
preprocess.dataset = function()
{
    csv.file = "breast-cancer-wisconsin.data"

    dataset = read.csv(file.path(orig.dir, csv.file), header = FALSE)

    colnames(dataset) = c("Sample code number", "Clump Thickness",
                          "Uniformity of Cell Size", "Uniformity of Cell Shape",
                          "Marginal Adhesion", "Single Epithelial Cell Size",
                          "Bare Nuclei", "Bland Chromatin", "Normal Nucleoli",
                          "Mitoses", "Class")

    dataset = dataset %>%
        select(-`Sample code number`) %>%
        filter(`Bare Nuclei` != "?") %>%
        mutate(Class = factor(Class),
               # The "?" entries make read.csv() parse this column as text.
               # When strings are read as factors (the default before
               # R 4.0.0), as.integer() on the factor would return the
               # internal level codes instead of the printed numbers, so
               # convert through character first. For a character column
               # the extra as.character() is a no-op.
               `Bare Nuclei` = as.integer(as.character(`Bare Nuclei`))) %>%
        droplevels()

    return(dataset)
}