---
title: "UCI Machine Learning datasets for R"
author: "Andrzej Wójtowicz"
output:
  html_document:
    keep_md: yes
---

```{r global-options, include=FALSE}
# Hide code, warnings and messages in the rendered report; print chunk output
# without the default comment prefix.
knitr::opts_chunk$set(comment="", echo=FALSE, warning=FALSE, message=FALSE)
# The constants used below (PATH_DATASETS, FILE_CONFIG_YAML,
# PATH_DATASET_PREPROCESSED, FILE_PREPROCESSED_OUTPUT) are not defined in this
# document -- presumably they come from config.R; confirm there.
source('config.R')
```

Document generation date: `r Sys.time()`.

```{r show-datasets, results='asis'}
# Emits the whole report body as raw markdown (results='asis'):
#   1. a table of contents with one entry per dataset directory,
#   2. one section per dataset: links, citation, str() dump, a tally of
#      predictor column types and a class-imbalance table.
library(yaml)

dataset.dirs = dir(PATH_DATASETS)

# Parse each dataset's YAML config once; both the table of contents and the
# per-dataset sections read from this list.
# Paths are glued with paste0(), so PATH_DATASETS must already end with a
# path separator.
configs = lapply(dataset.dirs, function(dir.name) {
  yaml.load_file(paste0(PATH_DATASETS, dir.name, "/", FILE_CONFIG_YAML))
})

cat("\n# Table of Contents\n\n")

for (config.yaml in configs) {
  # Build the heading anchor: lower-case, strip punctuation, spaces -> dashes.
  anchor = gsub(" ", "-", gsub("[[:punct:]]", "", tolower(config.yaml$name)))
  cat(paste0("1. [", config.yaml$name, "](#", anchor, ")\n"))
}

cat("\n---\n\n")

for (i in seq_along(dataset.dirs)) {
  dir.name = dataset.dirs[i]
  config.yaml = configs[[i]]

  cat(paste("#", config.yaml$name, "\n\n"))
  cat(paste("**Local directory**:", dir.name, "\n\n"))
  cat(paste0("**Details**: [link](", config.yaml$info, ")\n\n"))

  cat("**Source data files**:\n\n")
  for (file.url in config.yaml$urls) {
    cat(paste0("* [", URLdecode(basename(file.url)), "](", file.url, ")\n"))
  }
  cat("\n")

  cat(paste0("**Cite**:\n```nohighlight\n", config.yaml$cite, "\n```\n\n"))

  cat("**Dataset**:\n\n")

  # PATH_DATASET_PREPROCESSED contains a "*" placeholder that is replaced
  # with the dataset's directory name.
  preprocessed.dir = gsub("\\*", dir.name, PATH_DATASET_PREPROCESSED)
  preprocessed.file.path = paste0(preprocessed.dir, FILE_PREPROCESSED_OUTPUT)
  dataset = readRDS(preprocessed.file.path)

  cat("```nohighlight\n")
  # str() prints the structure itself and returns NULL, so it is called
  # directly rather than wrapped in cat().
  str(dataset)
  cat("\n```\n\n")

  cat("**Predictors**:\n\n")

  # Every column except the last is treated as a predictor. drop = FALSE
  # keeps a data frame even when there is only one predictor column;
  # otherwise the subset collapses to a vector and the tally below would
  # count observations instead of columns.
  predictors = dataset[, seq_len(ncol(dataset) - 1), drop = FALSE]
  predictor.types = vapply(predictors,
                           function(f) paste(class(f), collapse=" "),
                           character(1))
  df.pred = data.frame(table(predictor.types))
  colnames(df.pred) = c("Type", "Frequency")

  cat(knitr::kable(df.pred, format="markdown"), sep="\n")
  cat("\n")

  # The last column is treated as the class label. Counts are sorted in
  # ascending order; rounding a sorted vector of percentages keeps it sorted,
  # so the percentages line up with the counts.
  num.classes = sort(as.numeric(table(dataset[, ncol(dataset)])))
  perc.classes = round(100 * num.classes / nrow(dataset), 0)

  cat("**Class imbalance**:\n\n")

  # Only the two smallest classes are reported.
  # NOTE(review): this looks like it assumes binary class labels -- confirm.
  cat(knitr::kable(data.frame(A=c(paste(perc.classes[1], "%"), num.classes[1]),
                              B=c(paste(perc.classes[2], "%"), num.classes[2])),
                   format="markdown",
                   col.names=c("class A", " class B"),
                   align=c("c", "c")),
      sep="\n")
  cat("\n---\n\n")
}
```