mirror of
https://github.com/andre-wojtowicz/uci-ml-to-r.git
synced 2024-10-01 12:50:36 +02:00
43 lines
1.0 KiB
R
43 lines
1.0 KiB
R
|
# Preprocess every dataset listed under PATH_DATASETS.
#
# For each entry, the dataset-specific preprocessing script is sourced, the
# preprocessDataset() function it defines is called, statistics about the
# result are printed, and the result is saved as an RDS file. Entries whose
# output file already exists are skipped.
#
# The PATH_* / FILE_* constants, mkdir(), printDatasetStatistics() and the
# flog.* loggers are not defined in this file; presumably they come from
# config.R and utils.R (TODO confirm).

# NOTE(review): wiping the workspace is discouraged in scripts; it is kept so
# that running this file behaves exactly as it did before.
rm(list = ls())

source("config.R")
source("utils.R")

library(dplyr)
library(foreign)
library(XLConnect)

flog.info("Started preprocessing dataset collection")

for (dir.name in dir(PATH_DATASETS)) {
    flog.info(paste("Dataset:", dir.name))

    # The path templates carry a literal "*" that stands for the dataset name.
    # orig.dir is not read below; presumably the sourced preprocessing script
    # uses it as a global (TODO confirm), so the name must stay as is.
    orig.dir <- gsub("\\*", dir.name, PATH_DATASET_ORIGINAL)
    dest.dir <- gsub("\\*", dir.name, PATH_DATASET_PREPROCESSED)
    dest.file.path <- paste0(dest.dir, FILE_PREPROCESSED_OUTPUT)

    if (file.exists(dest.file.path)) {
        flog.warn(paste("Target file", basename(dest.file.path),
                        "already exists; skipping..."))
        flog.info("*****")
        next
    }

    r.src.file <- paste0(PATH_DATASETS, dir.name, "/", FILE_PREPROCESSING_SCRIPT)

    # source() evaluates in the global environment, so a preprocessDataset()
    # left over from a previous iteration would still be visible. Remove it
    # first: otherwise a script that fails to define its own function would
    # silently reuse the previous dataset's one and save the wrong data.
    if (exists("preprocessDataset", envir = globalenv(), inherits = FALSE)) {
        rm("preprocessDataset", envir = globalenv())
    }

    source(r.src.file)

    if (!exists("preprocessDataset", mode = "function")) {
        stop("Script ", r.src.file, " does not define preprocessDataset()",
             call. = FALSE)
    }

    dataset <- preprocessDataset()

    printDatasetStatistics(dataset)

    # Create the destination directory only once there is something to save.
    mkdir(dest.dir)
    saveRDS(dataset, dest.file.path)

    flog.info(paste("Created preprocessed file", FILE_PREPROCESSED_OUTPUT))

    flog.info("*****")
}

flog.info("Finished preprocessing dataset collection")
|