diff --git a/config-parallel.R b/config-parallel.R
index cafecc9..aaf505d 100644
--- a/config-parallel.R
+++ b/config-parallel.R
@@ -3,6 +3,7 @@
 PARALLEL.USED.METHOD = "REMOTE" # LOCAL or REMOTE
 
 PARALLEL.DISABLE.MKL.THREADS = TRUE
+PARALLEL.RENICE = 19 # nice value for workers in [-20; 19], or NA to disable
 
 # local
 PARALLEL.LOCAL.METHOD = "PSOCK"
diff --git a/init-parallel.R b/init-parallel.R
index cb1300c..8ad35c9 100644
--- a/init-parallel.R
+++ b/init-parallel.R
@@ -15,7 +15,8 @@ cl = if (PARALLEL.USED.METHOD == "LOCAL")
         make.psock.cluster(
             names = PARALLEL.LOCAL.NODES,
             connection.timeout = PARALLEL.LOCAL.CONNECTION.TIMEOUT,
-            outfile = PARALLEL.LOCAL.SLAVE.OUT.FILE)
+            outfile = PARALLEL.LOCAL.SLAVE.OUT.FILE,
+            renice = PARALLEL.RENICE)
     } else if (PARALLEL.LOCAL.METHOD == "FORK") {
 
         flog.info("Creating local FORK cluster")
@@ -49,7 +50,8 @@ cl = if (PARALLEL.USED.METHOD == "LOCAL")
             rscript = PARALLEL.REMOTE.SLAVE.RSCRIPT.PATH,
             homogeneous = PARALLEL.REMOTE.SLAVE.HOMOGENEOUS,
             methods = PARALLEL.REMOTE.SLAVE.METHODS,
-            useXDR = PARALLEL.REMOTE.SLAVE.USEXDR)
+            useXDR = PARALLEL.REMOTE.SLAVE.USEXDR,
+            renice = PARALLEL.RENICE)
     } else {
 
         stop.script(paste("Unknown remote parallel cluster method:",
diff --git a/remote-commands.sh b/remote-commands.sh
index 0d7fe3d..be0ef9f 100644
--- a/remote-commands.sh
+++ b/remote-commands.sh
@@ -16,10 +16,12 @@ MRO_INSTALL_URL="https://mran.microsoft.com/install"
 HOSTS_FILE="remote-hosts.txt"
 CONNECTION_LIST_FILE="remote-connection-list.txt"
 HOSTS_SCANNED_FILE="remote-hosts-scanned.txt"
-DEBIAN_PACKAGES_TO_INSTALL="build-essential gfortran ed htop libxml2-dev ca-certificates curl libcurl4-openssl-dev gdebi-core sshpass default-jre default-jdk libpcre3-dev zlib1g-dev liblzma-dev libbz2-dev libicu-dev"
+DEBIAN_PACKAGES_TO_INSTALL="build-essential gfortran ed htop libxml2-dev ca-certificates curl libcurl4-openssl-dev gdebi-core sshpass default-jre default-jdk libpcre3-dev zlib1g-dev liblzma-dev libbz2-dev libicu-dev at"
 REMOTE_DETECT_LOGICAL_CPUS="FALSE"
 MIN_HOSTS=1
 SWAP_PART="/dev/mapper/linux-swap"
+NEW_PASS="" # password to set for SSH_USER on the hosts; empty skips the change
+POWEROFF_TIME="7:00" # at(1) time specification for the scheduled poweroff
 
 SHELL_SCRIPT=$(basename $0)
 LOG_STEPS="logs/${SHELL_SCRIPT%.*}".log
@@ -243,6 +245,42 @@ hosts_push_ssh_key()
     check_if_command_error
 }
 
+# Set the password of SSH_USER to NEW_PASS on every host in HOSTS_ARRAY.
+# Nothing is changed when NEW_PASS is empty, so no account ends up with a
+# blank password. The "user:password" pair is expanded locally and parsed
+# again by the remote shell, hence the printf %q quoting.
+hosts_change_password()
+{
+    if [ -z "$NEW_PASS" ]; then
+        warn "Empty new password, password change skipped"
+        return 0
+    fi
+
+    local credentials
+    credentials=$(printf '%q' "${SSH_USER}:${NEW_PASS}")
+
+    info "Changing user password on hosts"
+    for host in "${HOSTS_ARRAY[@]}"; do
+        step "-- ${host}"
+            try ssh ${SSH_OPTIONS} -i ${SSH_KEYS_DIR}/${SSH_KEY_PRIV} ${SSH_USER}@${host} "chpasswd <<< ${credentials}"
+        next
+    done
+    check_if_command_error
+}
+
+# Schedule a "poweroff" job at POWEROFF_TIME on every host in HOSTS_ARRAY
+# through at(1); the output of at is discarded on the host.
+hosts_set_power_off()
+{
+    info "Setting power-off on hosts"
+    for host in "${HOSTS_ARRAY[@]}"; do
+        step "-- ${host}"
+            try ssh ${SSH_OPTIONS} -i ${SSH_KEYS_DIR}/${SSH_KEY_PRIV} ${SSH_USER}@${host} "at $POWEROFF_TIME <<< poweroff &> /dev/null"
+        next
+    done
+    check_if_command_error
+}
+
 hosts_scan_available()
 {
     HOSTS_SCANNED_ARRAY=()
@@ -480,11 +518,13 @@ my_configure_hosts()
     #generate_ssh_keys
     #hosts_push_ssh_key
     hosts_scan_available
+    hosts_change_password
     hosts_push_shell_script
     dump_project_r_files
     dump_r_libraries
     hosts_push_project_r_files
     hosts_install_env
+    hosts_set_power_off
     hosts_install_mro
     #hosts_install_r_libraries
     hosts_push_r_libraries_dump
@@ -496,12 +536,14 @@ configure_hosts()
 {
     generate_ssh_keys
     hosts_push_ssh_key
+    hosts_change_password
     hosts_push_shell_script
     hosts_enable_swap
     dump_project_r_files
     dump_r_libraries
     hosts_push_project_r_files
     hosts_install_env
+    hosts_set_power_off
     hosts_install_mro
     hosts_push_r_libraries_dump
     #hosts_install_r_libraries
@@ -509,6 +551,12 @@ configure_hosts()
     #make_remote_connection_list_single
 }
 
+# check if new password is set
+
+if [ -z "$NEW_PASS" ]; then
+    warn "Empty new password"
+fi
+
 # read hosts from file or stdin
 
 if [ -t 0 ]; then
diff --git a/utils-parallel.R b/utils-parallel.R
index d5abc06..3e5d93c 100644
--- a/utils-parallel.R
+++ b/utils-parallel.R
@@ -33,9 +33,9 @@ make.psock.cluster = function(names, connection.timeout, ...)
tryCatch({ cl.node = - evalWithTimeout(parallel:::newPSOCKnode(names[[i]], - options = options.copy, - rank = i), + evalWithTimeout(new.psock.node(names[[i]], + options = options.copy, + rank = i), timeout = connection.timeout, onTimeout = "error") cl[[i]] = cl.node @@ -79,6 +79,65 @@ make.psock.cluster = function(names, connection.timeout, ...) cl.filtered } +new.psock.node = function(machine = "localhost", ..., + options = parallel:::defaultClusterOptions, rank) +{ + options <- parallel:::addClusterOptions(options, list(...)) + if (is.list(machine)) { + options <- parallel:::addClusterOptions(options, machine) + machine <- machine$host + } + outfile <- parallel:::getClusterOption("outfile", options) + master <- if (machine == "localhost") + "localhost" + else parallel:::getClusterOption("master", options) + port <- parallel:::getClusterOption("port", options) + manual <- parallel:::getClusterOption("manual", options) + timeout <- parallel:::getClusterOption("timeout", options) + methods <- parallel:::getClusterOption("methods", options) + useXDR <- parallel:::getClusterOption("useXDR", options) + env <- paste0("MASTER=", master, " PORT=", port, " OUT=", + outfile, " TIMEOUT=", timeout, " XDR=", useXDR) + arg <- "parallel:::.slaveRSOCK()" + rscript <- if (parallel:::getClusterOption("homogeneous", options)) { + shQuote(parallel:::getClusterOption("rscript", options)) + } + else "Rscript" + rscript_args <- parallel:::getClusterOption("rscript_args", options) + if (methods) + rscript_args <- c("--default-packages=datasets,utils,grDevices,graphics,stats,methods", + rscript_args) + cmd <- if (length(rscript_args)) + paste(rscript, paste(rscript_args, collapse = " "), "-e", + shQuote(arg), env) + else paste(rscript, "-e", shQuote(arg), env) + renice <- parallel:::getClusterOption("renice", options) + if (!is.na(renice) && renice) + cmd <- sprintf("nice -%d %s", as.integer(renice), cmd) + if (manual) { + cat("Manually start worker on", machine, "with\n ", + cmd, "\n") + 
utils::flush.console() + } + else { + if (machine != "localhost") { + rshcmd <- parallel:::getClusterOption("rshcmd", options) + user <- parallel:::getClusterOption("user", options) + cmd <- shQuote(cmd) + cmd <- paste(rshcmd, "-l", user, machine, cmd) + } + if (.Platform$OS.type == "windows") { + system(cmd, wait = FALSE, input = "") + } + else system(cmd, wait = FALSE) + } + con <- socketConnection("localhost", port = port, server = TRUE, + blocking = TRUE, open = "a+b", timeout = timeout) + structure(list(con = con, host = machine, rank = rank), class = if (useXDR) + "SOCKnode" + else "SOCK0node") +} + stop.cluster = function(cl.to.stop = cl) { flog.info("Workers shut down") diff --git a/utils.R b/utils.R index 64ae030..1b42298 100644 --- a/utils.R +++ b/utils.R @@ -17,10 +17,11 @@ stop.script = function(error) if (is.character(error)) { flog.error(error) + } else if ("message" %in% attributes(x)$names) { + flog.error(error$message) } else { - flog.error(getMessage(error)) + try(flog.error(getMessage(error)), silent = TRUE) } throw(error) } -