mirror of
https://github.com/andre-wojtowicz/r-distributed-computing
synced 2024-11-24 15:45:26 +01:00
added renice, changing passwords, setting poweroff;
fixed displaying errors
This commit is contained in:
parent
3acc1457b7
commit
2db3c06d02
@ -3,6 +3,7 @@
|
||||
PARALLEL.USED.METHOD = "REMOTE" # LOCAL or REMOTE
|
||||
PARALLEL.DISABLE.MKL.THREADS = TRUE
|
||||
|
||||
PARALLEL.RENICE = 19 # [-20; 19] or NA
|
||||
|
||||
# local
|
||||
PARALLEL.LOCAL.METHOD = "PSOCK"
|
||||
|
@ -15,7 +15,9 @@ cl = if (PARALLEL.USED.METHOD == "LOCAL")
|
||||
make.psock.cluster(
|
||||
names = PARALLEL.LOCAL.NODES,
|
||||
connection.timeout = PARALLEL.LOCAL.CONNECTION.TIMEOUT,
|
||||
outfile = PARALLEL.LOCAL.SLAVE.OUT.FILE)
|
||||
outfile = PARALLEL.LOCAL.SLAVE.OUT.FILE,
|
||||
renice = PARALLEL.RENICE
|
||||
)
|
||||
} else if (PARALLEL.LOCAL.METHOD == "FORK")
|
||||
{
|
||||
flog.info("Creating local FORK cluster")
|
||||
@ -49,7 +51,8 @@ cl = if (PARALLEL.USED.METHOD == "LOCAL")
|
||||
rscript = PARALLEL.REMOTE.SLAVE.RSCRIPT.PATH,
|
||||
homogeneous = PARALLEL.REMOTE.SLAVE.HOMOGENEOUS,
|
||||
methods = PARALLEL.REMOTE.SLAVE.METHODS,
|
||||
useXDR = PARALLEL.REMOTE.SLAVE.USEXDR)
|
||||
useXDR = PARALLEL.REMOTE.SLAVE.USEXDR,
|
||||
renice = PARALLEL.RENICE)
|
||||
}
|
||||
else {
|
||||
stop.script(paste("Unknown remote parallel cluster method:",
|
||||
|
@ -16,10 +16,12 @@ MRO_INSTALL_URL="https://mran.microsoft.com/install"
|
||||
HOSTS_FILE="remote-hosts.txt"
|
||||
CONNECTION_LIST_FILE="remote-connection-list.txt"
|
||||
HOSTS_SCANNED_FILE="remote-hosts-scanned.txt"
|
||||
DEBIAN_PACKAGES_TO_INSTALL="build-essential gfortran ed htop libxml2-dev ca-certificates curl libcurl4-openssl-dev gdebi-core sshpass default-jre default-jdk libpcre3-dev zlib1g-dev liblzma-dev libbz2-dev libicu-dev"
|
||||
DEBIAN_PACKAGES_TO_INSTALL="build-essential gfortran ed htop libxml2-dev ca-certificates curl libcurl4-openssl-dev gdebi-core sshpass default-jre default-jdk libpcre3-dev zlib1g-dev liblzma-dev libbz2-dev libicu-dev at"
|
||||
REMOTE_DETECT_LOGICAL_CPUS="FALSE"
|
||||
MIN_HOSTS=1
|
||||
SWAP_PART="/dev/mapper/linux-swap"
|
||||
NEW_PASS=""
|
||||
POWEROFF_TIME="7:00"
|
||||
|
||||
SHELL_SCRIPT=$(basename $0)
|
||||
LOG_STEPS="logs/${SHELL_SCRIPT%.*}".log
|
||||
@ -243,6 +245,28 @@ hosts_push_ssh_key()
|
||||
check_if_command_error
|
||||
}
|
||||
|
||||
# Change the password of ${SSH_USER} on every host listed in HOSTS_ARRAY.
#
# Globals read: HOSTS_ARRAY, SSH_OPTIONS, SSH_KEYS_DIR, SSH_KEY_PRIV,
#               SSH_USER, NEW_PASS.
#
# The "user:password" pair is shell-quoted with printf %q before being embedded
# in the remote command line, so passwords containing spaces, quotes, `$`, `;`
# and similar characters reach chpasswd verbatim instead of being interpreted
# by the remote shell. The remote side still relies on a bash-compatible login
# shell because of the `<<<` here-string (as the original command did).
#
# When NEW_PASS is empty nothing is changed on the hosts.
# NOTE(review): chpasswd normally needs root privileges - presumably SSH_USER
# is a privileged account; confirm.
hosts_change_password()
{
    info "Changing user password on hosts"

    # An empty password would otherwise be sent to chpasswd as "user:".
    if [ -z "${NEW_PASS}" ]; then
        warn "Empty new password, skipping password change"
        return 0
    fi

    # Quote once, outside the loop; the value is identical for every host.
    local credentials
    printf -v credentials '%q' "${SSH_USER}:${NEW_PASS}"

    for host in "${HOSTS_ARRAY[@]}"; do
        step "-- ${host}"
        # SSH_OPTIONS is intentionally left unquoted so it splits into options.
        try ssh ${SSH_OPTIONS} -i "${SSH_KEYS_DIR}/${SSH_KEY_PRIV}" "${SSH_USER}@${host}" "chpasswd <<< ${credentials}"
        next
    done

    check_if_command_error
}
|
||||
|
||||
# Queue a `poweroff` job with at(1) on every host listed in HOSTS_ARRAY.
#
# Globals read: HOSTS_ARRAY, SSH_OPTIONS, SSH_KEYS_DIR, SSH_KEY_PRIV,
#               SSH_USER, POWEROFF_TIME.
#
# The remote command feeds the word "poweroff" to `at` through a here-string
# and discards all of at's output.
hosts_set_power_off()
{
    info "Setting power-off on hosts"

    # The remote command line is the same for every host, so build it once.
    local schedule_cmd="at $POWEROFF_TIME <<< poweroff &> /dev/null"
    local key_file=${SSH_KEYS_DIR}/${SSH_KEY_PRIV}

    for host in "${HOSTS_ARRAY[@]}"
    do
        step "-- ${host}"
        try ssh ${SSH_OPTIONS} -i ${key_file} ${SSH_USER}@${host} "${schedule_cmd}"
        next
    done

    check_if_command_error
}
|
||||
|
||||
hosts_scan_available()
|
||||
{
|
||||
HOSTS_SCANNED_ARRAY=()
|
||||
@ -480,11 +504,13 @@ my_configure_hosts()
|
||||
#generate_ssh_keys
|
||||
#hosts_push_ssh_key
|
||||
hosts_scan_available
|
||||
hosts_change_password
|
||||
hosts_push_shell_script
|
||||
dump_project_r_files
|
||||
dump_r_libraries
|
||||
hosts_push_project_r_files
|
||||
hosts_install_env
|
||||
hosts_set_power_off
|
||||
hosts_install_mro
|
||||
#hosts_install_r_libraries
|
||||
hosts_push_r_libraries_dump
|
||||
@ -496,12 +522,14 @@ configure_hosts()
|
||||
{
|
||||
generate_ssh_keys
|
||||
hosts_push_ssh_key
|
||||
hosts_change_password
|
||||
hosts_push_shell_script
|
||||
hosts_enable_swap
|
||||
dump_project_r_files
|
||||
dump_r_libraries
|
||||
hosts_push_project_r_files
|
||||
hosts_install_env
|
||||
hosts_set_power_off
|
||||
hosts_install_mro
|
||||
hosts_push_r_libraries_dump
|
||||
#hosts_install_r_libraries
|
||||
@ -509,6 +537,12 @@ configure_hosts()
|
||||
#make_remote_connection_list_single
|
||||
}
|
||||
|
||||
# check if new password is set
|
||||
|
||||
# Warn early when no replacement password has been configured.
if [ -z "$NEW_PASS" ]; then
    warn "Empty new password"
fi
|
||||
|
||||
# read hosts from file or stdin
|
||||
|
||||
if [ -t 0 ]; then
|
||||
|
@ -33,7 +33,7 @@ make.psock.cluster = function(names, connection.timeout, ...)
|
||||
|
||||
tryCatch({
|
||||
cl.node =
|
||||
evalWithTimeout(parallel:::newPSOCKnode(names[[i]],
|
||||
evalWithTimeout(new.psock.node(names[[i]],
|
||||
options = options.copy,
|
||||
rank = i),
|
||||
timeout = connection.timeout,
|
||||
@ -79,6 +79,65 @@ make.psock.cluster = function(names, connection.timeout, ...)
|
||||
cl.filtered
|
||||
}
|
||||
|
||||
# Start one PSOCK worker and return the node object connected to it.
#
# Used by make.psock.cluster in place of parallel:::newPSOCKnode so that the
# worker command can be prefixed with `nice` when the "renice" cluster option
# is set.
#
# Arguments:
#   machine  host name, or a list of per-node options with a `host` element.
#   ...      extra cluster options merged into `options`.
#   options  cluster options environment (defaults to
#            parallel:::defaultClusterOptions).
#   rank     rank stored in the returned node object.
#
# Returns a list with elements `con` (socket connection to the worker),
# `host` and `rank`, classed "SOCKnode" when useXDR is TRUE and "SOCK0node"
# otherwise.
new.psock.node = function(machine = "localhost", ...,
                          options = parallel:::defaultClusterOptions, rank)
{
    options <- parallel:::addClusterOptions(options, list(...))
    if (is.list(machine)) {
        # Per-node options override the cluster-wide ones.
        options <- parallel:::addClusterOptions(options, machine)
        machine <- machine$host
    }

    outfile <- parallel:::getClusterOption("outfile", options)
    master  <- if (machine == "localhost") {
        "localhost"
    } else {
        parallel:::getClusterOption("master", options)
    }
    port    <- parallel:::getClusterOption("port", options)
    manual  <- parallel:::getClusterOption("manual", options)
    timeout <- parallel:::getClusterOption("timeout", options)
    methods <- parallel:::getClusterOption("methods", options)
    useXDR  <- parallel:::getClusterOption("useXDR", options)

    # Settings handed to the worker as trailing command-line arguments.
    env <- paste0("MASTER=", master, " PORT=", port, " OUT=",
                  outfile, " TIMEOUT=", timeout, " XDR=", useXDR)
    arg <- "parallel:::.slaveRSOCK()"

    rscript <- if (parallel:::getClusterOption("homogeneous", options)) {
        shQuote(parallel:::getClusterOption("rscript", options))
    } else {
        "Rscript"
    }

    rscript_args <- parallel:::getClusterOption("rscript_args", options)
    if (methods) {
        rscript_args <- c("--default-packages=datasets,utils,grDevices,graphics,stats,methods",
                          rscript_args)
    }

    cmd <- if (length(rscript_args)) {
        paste(rscript, paste(rscript_args, collapse = " "), "-e",
              shQuote(arg), env)
    } else {
        paste(rscript, "-e", shQuote(arg), env)
    }

    # Prefix with nice unless the option is NA or 0. The POSIX `-n increment`
    # form is used so that negative values are passed as `nice -n -5` rather
    # than the obsolete and ambiguous `nice --5`.
    renice <- parallel:::getClusterOption("renice", options)
    if (!is.na(renice) && renice != 0) {
        cmd <- sprintf("nice -n %d %s", as.integer(renice), cmd)
    }

    if (manual) {
        cat("Manually start worker on", machine, "with\n ",
            cmd, "\n")
        utils::flush.console()
    } else {
        if (machine != "localhost") {
            # Wrap the worker command in a remote-shell invocation.
            rshcmd <- parallel:::getClusterOption("rshcmd", options)
            user   <- parallel:::getClusterOption("user", options)
            cmd    <- shQuote(cmd)
            cmd    <- paste(rshcmd, "-l", user, machine, cmd)
        }

        if (.Platform$OS.type == "windows") {
            system(cmd, wait = FALSE, input = "")
        } else {
            system(cmd, wait = FALSE)
        }
    }

    # Listen for the worker connecting back on `port`.
    con <- socketConnection("localhost", port = port, server = TRUE,
                            blocking = TRUE, open = "a+b", timeout = timeout)

    structure(list(con = con, host = machine, rank = rank),
              class = if (useXDR) "SOCKnode" else "SOCK0node")
}
|
||||
|
||||
stop.cluster = function(cl.to.stop = cl)
|
||||
{
|
||||
flog.info("Workers shut down")
|
||||
|
Loading…
Reference in New Issue
Block a user