From 5eaccf86e6dc803db28e5b0438594ea6d272ab56 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrzej=20W=C3=B3jtowicz?= Date: Thu, 10 Nov 2016 18:54:23 +0100 Subject: [PATCH] Added enabling swap and investigating dmesg, changed method for detecting logical CPUs, added info about tests. --- README.md | 5 +++-- remote-commands.sh | 28 +++++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 0299b4d..55be768 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # R distributed computing -In this repository I show a practical solution to massive distributed computing in R. I have successfully tested this approach in my research with ~200 remote hosts and ~800 remote connections. Brief examples concern [caret](https://topepo.github.io/caret/) package for creating predictive models and [foreach](https://cran.r-project.org/web/packages/doParallel/index.html) loop for more general parallel computing. +In this repository I show a practical solution to massive distributed computing in R. I have successfully tested this approach in my research with ~200 remote hosts (and ~800 remote connections) on computers located in [Faculty of Mathematics and Computer Science, Adam Mickiewicz University in Poznań](https://translate.google.com/translate?hl=pl&sl=pl&tl=en&u=https://laboratoria.wmi.amu.edu.pl/Laboratoria). Brief examples concern [caret](https://topepo.github.io/caret/) package for creating predictive models and [foreach](https://cran.r-project.org/web/packages/doParallel/index.html) loop for more general parallel computing. The solution is based on: * [Microsoft R Open](http://mran.microsoft.com/), @@ -88,6 +88,7 @@ The `configure_hosts` is a short name for basic execution order: 1. `generate_ssh_keys` - generates ssh keys to communicate with hosts (see `SSH_*` variables), 1. `hosts_push_ssh_key` - pushes the keys to hosts and disables password authentication, 1. 
`hosts_push_shell_script` - pushes the bash script to hosts, + 1. `hosts_enable_swap` - enables the swap partition defined in `SWAP_PART` on hosts, 1. `dump_project_r_files` - gathers all R project files used in the project, 1. `dump_r_libraries` - gathers all R libraries in `~/.checkpoint` directory, 1. `hosts_push_project_r_files` - pushes gathered R project files to hosts, @@ -106,7 +107,7 @@ If you want to make only one connection per node (regardless number of cores), y Alternatively, instead of installing soft on [WMI rescue](http://rescue.wmi.amu.edu.pl), you can create and boot on hosts your own customized distro. -You can investigate hosts install logs through `hosts_check_install_log_*` functions. You can also check and clean remote worker logs through `hosts_check_worker_log` and `hosts_clean_worker_log` procedures, respectively. +You can investigate hosts install logs through `hosts_check_install_log_*` functions. You can also check and clean remote worker logs through `hosts_check_worker_log` and `hosts_clean_worker_log` procedures, respectively. Moreover, you can investigate dmesg through `hosts_check_worker_dmesg`. The functions (excluding `hosts_scan_available`) stop the script if any part of the procedure fails. 
diff --git a/remote-commands.sh b/remote-commands.sh index 7f60830..0d7fe3d 100644 --- a/remote-commands.sh +++ b/remote-commands.sh @@ -19,6 +19,7 @@ HOSTS_SCANNED_FILE="remote-hosts-scanned.txt" DEBIAN_PACKAGES_TO_INSTALL="build-essential gfortran ed htop libxml2-dev ca-certificates curl libcurl4-openssl-dev gdebi-core sshpass default-jre default-jdk libpcre3-dev zlib1g-dev liblzma-dev libbz2-dev libicu-dev" REMOTE_DETECT_LOGICAL_CPUS="FALSE" MIN_HOSTS=1 +SWAP_PART="/dev/mapper/linux-swap" SHELL_SCRIPT=$(basename $0) LOG_STEPS="logs/${SHELL_SCRIPT%.*}".log @@ -279,6 +280,17 @@ hosts_scan_available() done } +hosts_enable_swap() +{ + info "Enabling swap on hosts" + for host in "${HOSTS_ARRAY[@]}"; do + step "-- ${host}" + try ssh ${SSH_OPTIONS} -i ${SSH_KEYS_DIR}/${SSH_KEY_PRIV} ${SSH_USER}@${host} "swapon $SWAP_PART" + next + done + check_if_command_error +} + hosts_push_r_libraries_dump() { info "Pushing R libraries dump to hosts" @@ -383,8 +395,9 @@ make_remote_connection_list() info "'number of cores' per host" for host in "${HOSTS_ARRAY[@]}"; do step "-- ${host}" - cornum=`try ssh ${SSH_OPTIONS} -i ${SSH_KEYS_DIR}/${SSH_KEY_PRIV} ${SSH_USER}@${host} '/usr/bin/Rscript -e "cat(parallel::detectCores(logical = ${REMOTE_DETECT_LOGICAL_CPUS}))"'` + [[ $REMOTE_DETECT_LOGICAL_CPUS == "TRUE" ]] && cornum=`ssh ${SSH_OPTIONS} -i ${SSH_KEYS_DIR}/${SSH_KEY_PRIV} ${SSH_USER}@${host} 'lscpu | grep "^CPU(s):" | grep -o "[0-9]*"'` || cornum=`ssh ${SSH_OPTIONS} -i ${SSH_KEYS_DIR}/${SSH_KEY_PRIV} ${SSH_USER}@${host} 'A=\$(lscpu | grep "Socket(s):" | grep -o "[0-9]*"); B=\$(lscpu | grep "Core(s) per socket:" | grep -o "[0-9]*"); echo \$((A*B))'` + regex='^[0-9]+$' if ! 
[[ $cornum =~ $regex ]] ; then try false @@ -433,6 +446,18 @@ hosts_check_worker_log() check_if_command_error } +hosts_check_worker_dmesg() +{ + info "Checking dmesg on hosts" + for host in "${HOSTS_ARRAY[@]}"; do + step "-- ${host}" + echo + try ssh ${SSH_OPTIONS/-q/} -o LogLevel=error -i ${SSH_KEYS_DIR}/${SSH_KEY_PRIV} ${SSH_USER}@${host} "dmesg -T | tail -n 20" + next + done + check_if_command_error +} + hosts_clean_worker_log() { info "Cleaning workers logs" @@ -472,6 +497,7 @@ configure_hosts() generate_ssh_keys hosts_push_ssh_key hosts_push_shell_script + hosts_enable_swap dump_project_r_files dump_r_libraries hosts_push_project_r_files