Added enabling swap and investigating dmesg, changed method for detecting logical CPUs, added info about tests.

This commit is contained in:
Andrzej Wójtowicz 2016-11-10 18:54:23 +01:00
parent d213ed6cf6
commit 5eaccf86e6
2 changed files with 30 additions and 3 deletions

View File

@ -1,6 +1,6 @@
# R distributed computing # R distributed computing
In this repository I show a practical solution to massive distributed computing in R. I have successfully tested this approach in my research with ~200 remote hosts and ~800 remote connections. Brief examples concern [caret](https://topepo.github.io/caret/) package for creating predictive models and [foreach](https://cran.r-project.org/web/packages/doParallel/index.html) loop for more general parallel computing. In this repository I show a practical solution to massive distributed computing in R. I have successfully tested this approach in my research with ~200 remote hosts (and ~800 remote connections) on computers located in [Faculty of Mathematics and Computer Science, Adam Mickiewicz University in Poznań](https://translate.google.com/translate?hl=pl&sl=pl&tl=en&u=https://laboratoria.wmi.amu.edu.pl/Laboratoria). Brief examples concern [caret](https://topepo.github.io/caret/) package for creating predictive models and [foreach](https://cran.r-project.org/web/packages/doParallel/index.html) loop for more general parallel computing.
The solution is based on: The solution is based on:
* [Microsoft R Open](http://mran.microsoft.com/), * [Microsoft R Open](http://mran.microsoft.com/),
@ -88,6 +88,7 @@ The `configure_hosts` is a short name for basic execution order:
1. `generate_ssh_keys` - generates ssh keys to communicate with hosts (see `SSH_*` variables), 1. `generate_ssh_keys` - generates ssh keys to communicate with hosts (see `SSH_*` variables),
1. `hosts_push_ssh_key` - pushes the keys to hosts and disables password authentication, 1. `hosts_push_ssh_key` - pushes the keys to hosts and disables password authentication,
1. `hosts_push_shell_script` - pushes the bash script to hosts, 1. `hosts_push_shell_script` - pushes the bash script to hosts,
1. `hosts_enable_swap` - enables the swap partition defined in `SWAP_PART` on hosts,
1. `dump_project_r_files` - gathers all R project files used in the project, 1. `dump_project_r_files` - gathers all R project files used in the project,
1. `dump_r_libraries` - gathers all R libraries in `~/.checkpoint` directory, 1. `dump_r_libraries` - gathers all R libraries in `~/.checkpoint` directory,
1. `hosts_push_project_r_files` - pushes gathered R project files to hosts, 1. `hosts_push_project_r_files` - pushes gathered R project files to hosts,
@ -106,7 +107,7 @@ If you want to make only one connection per node (regardless number of cores), y
Alternatively, instead of installing soft on [WMI rescue](http://rescue.wmi.amu.edu.pl), you can create and boot on hosts your own customized distro. Alternatively, instead of installing soft on [WMI rescue](http://rescue.wmi.amu.edu.pl), you can create and boot on hosts your own customized distro.
You can investigate hosts install logs through `hosts_check_install_log_*` functions. You can also check and clean remote worker logs through `hosts_check_worker_log` and `hosts_clean_worker_log` procedures, respectively. You can investigate hosts install logs through `hosts_check_install_log_*` functions. You can also check and clean remote worker logs through `hosts_check_worker_log` and `hosts_clean_worker_log` procedures, respectively. Moreover, you can investigate dmesg through `hosts_check_worker_dmesg`.
The functions (excluding `hosts_scan_available`) stop the script if any part of the procedure fails. The functions (excluding `hosts_scan_available`) stop the script if any part of the procedure fails.

View File

@ -19,6 +19,7 @@ HOSTS_SCANNED_FILE="remote-hosts-scanned.txt"
DEBIAN_PACKAGES_TO_INSTALL="build-essential gfortran ed htop libxml2-dev ca-certificates curl libcurl4-openssl-dev gdebi-core sshpass default-jre default-jdk libpcre3-dev zlib1g-dev liblzma-dev libbz2-dev libicu-dev" DEBIAN_PACKAGES_TO_INSTALL="build-essential gfortran ed htop libxml2-dev ca-certificates curl libcurl4-openssl-dev gdebi-core sshpass default-jre default-jdk libpcre3-dev zlib1g-dev liblzma-dev libbz2-dev libicu-dev"
REMOTE_DETECT_LOGICAL_CPUS="FALSE" REMOTE_DETECT_LOGICAL_CPUS="FALSE"
MIN_HOSTS=1 MIN_HOSTS=1
# Swap device that hosts_enable_swap activates on each host via `swapon`.
SWAP_PART="/dev/mapper/linux-swap"
SHELL_SCRIPT=$(basename $0) SHELL_SCRIPT=$(basename $0)
LOG_STEPS="logs/${SHELL_SCRIPT%.*}".log LOG_STEPS="logs/${SHELL_SCRIPT%.*}".log
@ -279,6 +280,17 @@ hosts_scan_available()
done done
} }
# Enables the swap device named by SWAP_PART on every host in HOSTS_ARRAY.
#
# Globals read: HOSTS_ARRAY, SSH_OPTIONS, SSH_KEYS_DIR, SSH_KEY_PRIV,
#               SSH_USER, SWAP_PART
# Runs `swapon` over ssh once per host, wrapped in the script's
# step/try/next helpers, and finishes with check_if_command_error.
#
# NOTE(review): swapon presumably reports an error when the device is
# already active -- confirm whether re-running this step should tolerate that.
hosts_enable_swap()
{
    # Keep the loop variable out of the global namespace.
    local host

    info "Enabling swap on hosts"

    for host in "${HOSTS_ARRAY[@]}"; do
        step "-- ${host}"
        # SSH_OPTIONS is expanded as a plain string and relies on word
        # splitting, so it stays unquoted on purpose. Everything else is
        # quoted so a key path containing whitespace remains one argument.
        # shellcheck disable=SC2086
        try ssh ${SSH_OPTIONS} -i "${SSH_KEYS_DIR}/${SSH_KEY_PRIV}" "${SSH_USER}@${host}" "swapon ${SWAP_PART}"
        next
    done

    check_if_command_error
}
hosts_push_r_libraries_dump() hosts_push_r_libraries_dump()
{ {
info "Pushing R libraries dump to hosts" info "Pushing R libraries dump to hosts"
@ -383,8 +395,9 @@ make_remote_connection_list()
info "'number of cores' per host" info "'number of cores' per host"
for host in "${HOSTS_ARRAY[@]}"; do for host in "${HOSTS_ARRAY[@]}"; do
step "-- ${host}" step "-- ${host}"
cornum=`try ssh ${SSH_OPTIONS} -i ${SSH_KEYS_DIR}/${SSH_KEY_PRIV} ${SSH_USER}@${host} '/usr/bin/Rscript -e "cat(parallel::detectCores(logical = ${REMOTE_DETECT_LOGICAL_CPUS}))"'`
[[ $REMOTE_DETECT_LOGICAL_CPUS == "TRUE" ]] && cornum=`ssh ${SSH_OPTIONS} -i ${SSH_KEYS_DIR}/${SSH_KEY_PRIV} ${SSH_USER}@${host} 'lscpu | grep "^CPU(s):" | grep -o "[0-9]*"'` || cornum=`ssh ${SSH_OPTIONS} -i ${SSH_KEYS_DIR}/${SSH_KEY_PRIV} ${SSH_USER}@${host} 'A=\$(lscpu | grep "Socket(s):" | grep -o "[0-9]*"); B=\$(lscpu | grep "Core(s) per socket:" | grep -o "[0-9]*"); echo \$((A*B))'`
regex='^[0-9]+$' regex='^[0-9]+$'
if ! [[ $cornum =~ $regex ]] ; then if ! [[ $cornum =~ $regex ]] ; then
try false try false
@ -433,6 +446,18 @@ hosts_check_worker_log()
check_if_command_error check_if_command_error
} }
# Prints the last 20 lines of `dmesg -T` from every host in HOSTS_ARRAY.
#
# Globals read: HOSTS_ARRAY, SSH_OPTIONS, SSH_KEYS_DIR, SSH_KEY_PRIV,
#               SSH_USER
# Each host is queried over ssh inside the script's step/try/next helpers;
# check_if_command_error is called once after the loop.
hosts_check_worker_dmesg()
{
    # Keep the loop variable out of the global namespace.
    local host

    info "Checking dmesg on hosts"

    for host in "${HOSTS_ARRAY[@]}"; do
        step "-- ${host}"
        # Blank line so the remote output starts below the step label.
        echo
        # The first "-q" is stripped from SSH_OPTIONS and LogLevel=error is
        # passed instead. SSH_OPTIONS relies on word splitting, so that
        # expansion stays unquoted on purpose; the key path and target are
        # quoted so whitespace in them cannot split the argument.
        # shellcheck disable=SC2086
        try ssh ${SSH_OPTIONS/-q/} -o LogLevel=error -i "${SSH_KEYS_DIR}/${SSH_KEY_PRIV}" "${SSH_USER}@${host}" "dmesg -T | tail -n 20"
        next
    done

    check_if_command_error
}
hosts_clean_worker_log() hosts_clean_worker_log()
{ {
info "Cleaning workers logs" info "Cleaning workers logs"
@ -472,6 +497,7 @@ configure_hosts()
generate_ssh_keys generate_ssh_keys
hosts_push_ssh_key hosts_push_ssh_key
hosts_push_shell_script hosts_push_shell_script
hosts_enable_swap
dump_project_r_files dump_project_r_files
dump_r_libraries dump_r_libraries
hosts_push_project_r_files hosts_push_project_r_files