mirror of
https://github.com/andre-wojtowicz/r-distributed-computing
synced 2025-01-04 18:45:28 +01:00
Added enabling swap and investigating dmesg, changed method for detecting logical CPUs, added info about tests.
This commit is contained in:
parent
d213ed6cf6
commit
5eaccf86e6
@ -1,6 +1,6 @@
|
||||
# R distributed computing
|
||||
|
||||
In this repository I show a practical solution to massive distributed computing in R. I have successfully tested this approach in my research with ~200 remote hosts and ~800 remote connections. Brief examples concern [caret](https://topepo.github.io/caret/) package for creating predictive models and [foreach](https://cran.r-project.org/web/packages/doParallel/index.html) loop for more general parallel computing.
|
||||
In this repository I show a practical solution to massive distributed computing in R. I have successfully tested this approach in my research with ~200 remote hosts (and ~800 remote connections) on computers located in the [Faculty of Mathematics and Computer Science, Adam Mickiewicz University in Poznań](https://translate.google.com/translate?hl=pl&sl=pl&tl=en&u=https://laboratoria.wmi.amu.edu.pl/Laboratoria). Brief examples concern the [caret](https://topepo.github.io/caret/) package for creating predictive models and the [foreach](https://cran.r-project.org/web/packages/doParallel/index.html) loop for more general parallel computing.
|
||||
|
||||
The solution is based on:
|
||||
* [Microsoft R Open](http://mran.microsoft.com/),
|
||||
@ -88,6 +88,7 @@ The `configure_hosts` is a short name for basic execution order:
|
||||
1. `generate_ssh_keys` - generates ssh keys to communicate with hosts (see `SSH_*` variables),
|
||||
1. `hosts_push_ssh_key` - pushes the keys to hosts and disables password authentication,
|
||||
1. `hosts_push_shell_script` - pushes the bash script to hosts,
|
||||
1. `hosts_enable_swap` - enables the swap device defined in `SWAP_PART` on hosts,
|
||||
1. `dump_project_r_files` - gathers all R project files used in the project,
|
||||
1. `dump_r_libraries` - gathers all R libraries in `~/.checkpoint` directory,
|
||||
1. `hosts_push_project_r_files` - pushes gathered R project files to hosts,
|
||||
@ -106,7 +107,7 @@ If you want to make only one connection per node (regardless number of cores), y
|
||||
|
||||
Alternatively, instead of installing software on [WMI rescue](http://rescue.wmi.amu.edu.pl), you can create your own customized distro and boot it on the hosts.
|
||||
|
||||
You can investigate hosts install logs through `hosts_check_install_log_*` functions. You can also check and clean remote worker logs through `hosts_check_worker_log` and `hosts_clean_worker_log` procedures, respectively.
|
||||
You can investigate the hosts' install logs through the `hosts_check_install_log_*` functions. You can also check and clean remote worker logs through the `hosts_check_worker_log` and `hosts_clean_worker_log` procedures, respectively. Moreover, you can inspect the hosts' kernel log (dmesg) through `hosts_check_worker_dmesg`.
|
||||
|
||||
The functions (excluding `hosts_scan_available`) stop the script if any part of the procedure fails.
|
||||
|
||||
|
@ -19,6 +19,7 @@ HOSTS_SCANNED_FILE="remote-hosts-scanned.txt"
|
||||
DEBIAN_PACKAGES_TO_INSTALL="build-essential gfortran ed htop libxml2-dev ca-certificates curl libcurl4-openssl-dev gdebi-core sshpass default-jre default-jdk libpcre3-dev zlib1g-dev liblzma-dev libbz2-dev libicu-dev"
|
||||
REMOTE_DETECT_LOGICAL_CPUS="FALSE"
|
||||
MIN_HOSTS=1
|
||||
SWAP_PART="/dev/mapper/linux-swap"
|
||||
|
||||
SHELL_SCRIPT=$(basename $0)
|
||||
LOG_STEPS="logs/${SHELL_SCRIPT%.*}".log
|
||||
@ -279,6 +280,17 @@ hosts_scan_available()
|
||||
done
|
||||
}
|
||||
|
||||
# Enables the swap device named by SWAP_PART on every host in HOSTS_ARRAY.
#
# Globals (read): HOSTS_ARRAY, SSH_OPTIONS, SSH_KEYS_DIR, SSH_KEY_PRIV,
#                 SSH_USER, SWAP_PART
# Arguments:      none
#
# One `swapon` is issued per host over ssh, wrapped in the script's
# step/try/next helpers; check_if_command_error is called once after the loop.
# NOTE(review): those helpers are defined elsewhere in this script; presumably
# they record per-host failures and abort the run - confirm against them.
# NOTE(review): swapon reports an error when the device is already active, so
# re-running this step on already configured hosts may fail - confirm whether
# that is the intended behaviour.
hosts_enable_swap()
{
    # Keep the loop variable out of the global scope.
    local host

    info "Enabling swap on hosts"

    for host in "${HOSTS_ARRAY[@]}"; do
        step "-- ${host}"
            # SSH_OPTIONS is deliberately left unquoted: it holds several
            # space-separated ssh options that must be split into words.
            # The key path and the user@host target are quoted so they are
            # always passed as single arguments.
            try ssh ${SSH_OPTIONS} -i "${SSH_KEYS_DIR}/${SSH_KEY_PRIV}" "${SSH_USER}@${host}" "swapon ${SWAP_PART}"
        next
    done

    check_if_command_error
}
|
||||
|
||||
hosts_push_r_libraries_dump()
|
||||
{
|
||||
info "Pushing R libraries dump to hosts"
|
||||
@ -383,8 +395,9 @@ make_remote_connection_list()
|
||||
info "'number of cores' per host"
|
||||
for host in "${HOSTS_ARRAY[@]}"; do
|
||||
step "-- ${host}"
|
||||
cornum=`try ssh ${SSH_OPTIONS} -i ${SSH_KEYS_DIR}/${SSH_KEY_PRIV} ${SSH_USER}@${host} '/usr/bin/Rscript -e "cat(parallel::detectCores(logical = ${REMOTE_DETECT_LOGICAL_CPUS}))"'`
|
||||
|
||||
[[ $REMOTE_DETECT_LOGICAL_CPUS == "TRUE" ]] && cornum=`ssh ${SSH_OPTIONS} -i ${SSH_KEYS_DIR}/${SSH_KEY_PRIV} ${SSH_USER}@${host} 'lscpu | grep "^CPU(s):" | grep -o "[0-9]*"'` || cornum=`ssh ${SSH_OPTIONS} -i ${SSH_KEYS_DIR}/${SSH_KEY_PRIV} ${SSH_USER}@${host} 'A=\$(lscpu | grep "Socket(s):" | grep -o "[0-9]*"); B=\$(lscpu | grep "Core(s) per socket:" | grep -o "[0-9]*"); echo \$((A*B))'`
|
||||
|
||||
regex='^[0-9]+$'
|
||||
if ! [[ $cornum =~ $regex ]] ; then
|
||||
try false
|
||||
@ -433,6 +446,18 @@ hosts_check_worker_log()
|
||||
check_if_command_error
|
||||
}
|
||||
|
||||
# Prints the last 20 lines of `dmesg -T` from every host in HOSTS_ARRAY.
#
# Globals (read): HOSTS_ARRAY, SSH_OPTIONS, SSH_KEYS_DIR, SSH_KEY_PRIV,
#                 SSH_USER
# Arguments:      none
# Outputs:        for each host, an empty line followed by whatever the
#                 remote `dmesg -T | tail -n 20` writes
#
# Each ssh call is wrapped in the script's step/try/next helpers and
# check_if_command_error is called once after the loop.
# NOTE(review): those helpers are defined elsewhere in this script; presumably
# they record per-host failures and abort the run - confirm against them.
hosts_check_worker_dmesg()
{
    # Keep the loop variable out of the global scope.
    local host

    info "Checking dmesg on hosts"

    for host in "${HOSTS_ARRAY[@]}"; do
        step "-- ${host}"
            echo
            # The first "-q" is stripped from SSH_OPTIONS and LogLevel=error
            # is passed instead. The expansion stays unquoted on purpose so
            # the remaining options are split into separate words; the key
            # path and the user@host target are quoted.
            try ssh ${SSH_OPTIONS/-q/} -o LogLevel=error -i "${SSH_KEYS_DIR}/${SSH_KEY_PRIV}" "${SSH_USER}@${host}" "dmesg -T | tail -n 20"
        next
    done

    check_if_command_error
}
|
||||
|
||||
hosts_clean_worker_log()
|
||||
{
|
||||
info "Cleaning workers logs"
|
||||
@ -472,6 +497,7 @@ configure_hosts()
|
||||
generate_ssh_keys
|
||||
hosts_push_ssh_key
|
||||
hosts_push_shell_script
|
||||
hosts_enable_swap
|
||||
dump_project_r_files
|
||||
dump_r_libraries
|
||||
hosts_push_project_r_files
|
||||
|
Loading…
Reference in New Issue
Block a user