mirror of
https://github.com/andre-wojtowicz/r-distributed-computing
synced 2024-11-28 16:00:28 +01:00
Added enabling swap and investigating dmesg, changed method for detecting logical CPUs, added info about tests.
This commit is contained in:
parent
d213ed6cf6
commit
5eaccf86e6
@ -1,6 +1,6 @@
|
|||||||
# R distributed computing
|
# R distributed computing
|
||||||
|
|
||||||
In this repository I show a practical solution to massive distributed computing in R. I have successfully tested this approach in my research with ~200 remote hosts and ~800 remote connections. Brief examples concern [caret](https://topepo.github.io/caret/) package for creating predictive models and [foreach](https://cran.r-project.org/web/packages/doParallel/index.html) loop for more general parallel computing.
|
In this repository I show a practical solution to massive distributed computing in R. I have successfully tested this approach in my research with ~200 remote hosts (and ~800 remote connections) on computers located at the [Faculty of Mathematics and Computer Science, Adam Mickiewicz University in Poznań](https://translate.google.com/translate?hl=pl&sl=pl&tl=en&u=https://laboratoria.wmi.amu.edu.pl/Laboratoria). The brief examples cover the [caret](https://topepo.github.io/caret/) package for creating predictive models and the [foreach](https://cran.r-project.org/web/packages/doParallel/index.html) loop for more general parallel computing.
|
||||||
|
|
||||||
The solution is based on:
|
The solution is based on:
|
||||||
* [Microsoft R Open](http://mran.microsoft.com/),
|
* [Microsoft R Open](http://mran.microsoft.com/),
|
||||||
@ -88,6 +88,7 @@ The `configure_hosts` is a short name for basic execution order:
|
|||||||
1. `generate_ssh_keys` - generates ssh keys to communicate with hosts (see `SSH_*` variables),
|
1. `generate_ssh_keys` - generates ssh keys to communicate with hosts (see `SSH_*` variables),
|
||||||
1. `hosts_push_ssh_key` - pushes the keys to hosts and disables password authentication,
|
1. `hosts_push_ssh_key` - pushes the keys to hosts and disables password authentication,
|
||||||
1. `hosts_push_shell_script` - pushes the bash script to hosts,
|
1. `hosts_push_shell_script` - pushes the bash script to hosts,
|
||||||
|
1. `hosts_enable_swap` - enables the swap device defined in `SWAP_PART` on hosts,
|
||||||
1. `dump_project_r_files` - gathers all R project files used in the project,
|
1. `dump_project_r_files` - gathers all R project files used in the project,
|
||||||
1. `dump_r_libraries` - gathers all R libraries in `~/.checkpoint` directory,
|
1. `dump_r_libraries` - gathers all R libraries in `~/.checkpoint` directory,
|
||||||
1. `hosts_push_project_r_files` - pushes gathered R project files to hosts,
|
1. `hosts_push_project_r_files` - pushes gathered R project files to hosts,
|
||||||
@ -106,7 +107,7 @@ If you want to make only one connection per node (regardless number of cores), y
|
|||||||
|
|
||||||
Alternatively, instead of installing soft on [WMI rescue](http://rescue.wmi.amu.edu.pl), you can create and boot on hosts your own customized distro.
|
Alternatively, instead of installing soft on [WMI rescue](http://rescue.wmi.amu.edu.pl), you can create and boot on hosts your own customized distro.
|
||||||
|
|
||||||
You can investigate hosts install logs through `hosts_check_install_log_*` functions. You can also check and clean remote worker logs through `hosts_check_worker_log` and `hosts_clean_worker_log` procedures, respectively.
|
You can investigate the hosts' install logs through the `hosts_check_install_log_*` functions. You can also check and clean remote worker logs through the `hosts_check_worker_log` and `hosts_clean_worker_log` procedures, respectively. Moreover, you can inspect the last 20 lines of each host's kernel log (`dmesg`) through `hosts_check_worker_dmesg`.
|
||||||
|
|
||||||
The functions (excluding `hosts_scan_available`) stop the script if any part of the procedure fails.
|
The functions (excluding `hosts_scan_available`) stop the script if any part of the procedure fails.
|
||||||
|
|
||||||
|
@ -19,6 +19,7 @@ HOSTS_SCANNED_FILE="remote-hosts-scanned.txt"
|
|||||||
DEBIAN_PACKAGES_TO_INSTALL="build-essential gfortran ed htop libxml2-dev ca-certificates curl libcurl4-openssl-dev gdebi-core sshpass default-jre default-jdk libpcre3-dev zlib1g-dev liblzma-dev libbz2-dev libicu-dev"
|
DEBIAN_PACKAGES_TO_INSTALL="build-essential gfortran ed htop libxml2-dev ca-certificates curl libcurl4-openssl-dev gdebi-core sshpass default-jre default-jdk libpcre3-dev zlib1g-dev liblzma-dev libbz2-dev libicu-dev"
|
||||||
REMOTE_DETECT_LOGICAL_CPUS="FALSE"
|
REMOTE_DETECT_LOGICAL_CPUS="FALSE"
|
||||||
MIN_HOSTS=1
|
MIN_HOSTS=1
|
||||||
|
SWAP_PART="/dev/mapper/linux-swap"
|
||||||
|
|
||||||
SHELL_SCRIPT=$(basename $0)
|
SHELL_SCRIPT=$(basename $0)
|
||||||
LOG_STEPS="logs/${SHELL_SCRIPT%.*}".log
|
LOG_STEPS="logs/${SHELL_SCRIPT%.*}".log
|
||||||
@ -279,6 +280,17 @@ hosts_scan_available()
|
|||||||
done
|
done
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Enables swap on every host listed in HOSTS_ARRAY by running
# `swapon $SWAP_PART` over ssh.
#
# Globals read: HOSTS_ARRAY, SSH_OPTIONS, SSH_KEYS_DIR, SSH_KEY_PRIV,
#               SSH_USER, SWAP_PART
# Helpers used: info, step, try, next, check_if_command_error
#               (defined elsewhere in this script)
#
# NOTE(review): `swapon` normally exits non-zero when the device is already
# active, so re-running this step on configured hosts may be reported as a
# failure - confirm whether that is the intended behaviour.
hosts_enable_swap()
{
    # Keep the loop variable out of the global scope; helpers called from
    # here still see it through bash's dynamic scoping.
    local host

    info "Enabling swap on hosts"

    for host in "${HOSTS_ARRAY[@]}"; do
        step "-- ${host}"
        # SSH_OPTIONS is left unquoted on purpose: it is a plain string that
        # has to be split into separate ssh options. The key path and the
        # target are quoted so that spaces in them do not split the words.
        try ssh ${SSH_OPTIONS} -i "${SSH_KEYS_DIR}/${SSH_KEY_PRIV}" "${SSH_USER}@${host}" "swapon $SWAP_PART"
        next
    done

    check_if_command_error
}
|
||||||
|
|
||||||
hosts_push_r_libraries_dump()
|
hosts_push_r_libraries_dump()
|
||||||
{
|
{
|
||||||
info "Pushing R libraries dump to hosts"
|
info "Pushing R libraries dump to hosts"
|
||||||
@ -383,8 +395,9 @@ make_remote_connection_list()
|
|||||||
info "'number of cores' per host"
|
info "'number of cores' per host"
|
||||||
for host in "${HOSTS_ARRAY[@]}"; do
|
for host in "${HOSTS_ARRAY[@]}"; do
|
||||||
step "-- ${host}"
|
step "-- ${host}"
|
||||||
cornum=`try ssh ${SSH_OPTIONS} -i ${SSH_KEYS_DIR}/${SSH_KEY_PRIV} ${SSH_USER}@${host} '/usr/bin/Rscript -e "cat(parallel::detectCores(logical = ${REMOTE_DETECT_LOGICAL_CPUS}))"'`
|
|
||||||
|
|
||||||
|
[[ $REMOTE_DETECT_LOGICAL_CPUS == "TRUE" ]] && cornum=`ssh ${SSH_OPTIONS} -i ${SSH_KEYS_DIR}/${SSH_KEY_PRIV} ${SSH_USER}@${host} 'lscpu | grep "^CPU(s):" | grep -o "[0-9]*"'` || cornum=`ssh ${SSH_OPTIONS} -i ${SSH_KEYS_DIR}/${SSH_KEY_PRIV} ${SSH_USER}@${host} 'A=\$(lscpu | grep "Socket(s):" | grep -o "[0-9]*"); B=\$(lscpu | grep "Core(s) per socket:" | grep -o "[0-9]*"); echo \$((A*B))'`
|
||||||
|
|
||||||
regex='^[0-9]+$'
|
regex='^[0-9]+$'
|
||||||
if ! [[ $cornum =~ $regex ]] ; then
|
if ! [[ $cornum =~ $regex ]] ; then
|
||||||
try false
|
try false
|
||||||
@ -433,6 +446,18 @@ hosts_check_worker_log()
|
|||||||
check_if_command_error
|
check_if_command_error
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Prints the last 20 lines of `dmesg -T` from every host in HOSTS_ARRAY.
#
# Globals read: HOSTS_ARRAY, SSH_OPTIONS, SSH_KEYS_DIR, SSH_KEY_PRIV, SSH_USER
# Helpers used: info, step, try, next, check_if_command_error
#               (defined elsewhere in this script)
#
# NOTE(review): the remote command is a pipeline, so the status seen by `try`
# is the one of `tail`; a failing `dmesg` on the host is not detected here.
hosts_check_worker_dmesg()
{
    # Keep the loop variable out of the global scope; helpers called from
    # here still see it through bash's dynamic scoping.
    local host

    info "Checking dmesg on hosts"

    for host in "${HOSTS_ARRAY[@]}"; do
        step "-- ${host}"
        # Blank line so the remote output starts below the step label.
        echo
        # The first "-q" is stripped from SSH_OPTIONS and replaced with
        # LogLevel=error. The expansion stays unquoted on purpose so the
        # remaining options are split into separate words; the key path and
        # the target are quoted to survive spaces.
        try ssh ${SSH_OPTIONS/-q/} -o LogLevel=error -i "${SSH_KEYS_DIR}/${SSH_KEY_PRIV}" "${SSH_USER}@${host}" "dmesg -T | tail -n 20"
        next
    done

    check_if_command_error
}
|
||||||
|
|
||||||
hosts_clean_worker_log()
|
hosts_clean_worker_log()
|
||||||
{
|
{
|
||||||
info "Cleaning workers logs"
|
info "Cleaning workers logs"
|
||||||
@ -472,6 +497,7 @@ configure_hosts()
|
|||||||
generate_ssh_keys
|
generate_ssh_keys
|
||||||
hosts_push_ssh_key
|
hosts_push_ssh_key
|
||||||
hosts_push_shell_script
|
hosts_push_shell_script
|
||||||
|
hosts_enable_swap
|
||||||
dump_project_r_files
|
dump_project_r_files
|
||||||
dump_r_libraries
|
dump_r_libraries
|
||||||
hosts_push_project_r_files
|
hosts_push_project_r_files
|
||||||
|
Loading…
Reference in New Issue
Block a user