How to run R on an HPC system:
(More info here: https://www.rdocumentation.org/packages/foreach/versions/1.4.7/topics/foreach)
Modules:
Find the available R module versions on the login node with: module spider R
Then load one of them, e.g.:
module load R/4.2.2-foss-2022b
Sequential Code: (Example code)
testseq.R
# Sequential reference benchmark: accumulate sqrt(k) + cos(k) + sin(k),
# with k = i + j, over every pair (i, j) for i in 1..n and j in 1..m.
n <- 20000
m <- 100000
total <- 0.0
for (i in seq_len(n)) {
  for (j in seq_len(m)) {
    k <- i + j
    total <- total + sqrt(k) + cos(k) + sin(k)
  }
}
# The value of this top-level expression is what the script reports.
sprintf("Sum %f", total)
Job script (Idun) (job.sh)
#!/bin/bash
# Slurm batch script for the sequential example (testseq.R).
#SBATCH -J job        # sensible name for the job
#SBATCH -N 1          # Allocate 1 nodes for the job
#SBATCH -t 00:10:00   # Upper time limit for the job (d-HH:MM:SS)
#SBATCH -p CPUQ

# Load the R toolchain, then run the script non-interactively.
module load R/4.2.2-foss-2022b
export LANG=C
R --vanilla -f testseq.R
Parallel Code (doParallel for 1 compute node and all cores on the node (20))
Example: testdopar.R
# testdopar.R: single-node parallel version of the benchmark sum.
# The outer index i is distributed over the doParallel workers; each task
# computes the inner sum over j, and the partial sums are combined with "+".
library(doParallel)

# Use the per-task core count granted by Slurm (-c / --cpus-per-task) when it
# is exported to the job; otherwise fall back to 20 cores.
n_cores <- as.integer(Sys.getenv("SLURM_CPUS_PER_TASK", unset = "20"))
if (is.na(n_cores) || n_cores < 1L) {
  n_cores <- 20L
}
registerDoParallel(cores = n_cores)

n <- 20000
m <- 100000

# Iterate over 1..n (the original referenced istart/iend, which are not
# defined in this script).
gsum <- foreach(i = seq_len(n), .combine = "+") %dopar% {
  # Start every task from zero so its result does not depend on state
  # outside the loop body.
  partial <- 0.0
  for (j in seq_len(m)) {
    partial <- partial + sqrt(i + j) + cos(i + j) + sin(i + j)
  }
  partial
}
sprintf("Sum %f", gsum)
Job script (Idun) (job.sh)
#!/bin/bash
# Slurm batch script for the single-node doParallel example (testdopar.R).
#SBATCH -J job        # sensible name for the job
#SBATCH -N 1          # Allocate 1 nodes for the job
#SBATCH -c 20         # CPU cores for the task
#SBATCH -t 00:10:00   # Upper time limit for the job
#SBATCH -p CPUQ

# Load the R toolchain, then run the script non-interactively.
module load R/4.2.2-foss-2022b
export LANG=C
R --vanilla -f testdopar.R
Hybrid Code ( MPI and doParallel for several compute nodes)
Example: testhybr.R for 2 compute nodes
# testhybr.R: hybrid MPI + doParallel version of the benchmark sum.
# Each MPI rank takes one contiguous slice of the outer index range,
# processes it with doParallel, and the per-rank sums are combined with
# mpi.allreduce.
library(doParallel)
library("Rmpi")
options(echo = FALSE)
registerDoParallel(cores = 20)

myrank <- mpi.comm.rank(0)
ranks <- mpi.comm.size(0)

n <- 0
m <- 0
if (myrank == 0) {
  # Master rank sets the number of iterations
  n <- 20000
  m <- 100000
}
# y = mpi.bcast(x, type, rank, comm), type=1 integer, type=2 double
n <- mpi.bcast(n, 1, 0, 0)
m <- mpi.bcast(m, 1, 0, 0)

# Split 1..n into one contiguous chunk per rank. Integer division keeps the
# bounds whole; the last rank also takes the remainder when n is not a
# multiple of the number of ranks. For 2 ranks this is 1..n/2 and n/2+1..n.
chunk <- n %/% ranks
istart <- myrank * chunk + 1
iend <- if (myrank == ranks - 1) n else (myrank + 1) * chunk
# seq_len() yields an empty range (instead of counting down) when a rank has
# no work, e.g. when there are more ranks than iterations.
idx <- istart - 1 + seq_len(max(iend - istart + 1, 0))

lsum <- foreach(i = idx, .combine = "+") %dopar% {
  # Start every task from zero so its result does not depend on state
  # outside the loop body.
  partial <- 0.0
  for (j in seq_len(m)) {
    partial <- partial + sqrt(i + j) + cos(i + j) + sin(i + j)
  }
  partial
}
# A rank with an empty chunk contributes nothing to the global sum.
if (length(lsum) == 0) {
  lsum <- 0.0
}

# y = mpi.allreduce(x, type, op="sum", comm), type=2 double
gsum <- mpi.allreduce(lsum, 2, op = "sum", 0)
sprintf("Sum %f", gsum)
mpi.quit()
Job script
#!/bin/bash
# Slurm batch script for the hybrid MPI + doParallel example (testhybr.R):
# one MPI task on each of two nodes, 20 cores per task.
#SBATCH --job-name=job        # sensible name for the job
#SBATCH --nodes=2             # Allocate 2 nodes for the job
#SBATCH --ntasks-per-node=1   # 1 task per node
#SBATCH --cpus-per-task=20
#SBATCH --time=00:10:00       # Upper time limit for the job
#SBATCH --partition=CPUQ

module load R/4.2.2-foss-2022b
export LANG=C

# Launch one R process per MPI task and report the wall-clock time.
time mpirun R --vanilla -f testhybr.R
Performance (idun):
Sequential (1 core): 7 min 30 sec
Hybrid (40 cores): 16 sec