Parallel R for HPC system

 

How to run R on an HPC system:

(More info here: https://www.rdocumentation.org/packages/foreach/versions/1.4.7/topics/foreach)

Modules:

Find an R module version on the login node with: module spider R

Then load one of the listed versions, e.g.:

module load R/4.2.2-foss-2022b

Sequential Code: (Example code)

testseq.R

# Sequential reference benchmark: accumulates
#   sqrt(i+j) + cos(i+j) + sin(i+j)
# over every pair (i, j) with i in 1..n and j in 1..m, on a single core.
n<-20000
m<-100000
# Running total of the double loop. Note: the name `sum` is the same as
# base R's sum() function; it is kept here to leave the benchmark unchanged.
sum<-0.0
for(i in 1:n) {
  for(j in 1:m) {
     # Terms are added left to right onto the running total.
     sum<-sum+sqrt(i+j)+cos(i+j)+sin(i+j)
  }
}
# Format the final total as a fixed-point number.
sprintf("Sum %f",sum)

Job script (Idun) (job.sh)

#!/bin/bash
# Slurm batch script that runs the sequential example testseq.R on one node.
# The shebang must be the very first line and start with "#!", and every
# command below needs its own line.
#SBATCH -J job                 # sensible name for the job
#SBATCH -N 1                   # Allocate 1 node for the job
#SBATCH -t 00:10:00            # Upper time limit for the job (d-HH:MM:SS)
#SBATCH -p CPUQ                # Partition to submit the job to

# Make the R installation available in the job environment.
module load R/4.2.2-foss-2022b

# Use the plain C locale, then run the script non-interactively.
export LANG=C
R --vanilla -f testseq.R

Parallel Code (doParallel for 1 compute node and all cores on the node (20))

Example: testdopar.R

# Single-node parallel example: the outer loop of the sequential benchmark is
# distributed over the registered doParallel workers; the inner loop stays
# sequential inside each task.
library(doParallel)
# 20 workers, matching the `#SBATCH -c 20` request in the job script.
registerDoParallel(cores = 20)
n <- 20000
m <- 100000

# Iterate over all outer indices 1..n (the script defines no istart/iend, so
# the range must be derived from n). Each task returns its own partial sum
# and `.combine = '+'` adds the partial sums into the global total.
gsum <- foreach(i = seq_len(n), .combine = '+') %dopar% {
  # Start every task from zero so it does not depend on outside state.
  partial <- 0.0
  for (j in seq_len(m)) {
    partial <- partial + sqrt(i + j) + cos(i + j) + sin(i + j)
  }
  partial
}
sprintf("Sum %f", gsum)

Job script (Idun) (job.sh)

#!/bin/bash
# Slurm batch script that runs the doParallel example testdopar.R on one node.
# The shebang must be the very first line and start with "#!".
#SBATCH -J job                  # sensible name for the job
#SBATCH -N 1                    # Allocate 1 node for the job
#SBATCH -c 20                   # CPU cores for the task; matches registerDoParallel(cores=20)
#SBATCH -t 00:10:00             # Upper time limit for the job
#SBATCH -p CPUQ                 # Partition to submit the job to

# Make the R installation available in the job environment.
module load R/4.2.2-foss-2022b

# Use the plain C locale, then run the script non-interactively.
export LANG=C
R --vanilla -f testdopar.R

Hybrid Code ( MPI and doParallel for several compute nodes)

Example: testhybr.R for 2 compute nodes

# Hybrid example: MPI (Rmpi) splits the outer loop across ranks (one rank per
# compute node), and doParallel spreads each rank's share over the cores of
# its node. The per-rank partial sums are combined with an MPI all-reduce.
library(doParallel)
library("Rmpi")
options(echo=FALSE)
# 20 workers per rank, matching the `#SBATCH -c 20` request in the job script.
registerDoParallel(cores = 20)
myrank <- mpi.comm.rank(0)
ranks  <- mpi.comm.size(0)
n <- 0
m <- 0
if (myrank == 0) {
  # Master rank sets the number of iterations
  n <- 20000
  m <- 100000
}
# y = mpi.bcast (x, type, rank , comm ) , type=1 integer, type=2 double
n <- mpi.bcast(n, 1, 0, 0)
m <- mpi.bcast(m, 1, 0, 0)

# Split 1..n into one contiguous chunk per rank. Integer division keeps the
# boundaries whole numbers; the last rank also takes the remainder when n is
# not divisible by the number of ranks. With 2 ranks and n = 20000 this gives
# 1..10000 and 10001..20000.
chunk  <- n %/% ranks
istart <- myrank * chunk + 1L
iend   <- if (myrank == ranks - 1L) n else istart + chunk - 1L
# length.out form yields an empty sequence (instead of counting down) when a
# rank has no iterations, i.e. when there are more ranks than iterations.
my_rows <- seq.int(istart, length.out = iend - istart + 1L)

# Each task returns its own partial sum; `.combine = '+'` adds them up into
# this rank's local sum.
lsum <- foreach(i = my_rows, .combine = '+') %dopar% {
  # Start every task from zero so it does not depend on outside state.
  partial <- 0.0
  for (j in seq_len(m)) {
    partial <- partial + sqrt(i + j) + cos(i + j) + sin(i + j)
  }
  partial
}
# A rank without iterations contributes nothing to the global sum.
if (is.null(lsum)) {
  lsum <- 0.0
}
# y = mpi.allreduce(x, type, op="sum",comm), type=2 double
gsum <- mpi.allreduce(lsum, 2, op = "sum", 0)
sprintf("Sum %f", gsum)
mpi.quit()

Job script

#!/bin/bash
# Slurm batch script for the hybrid MPI + doParallel example testhybr.R:
# two nodes, one MPI task per node, 20 cores per task.
#SBATCH --job-name=job
#SBATCH --nodes=2
#SBATCH --ntasks-per-node=1
#SBATCH --cpus-per-task=20
#SBATCH --time=00:10:00
#SBATCH --partition=CPUQ

# Make the R installation available in the job environment.
module load R/4.2.2-foss-2022b

# Use the plain C locale, then start one R process per MPI task and report
# the elapsed wall-clock time.
export LANG=C
time mpirun R --vanilla -f testhybr.R

Performance (idun):

Sequential (1 core): 7 min 30 sec

Hybrid (40 cores): 16 sec

Scroll to Top