Based on the OpenMP and MPICH2 Quick Install and Examples
libgomp: 4.4.6, GCC OpenMP v3.0 shared support library
mpich2: 1.4.1p1, a high-performance implementation of MPI
libgomp and mpich2-devel should both be installed on cent145 and cent146.
[C/C++ Examples]
[Example 1]
[root@cent146 hybrid]# cat hybrid_hi.c
#include "mpi.h"
int main(int argc, char *argv[]) {
int numprocs, rank, namelen;
char processor_name[MPI_MAX_PROCESSOR_NAME];
int iam = 0, np = 1;
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Get_processor_name(processor_name, &namelen);
#pragma omp parallel default(shared) private(iam, np)
np = omp_get_num_threads();
iam = omp_get_thread_num();
printf("Hello from thread %d out of %d from process %d out of %d on %s\n",
iam, np, rank, numprocs, processor_name);
[root@cent146 hybrid]# cat
mpicc -o hybrid_hi hybrid_hi.c -L/usr/lib64/mpich2/lib/ -lmpl -lopa -fopenmp
[root@cent146 hybrid]# mpiexec -f hydra.hosts -n 2 ./hybrid_hi
Hello from thread 0 out of 1 from process 1 out of 2 on cent146
Hello from thread 0 out of 1 from process 0 out of 2 on cent145
[Example 2]
[root@cent146 hybrid]# cat hybrid_pi.c
#define NBIN 100000
#define MAX_THREADS 8
void main(int argc,char **argv) {
int nbin,myid,nproc,nthreads,tid;
double step,sum[MAX_THREADS]={0.0},pi=0.0,pig;
nbin = NBIN/nproc; step = 1.0/(nbin*nproc);
#pragma omp parallel private(tid)
int i;
double x;
nthreads = omp_get_num_threads();
tid = omp_get_thread_num();
for (i=nbin*myid+tid; i<nbin*(myid+1); i+=nthreads) {
x = (i+0.5)*step; sum[tid] += 4.0/(1.0+x*x);}
printf("rank tid sum = %d %d %e\n",myid,tid,sum[tid]);
for(tid=0; tid<nthreads; tid++) pi += sum[tid]*step;
if (myid==0) printf("PI = %f\n",pig);
[root@cent146 hybrid]# cat
mpicc -o hybrid_pi hybrid_pi.c -L/usr/lib64/mpich2/lib/ -lmpl -lopa -fopenmp
[root@cent146 hybrid]# mpiexec -f hydra.hosts -n 2 ./hybrid_pi
rank tid sum = 1 0 1.287002e+05
rank tid sum = 0 0 1.854590e+05
PI = 3.141593
[Example 3]
[root@cent146 hybrid]# cat hybrid.c
#ifdef USE_MPI
#endif /* USE_MPI */
#ifdef _OPENMP
#endif /* _OPENMP */
int read_slab_info() {
/* This should read info from a file or something,
but we fake it */
return 80;
double process_slab(int snum)
int i, j;
double x;
for (i = 0; i < 10000; i++)
for (j = 0; j < 10000; j++)
x += sqrt((i-j)*(i-j) / (sqrt((i*i) + (j*j)) + 1));
return x;
void exit_on_error(char *message)
fprintf(stderr, "%s\n", message);
#ifdef USE_MPI
int main(int argc, char **argv)
int i, j, p, me, nprocs, num_threads, num_slabs, spp;
int *my_slabs, *count;
double x, sum;
#ifdef _OPENMP
int np;
#endif /* _OPENMP */
#ifdef USE_MPI
int namelen;
char processor_name[MPI_MAX_PROCESSOR_NAME];
#endif /* USE_MPI */
#ifdef USE_MPI
MPI_Init(&argc, &argv);
MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
MPI_Comm_rank(MPI_COMM_WORLD, &me);
MPI_Get_processor_name(processor_name, &namelen);
#else /* USE_MPI */
nprocs = 1;
me = 0;
#endif /* USE_MPI */
#ifdef _OPENMP
np = omp_get_num_procs();
num_threads = omp_get_max_threads();
#else /* _OPENMP */
num_threads = 1;
#endif /* _OPENMP */
printf("Process %d of %d", me, nprocs);
#ifdef USE_MPI
printf(" running on %s", processor_name);
#endif /* USE_MPI */
#ifdef _OPENMP
printf(" using OpenMP with %d threads",
#endif /* _OPENMP */
/* Master process reads slab data */
if (!me) num_slabs = read_slab_info();
#ifdef USE_MPI
if (MPI_Bcast(&num_slabs, 1, MPI_INT, 0,
exit_on_error("Error in MPI_Bcast()");
#endif /* USE_MPI */
if (num_slabs < nprocs)
exit_on_error("Number of slabs may not exceed \
number of processes");
/* maximum number of slabs per process */
spp = (int)ceil((double)num_slabs /
if (!me) printf("No more than %d slabs will \
assigned to each process\n", spp);
/* allocate list and count of slabs for each
process */
if (!(my_slabs = (int *)malloc(nprocs*spp*
sizeof(int)))) {
if (!(count = (int *)malloc(nprocs*sizeof(int)))) {
/* initialize slab counts */
for (p = 0; p < nprocs; p++) count[p] = 0;
/* round robin assignment of slabs to processes
for better potential
* load balancing
for (i = j = p = 0; i < num_slabs; i++) {
my_slabs[p*spp+j] = i;
if (p == nprocs -1)
p = 0, j++;
/* each process works on its own list of slabs,
but OpenMP threads
* divide up the slabs on each process because
of OpenMP directive
#pragma omp parallel for reduction(+: x)
for (i = 0; i < count[me]; i++) {
printf("%d: slab %d being processed", me,
#ifdef _OPENMP
printf(" by thread %d", omp_get_thread_num());
#endif /* _OPENMP */
x += process_slab(my_slabs[me*spp+i]);
#ifdef USE_MPI
if (MPI_Reduce(&x, &sum, 1, MPI_DOUBLE, MPI_SUM, 0,
exit_on_error("Error in MPI_Reduce()");
#else /* USE_MPI */
sum = x;
#endif /* USE_MPI */
if (!me) printf("Sum is %lg\n", sum);
#ifdef USE_MPI
printf("%d: Calling MPI_Finalize()\n", me);
#endif /* USE_MPI */
[root@cent146 hybrid]# cat
if [ "$1" == "" ] || [ "$1" == "hybrid" ]; then
mpicc -o hybrid hybrid.c -DUSE_MPI -L/usr/lib64/mpich2/lib/ -lmpl -lopa -lm -fopenmp
elif [ "$1" == "mpi" ]; then
mpicc -o hybrid_mpi hybrid.c -DUSE_MPI -L/usr/lib64/mpich2/lib/ -lmpl -lopa -lm
elif [ "$1" == "omp" ]; then
mpicc -o hybrid_omp hybrid.c -L/usr/lib64/mpich2/lib/ -lm -fopenmp
echo "Illegal make parameter, try:"
echo "$0 < hybrid / mpi / omp >"
[root@cent146 hybrid]# mpiexec -f hydra.hosts -n 2 ./hybrid
Process 1 of 2 running on cent146 using OpenMP with 1 threads
Process 0 of 2 running on cent145 using OpenMP with 1 threads
No more than 40 slabs will assigned to each process
0: slab 0 being processed by thread 0
1: slab 1 being processed by thread 0
1: slab 3 being processed by thread 0
0: slab 2 being processed by thread 0
1: slab 5 being processed by thread 0
0: slab 4 being processed by thread 0
[root@cent146 hybrid]# mpiexec -f hydra.hosts -n 2 ./hybrid_mpi
Process 1 of 2 running on cent146
Process 0 of 2 running on cent145
No more than 40 slabs will assigned to each process
0: slab 0 being processed
1: slab 1 being processed
1: slab 3 being processed
0: slab 2 being processed
1: slab 5 being processed
0: slab 4 being processed
[root@cent146 hybrid]# ./hybrid_omp
Process 0 of 1 using OpenMP with 1 threads
No more than 80 slabs will assigned to each process
0: slab 0 being processed by thread 0
0: slab 1 being processed by thread 0
0: slab 2 being processed by thread 0
0: slab 3 being processed by thread 0
[Fortran Examples]
[Example 1]
[root@cent146 hybrid]# cat hi_f.f
program f1 !Hello World MPI/F90 style
implicit none
include 'mpif.h'
integer::myrank, numprocs,strlen, ierr, comm = MPI_COMM_WORLD
call mpi_init(ierr)
call mpi_comm_size(comm, numprocs, ierr)
call mpi_comm_rank(comm, myrank, ierr)
if (myrank .EQ. 0) print *, 'Num procs ', numprocs
call mpi_get_processor_name(str, strlen,ierr)
print *, 'Hello world : I am processor ', myrank, ':', str
call mpi_finalize(ierr)
[root@cent146 hybrid]# mpif77 -o hi_f hi_f.f
[root@cent146 hybrid]# mpiexec -f hydra.hosts -n 2 ./hi_f
Hello world : I am processor 1 :cent146
Num procs 2
Hello world : I am processor 0 :cent145
[Example 2]
[root@cent146 hybrid]# cat hybrid_hi_f.f
program hybrid_hi_f ! hello from OMP_threads
implicit none
include 'mpif.h'
integer nthreads, mythrd, omp_get_num_threads
integer::myrank, numprocs, ierr,strlen, comm=MPI_COMM_WORLD
!integer myrank numprocs, ierr, comm
!parameter (comm = MPI_COMM_WORLD)
integer omp_get_thread_num
call mpi_init(ierr)
call mpi_comm_size(comm, numprocs, ierr)
call mpi_comm_rank(comm, myrank, ierr)
nthreads = omp_get_num_threads()
mythrd = omp_get_thread_num()
call mpi_get_processor_name(str, strlen, ierr)
if (mythrd.eq.0) print *, "Num threads = ", nthreads
print *, 'th ' , mythrd, 'of', nthreads, 'prc', myrank , ':' , str
!print *, ' proc ', myrank , ':' , str
call mpi_finalize(ierr)
[root@cent146 hybrid]# mpif77 -o hybrid_hi_f hybrid_hi_f.f -fopenmp
[root@cent146 hybrid]# mpiexec -f hydra.hosts -n 2 ./hybrid_hi_f
Num threads = 1
Num threads = 1
th 0 of 1 prc 1 :cent146
th 0 of 1 prc 0 :cent145