Friday, October 5, 2012

OpenMP and MPICH2 hybrid example

Based on OPENMP and MPICH2 Quick Install and Examples

libgomp 4.4.6: GCC OpenMP v3.0 shared support library
mpich2 1.4.1p1: a high-performance implementation of MPI
libgomp and mpich2-devel should be installed on both cent145 and cent146.
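
Note: every mpiexec command below reads its host list from a file named hydra.hosts, which is not shown in this post. A minimal sketch of such a file for the two nodes used here, assuming one MPI process per node (the Hydra process manager also accepts an optional ":N" suffix, e.g. cent145:2, to place N processes on a host):

cent145
cent146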

[C/C++ Examples]


[Example 1] 

[root@cent146 hybrid]# cat hybrid_hi.c
#include <stdio.h>
#include "mpi.h"
#include <omp.h>

int main(int argc, char *argv[]) {
  int numprocs, rank, namelen;
  char processor_name[MPI_MAX_PROCESSOR_NAME];
  int iam = 0, np = 1;

  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Get_processor_name(processor_name, &namelen);

  #pragma omp parallel default(shared) private(iam, np)
  {
    np = omp_get_num_threads();
    iam = omp_get_thread_num();
    printf("Hello from thread %d out of %d from process %d out of %d on %s\n",
           iam, np, rank, numprocs, processor_name);
  }

  MPI_Finalize();
  return 0;
}

[root@cent146 hybrid]# cat make_hybrid_hi.sh
#!/bin/bash
mpicc  -o hybrid_hi hybrid_hi.c -L/usr/lib64/mpich2/lib/ -lmpl -lopa -fopenmp

[root@cent146 hybrid]# mpiexec -f hydra.hosts -n 2 ./hybrid_hi
Hello from thread 0 out of 1 from process 1 out of 2 on cent146
Hello from thread 0 out of 1 from process 0 out of 2 on cent145
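
Each rank reports only one OpenMP thread here, presumably because each node exposes a single core; exporting OMP_NUM_THREADS before the mpiexec command (for example OMP_NUM_THREADS=4) is the usual way to request more threads per rank.

hybrid_hi.c also makes all of its MPI calls outside the OpenMP parallel region, so plain MPI_Init() is sufficient. If OpenMP threads will ever call MPI themselves, the program should instead request a thread support level at startup. A minimal sketch of that initialization, assuming the common "only the master thread calls MPI" pattern (MPI_THREAD_FUNNELED):

#include <stdio.h>
#include "mpi.h"

int main(int argc, char *argv[]) {
  int provided;

  /* Request FUNNELED support: threads may exist, but only the
     thread that called MPI_Init_thread will make MPI calls. */
  MPI_Init_thread(&argc, &argv, MPI_THREAD_FUNNELED, &provided);
  if (provided < MPI_THREAD_FUNNELED) {
    fprintf(stderr, "MPI thread support level too low\n");
    MPI_Abort(MPI_COMM_WORLD, 1);
  }

  /* ... OpenMP regions and MPI calls from the master thread ... */

  MPI_Finalize();
  return 0;
}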


[Example 2]

[root@cent146 hybrid]# cat hybrid_pi.c
#include <stdio.h>
#include <mpi.h>
#include <omp.h>
#define NBIN 100000
#define MAX_THREADS 8
int main(int argc,char **argv) {
  int nbin,myid,nproc,nthreads,tid;
  double step,sum[MAX_THREADS]={0.0},pi=0.0,pig;
  MPI_Init(&argc,&argv);
  MPI_Comm_rank(MPI_COMM_WORLD,&myid);
  MPI_Comm_size(MPI_COMM_WORLD,&nproc);
  nbin = NBIN/nproc; step = 1.0/(nbin*nproc);
#pragma omp parallel private(tid)
  {
    int i;
    double x;
    nthreads = omp_get_num_threads();
    tid = omp_get_thread_num();
    for (i=nbin*myid+tid; i<nbin*(myid+1); i+=nthreads) {
        x = (i+0.5)*step; sum[tid] += 4.0/(1.0+x*x);}
    printf("rank tid sum = %d %d %e\n",myid,tid,sum[tid]);
  }
  for(tid=0; tid<nthreads; tid++) pi += sum[tid]*step;
  MPI_Allreduce(&pi,&pig,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
  if (myid==0) printf("PI = %f\n",pig);
  MPI_Finalize();
}

[root@cent146 hybrid]# cat make_hybrid_pi.sh
#!/bin/bash
        mpicc  -o hybrid_pi hybrid_pi.c -L/usr/lib64/mpich2/lib/ -lmpl -lopa -fopenmp

[root@cent146 hybrid]# mpiexec -f hydra.hosts -n 2 ./hybrid_pi
rank tid sum = 1 0 1.287002e+05
rank tid sum = 0 0 1.854590e+05
PI = 3.141593
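
The per-thread values printed above are partial sums of 4/(1+x*x); multiplied by step they form a midpoint-rule approximation of the integral of 4/(1+x*x) over [0,1], which equals pi, with the NBIN sample points split first across MPI ranks and then across OpenMP threads. A minimal serial sketch of the same quadrature, useful as a sanity check (NBIN is kept at 100000 to match the hybrid version):

#include <stdio.h>
#define NBIN 100000

/* Serial midpoint-rule estimate of pi = integral of 4/(1+x^2) on [0,1]. */
int main(void) {
  int i;
  double step = 1.0 / NBIN, x, pi = 0.0;
  for (i = 0; i < NBIN; i++) {
    x = (i + 0.5) * step;
    pi += 4.0 / (1.0 + x * x) * step;
  }
  printf("PI = %f\n", pi);
  return 0;
}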

[Example 3]

[root@cent146 hybrid]# cat hybrid.c
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <math.h>
#ifdef USE_MPI
  #include <mpi.h>
#endif /* USE_MPI */
#ifdef _OPENMP
  #include <omp.h>
#endif /* _OPENMP */

int read_slab_info() {
  /* This should read info from a file or something,
     but we fake it */
  return 80;
}

double process_slab(int snum)
{
  int i, j;
  double x = 0.0;
  for (i = 0; i < 10000; i++)
    for (j = 0; j < 10000; j++)
      x += sqrt((i-j)*(i-j) / (sqrt((i*i) + (j*j)) + 1));
  return x;
}

void exit_on_error(char *message)
{
  fprintf(stderr, "%s\n", message);
#ifdef USE_MPI
  MPI_Finalize();
#endif
  exit(1);
}


int main(int argc, char **argv)
{
  int i, j, p, me, nprocs, num_threads, num_slabs, spp;
  int *my_slabs, *count;
  double x = 0.0, sum = 0.0;
#ifdef _OPENMP
  int np;
#endif /* _OPENMP */
#ifdef USE_MPI
  int namelen;
  char processor_name[MPI_MAX_PROCESSOR_NAME];
#endif /* USE_MPI */

#ifdef USE_MPI
  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
  MPI_Comm_rank(MPI_COMM_WORLD, &me);
  MPI_Get_processor_name(processor_name, &namelen);
#else /* USE_MPI */
  nprocs = 1;
  me = 0;
#endif /* USE_MPI */

#ifdef _OPENMP
  np = omp_get_num_procs();
  omp_set_num_threads(np);
  num_threads = omp_get_max_threads();
#else /* _OPENMP */
  num_threads = 1;
#endif /* _OPENMP */

  printf("Process %d of %d", me, nprocs);
#ifdef USE_MPI
  printf(" running on %s", processor_name);
#endif /* USE_MPI */
#ifdef _OPENMP
  printf(" using OpenMP with %d threads",
    num_threads);
#endif /* _OPENMP */
  printf("\n");

  /* Master process reads slab data */
  if (!me) num_slabs = read_slab_info();
#ifdef USE_MPI
  if (MPI_Bcast(&num_slabs, 1, MPI_INT, 0,
      MPI_COMM_WORLD) != MPI_SUCCESS)
    exit_on_error("Error in MPI_Bcast()");
#endif /* USE_MPI */

  if (num_slabs < nprocs)
    exit_on_error("Number of processes may not "
      "exceed number of slabs");
  /* maximum number of slabs per process */
  spp = (int)ceil((double)num_slabs / (double)nprocs);

  if (!me) printf("No more than %d slabs will \
    assigned to each process\n", spp);

  /* allocate list and count of slabs for each process */
  if (!(my_slabs = (int *)malloc(nprocs*spp*sizeof(int)))) {
    perror("my_slabs");
    exit(2);
  }

  if (!(count = (int *)malloc(nprocs*sizeof(int)))) {
    perror("count");
    exit(2);
  }

  /* initialize slab counts */
  for (p = 0; p < nprocs; p++) count[p] = 0;
  /* round-robin assignment of slabs to processes
   * for better potential load balancing
   */
  for (i = j = p = 0; i < num_slabs; i++) {
    my_slabs[p*spp+j] = i;
    count[p]++;
    if (p == nprocs -1)
      p = 0, j++;
    else
      p++;
  }


  /* each process works on its own list of slabs, but OpenMP
   * threads divide up the slabs on each process because of
   * the OpenMP directive
   */

#pragma omp parallel for reduction(+: x)
  for (i = 0; i < count[me]; i++) {
    printf("%d: slab %d being processed", me,
      my_slabs[me*spp+i]);
#ifdef _OPENMP
    printf(" by thread %d", omp_get_thread_num());
#endif /* _OPENMP */
    printf("\n");
    x += process_slab(my_slabs[me*spp+i]);
  }


#ifdef USE_MPI
  if (MPI_Reduce(&x, &sum, 1, MPI_DOUBLE, MPI_SUM, 0,
      MPI_COMM_WORLD) != MPI_SUCCESS)
    exit_on_error("Error in MPI_Reduce()");
#else /* USE_MPI */
  sum = x;
#endif /* USE_MPI */

  if (!me) printf("Sum is %lg\n", sum);

#ifdef USE_MPI
  printf("%d: Calling MPI_Finalize()\n", me);
  MPI_Finalize();
#endif /* USE_MPI */

  exit(0);
}

[root@cent146 hybrid]# cat make_hybrid.sh
#!/bin/bash
if [ "$1" == "" ] || [ "$1" == "hybrid" ]; then
        mpicc  -o hybrid hybrid.c -DUSE_MPI -L/usr/lib64/mpich2/lib/ -lmpl -lopa -lm -fopenmp
elif  [ "$1" == "mpi" ]; then
        mpicc  -o hybrid_mpi hybrid.c -DUSE_MPI -L/usr/lib64/mpich2/lib/ -lmpl -lopa -lm
elif  [ "$1" == "omp" ]; then
        mpicc  -o hybrid_omp hybrid.c -L/usr/lib64/mpich2/lib/ -lm -fopenmp
else
        echo "Illegal make parameter, try:"
        echo "$0 < hybrid / mpi / omp >"
fi

[root@cent146 hybrid]# mpiexec -f hydra.hosts -n 2 ./hybrid
Process 1 of 2 running on cent146 using OpenMP with 1 threads
Process 0 of 2 running on cent145 using OpenMP with 1 threads
No more than 40 slabs will be assigned to each process
0: slab 0 being processed by thread 0
1: slab 1 being processed by thread 0
1: slab 3 being processed by thread 0
0: slab 2 being processed by thread 0
1: slab 5 being processed by thread 0
0: slab 4 being processed by thread 0
    :
    :
    :
[root@cent146 hybrid]# mpiexec -f hydra.hosts -n 2 ./hybrid_mpi
Process 1 of 2 running on cent146
Process 0 of 2 running on cent145
No more than 40 slabs will be assigned to each process
0: slab 0 being processed
1: slab 1 being processed
1: slab 3 being processed
0: slab 2 being processed
1: slab 5 being processed
0: slab 4 being processed
    :
    :
    :
[root@cent146 hybrid]# ./hybrid_omp
Process 0 of 1 using OpenMP with 1 threads
No more than 80 slabs will be assigned to each process
0: slab 0 being processed by thread 0
0: slab 1 being processed by thread 0
0: slab 2 being processed by thread 0
0: slab 3 being processed by thread 0
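
The interleaved slab numbers in the two-rank runs come straight from the round-robin assignment loop in hybrid.c: rank 0 gets slabs 0, 2, 4, ... and rank 1 gets slabs 1, 3, 5, .... A minimal standalone sketch of that same loop, shrunk to 8 slabs over 2 processes so the resulting lists are easy to read (compile with -lm for ceil()):

#include <stdio.h>
#include <stdlib.h>
#include <math.h>

/* Reproduce the round-robin slab assignment from hybrid.c
   for a small case: 8 slabs distributed over 2 processes. */
int main(void) {
  int num_slabs = 8, nprocs = 2;
  int spp = (int)ceil((double)num_slabs / (double)nprocs);
  int *my_slabs = malloc(nprocs * spp * sizeof(int));
  int *count = calloc(nprocs, sizeof(int));
  int i, j, p;

  for (i = j = p = 0; i < num_slabs; i++) {
    my_slabs[p * spp + j] = i;
    count[p]++;
    if (p == nprocs - 1)
      p = 0, j++;
    else
      p++;
  }

  for (p = 0; p < nprocs; p++) {
    printf("process %d:", p);
    for (j = 0; j < count[p]; j++)
      printf(" %d", my_slabs[p * spp + j]);
    printf("\n");
  }

  free(my_slabs);
  free(count);
  return 0;
}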

[Fortran Examples]


[Example 1]

[root@cent146 hybrid]# cat hi_f.f
        program f1 !Hello World MPI/F90 style
        implicit none
        include 'mpif.h'
        integer::myrank, numprocs,strlen, ierr, comm = MPI_COMM_WORLD
        character(80)::str

        call mpi_init(ierr)
        call mpi_comm_size(comm, numprocs, ierr)
        call mpi_comm_rank(comm, myrank, ierr)

        if (myrank .EQ. 0) print *, 'Num procs ', numprocs

        call mpi_get_processor_name(str, strlen,ierr)

        print *, 'Hello world : I am processor ', myrank, ':', str

        call mpi_finalize(ierr)

        end

[root@cent146 hybrid]# mpif77 -o hi_f hi_f.f

[root@cent146 hybrid]# mpiexec -f hydra.hosts -n 2 ./hi_f
 Hello world : I am processor            1 :cent146                                     
 Num procs            2
 Hello world : I am processor            0 :cent145                                     

[Example 2]

[root@cent146 hybrid]# cat hybrid_hi_f.f
        program hybrid_hi_f ! hello from OMP_threads
        implicit none
        include 'mpif.h'
        integer nthreads, mythrd, omp_get_num_threads
        integer::myrank, numprocs, ierr,strlen, comm=MPI_COMM_WORLD
        !integer myrank numprocs, ierr, comm
        !parameter (comm = MPI_COMM_WORLD)
        character(40)::str
        integer omp_get_thread_num

        call mpi_init(ierr)
        call mpi_comm_size(comm, numprocs, ierr)
        call mpi_comm_rank(comm, myrank, ierr)


!$OMP PARALLEL PRIVATE(mythrd) SHARED(nthreads)

        nthreads = omp_get_num_threads()
        mythrd = omp_get_thread_num()

        call mpi_get_processor_name(str, strlen, ierr)

        if (mythrd.eq.0) print *, "Num threads = ", nthreads
        print *, 'th ' , mythrd, 'of', nthreads, 'prc', myrank , ':' , str
        !print *, ' proc ', myrank , ':' , str


!$OMP END PARALLEL

        call mpi_finalize(ierr)
        end

[root@cent146 hybrid]# mpif77 -o hybrid_hi_f hybrid_hi_f.f -fopenmp

[root@cent146 hybrid]# mpiexec -f hydra.hosts -n 2 ./hybrid_hi_f
 Num threads =            1
 Num threads =            1
 th            0 of           1 prc           1 :cent146                                
 th            0 of           1 prc           0 :cent145                                

[reference]

http://www.math.ntu.edu.tw/~wwang/cola_lab/knowledge/download/Parallel_Computing/7-Hybrid%20MPI+OpenMP.ppt
http://www.linux-mag.com/id/1631/
