1: #include <petscsys.h> 2: #include <petsc/private/petscimpl.h> 4: struct _n_PetscShmComm {
5: PetscMPIInt *globranks; /* global ranks of each rank in the shared memory communicator */
6: PetscMPIInt shmsize; /* size of the shared memory communicator */
7: MPI_Comm globcomm,shmcomm; /* global communicator and shared memory communicator (a sub-communicator of the former) */
8: };
10: /*
11: Private routine to delete internal shared memory communicator when a communicator is freed.
13: This is called by MPI, not by users. This is called by MPI_Comm_free() when the communicator that has this data as an attribute is freed.
15: Note: this is declared extern "C" because it is passed to MPI_Comm_create_keyval()
17: */
18: PETSC_EXTERN PetscMPIInt MPIAPI Petsc_ShmComm_Attr_Delete_Fn(MPI_Comm comm,PetscMPIInt keyval,void *val,void *extra_state) 19: {
20: PetscErrorCode ierr;
21: PetscShmComm p = (PetscShmComm)val;
24: PetscInfo1(NULL,"Deleting shared memory subcommunicator in a MPI_Comm %ld\n",(long)comm);CHKERRMPI(ierr);
25: MPI_Comm_free(&p->shmcomm);CHKERRMPI(ierr);
26: PetscFree(p->globranks);CHKERRMPI(ierr);
27: PetscFree(val);CHKERRMPI(ierr);
28: PetscFunctionReturn(MPI_SUCCESS);
29: }
31: /*@C
32: PetscShmCommGet - Given a PETSc communicator returns a communicator of all ranks that share a common memory
35: Collective.
37: Input Parameter:
38: . globcomm - MPI_Comm 40: Output Parameter:
41: . pshmcomm - the PETSc shared memory communicator object
43: Level: developer
45: Notes:
46: This should be called only with an PetscCommDuplicate() communictor
48: When used with MPICH, MPICH must be configured with --download-mpich-device=ch3:nemesis
50: @*/
51: PetscErrorCodePetscShmCommGet(MPI_Comm globcomm,PetscShmComm *pshmcomm) 52: {
53: #ifdef PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY
54: PetscErrorCode ierr;
55: MPI_Group globgroup,shmgroup;
56: PetscMPIInt *shmranks,i,flg;
57: PetscCommCounter *counter;
60: MPI_Comm_get_attr(globcomm,Petsc_Counter_keyval,&counter,&flg);
61: if (!flg) SETERRQ(globcomm,PETSC_ERR_ARG_CORRUPT,"Bad MPI communicator supplied; must be a PETSc communicator");
63: MPI_Comm_get_attr(globcomm,Petsc_ShmComm_keyval,pshmcomm,&flg);
64: if (flg) return(0);
66: PetscNew(pshmcomm);
67: (*pshmcomm)->globcomm = globcomm;
69: MPI_Comm_split_type(globcomm, MPI_COMM_TYPE_SHARED,0, MPI_INFO_NULL,&(*pshmcomm)->shmcomm);
71: MPI_Comm_size((*pshmcomm)->shmcomm,&(*pshmcomm)->shmsize);
72: MPI_Comm_group(globcomm, &globgroup);
73: MPI_Comm_group((*pshmcomm)->shmcomm, &shmgroup);
74: PetscMalloc1((*pshmcomm)->shmsize,&shmranks);
75: PetscMalloc1((*pshmcomm)->shmsize,&(*pshmcomm)->globranks);
76: for (i=0; i<(*pshmcomm)->shmsize; i++) shmranks[i] = i;
77: MPI_Group_translate_ranks(shmgroup, (*pshmcomm)->shmsize, shmranks, globgroup, (*pshmcomm)->globranks);
78: PetscFree(shmranks);
79: MPI_Group_free(&globgroup);
80: MPI_Group_free(&shmgroup);
82: for (i=0; i<(*pshmcomm)->shmsize; i++) {
83: PetscInfo2(NULL,"Shared memory rank %d global rank %d\n",i,(*pshmcomm)->globranks[i]);
84: }
85: MPI_Comm_set_attr(globcomm,Petsc_ShmComm_keyval,*pshmcomm);
86: return(0);
87: #else
88: SETERRQ(globcomm, PETSC_ERR_SUP, "Shared memory communicators need MPI-3 package support.\nPlease upgrade your MPI or reconfigure with --download-mpich.");
89: #endif
90: }
92: /*@C
93: PetscShmCommGlobalToLocal - Given a global rank returns the local rank in the shared memory communicator
95: Input Parameters:
96: + pshmcomm - the shared memory communicator object
97: - grank - the global rank
99: Output Parameter:
100: . lrank - the local rank, or MPI_PROC_NULL if it does not exist
102: Level: developer
104: Developer Notes:
105: Assumes the pshmcomm->globranks[] is sorted
107: It may be better to rewrite this to map multiple global ranks to local in the same function call
109: @*/
110: PetscErrorCodePetscShmCommGlobalToLocal(PetscShmComm pshmcomm,PetscMPIInt grank,PetscMPIInt *lrank)111: {
112: PetscMPIInt low,high,t,i;
113: PetscBool flg = PETSC_FALSE;
117: *lrank = MPI_PROC_NULL;
118: if (grank < pshmcomm->globranks[0]) return(0);
119: if (grank > pshmcomm->globranks[pshmcomm->shmsize-1]) return(0);
120: PetscOptionsGetBool(NULL,NULL,"-noshared",&flg,NULL);
121: if (flg) return(0);
122: low = 0;
123: high = pshmcomm->shmsize;
124: while (high-low > 5) {
125: t = (low+high)/2;
126: if (pshmcomm->globranks[t] > grank) high = t;
127: else low = t;
128: }
129: for (i=low; i<high; i++) {
130: if (pshmcomm->globranks[i] > grank) return(0);
131: if (pshmcomm->globranks[i] == grank) {
132: *lrank = i;
133: return(0);
134: }
135: }
136: return(0);
137: }
139: /*@C
140: PetscShmCommLocalToGlobal - Given a local rank in the shared memory communicator returns the global rank
142: Input Parameters:
143: + pshmcomm - the shared memory communicator object
144: - lrank - the local rank in the shared memory communicator
146: Output Parameter:
147: . grank - the global rank in the global communicator where the shared memory communicator is built
149: Level: developer
151: @*/
152: PetscErrorCodePetscShmCommLocalToGlobal(PetscShmComm pshmcomm,PetscMPIInt lrank,PetscMPIInt *grank)153: {
155: #ifdef PETSC_USE_DEBUG
156: {
158: if (lrank < 0 || lrank >= pshmcomm->shmsize) { SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"No rank %D in the shared memory communicator",lrank); }
159: }
160: #endif
161: *grank = pshmcomm->globranks[lrank];
162: return(0);
163: }
165: /*@C
166: PetscShmCommGetMpiShmComm - Returns the MPI communicator that represents all processes with common shared memory
168: Input Parameter:
169: . pshmcomm - PetscShmComm object obtained with PetscShmCommGet()
171: Output Parameter:
172: . comm - the MPI communicator
174: Level: developer
176: @*/
177: PetscErrorCodePetscShmCommGetMpiShmComm(PetscShmComm pshmcomm,MPI_Comm *comm)178: {
180: *comm = pshmcomm->shmcomm;
181: return(0);
182: }
184: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
185: #include <pthread.h>
186: #include <hwloc.h>
187: #include <omp.h>
189: /* Use mmap() to allocate shared mmeory (for the pthread_barrier_t object) if it is available,
190: otherwise use MPI_Win_allocate_shared. They should have the same effect except MPI-3 is much
191: simpler to use. However, on a Cori Haswell node with Cray MPI, MPI-3 worsened a test's performance
192: by 50%. Until the reason is found out, we use mmap() instead.
193: */
194: #define USE_MMAP_ALLOCATE_SHARED_MEMORY196: #if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
197: #include <sys/mman.h>
198: #include <sys/types.h>
199: #include <sys/stat.h>
200: #include <fcntl.h>
201: #endif
203: struct _n_PetscOmpCtrl {
204: MPI_Comm omp_comm; /* a shared memory communicator to spawn omp threads */
205: MPI_Comm omp_master_comm; /* a communicator to give to third party libraries */
206: PetscMPIInt omp_comm_size; /* size of omp_comm, a kind of OMP_NUM_THREADS */
207: PetscBool is_omp_master; /* rank 0's in omp_comm */
208: MPI_Win omp_win; /* a shared memory window containing a barrier */
209: pthread_barrier_t *barrier; /* pointer to the barrier */
210: hwloc_topology_t topology;
211: hwloc_cpuset_t cpuset; /* cpu bindings of omp master */
212: hwloc_cpuset_t omp_cpuset; /* union of cpu bindings of ranks in omp_comm */
213: };
216: /* Allocate and initialize a pthread_barrier_t object in memory shared by processes in omp_comm
217: contained by the controller.
219: PETSc OpenMP controller users do not call this function directly. This function exists
220: only because we want to separate shared memory allocation methods from other code.
221: */
222: PETSC_STATIC_INLINE PetscErrorCode PetscOmpCtrlCreateBarrier(PetscOmpCtrl ctrl)223: {
224: PetscErrorCode ierr;
225: MPI_Aint size;
226: void *baseptr;
227: pthread_barrierattr_t attr;
229: #if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
230: PetscInt fd;
231: PetscChar pathname[PETSC_MAX_PATH_LEN];
232: #else
233: PetscMPIInt disp_unit;
234: #endif
237: #if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
238: size = sizeof(pthread_barrier_t);
239: if (ctrl->is_omp_master) {
240: /* use PETSC_COMM_SELF in PetscGetTmp, since it is a collective call. Using omp_comm would otherwise bcast the partially populated pathname to slaves */
241: PetscGetTmp(PETSC_COMM_SELF,pathname,PETSC_MAX_PATH_LEN);
242: PetscStrlcat(pathname,"/petsc-shm-XXXXXX",PETSC_MAX_PATH_LEN);
243: /* mkstemp replaces XXXXXX with a unique file name and opens the file for us */
244: fd = mkstemp(pathname); if(fd == -1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Could not create tmp file %s with mkstemp\n", pathname);
245: ftruncate(fd,size);
246: baseptr = mmap(NULL,size,PROT_READ | PROT_WRITE, MAP_SHARED,fd,0); if (baseptr == MAP_FAILED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"mmap() failed\n");
247: close(fd);
248: MPI_Bcast(pathname,PETSC_MAX_PATH_LEN,MPI_CHAR,0,ctrl->omp_comm);
249: /* this MPI_Barrier is to wait slaves to open the file before master unlinks it */
250: MPI_Barrier(ctrl->omp_comm);
251: unlink(pathname);
252: } else {
253: MPI_Bcast(pathname,PETSC_MAX_PATH_LEN,MPI_CHAR,0,ctrl->omp_comm);
254: fd = open(pathname,O_RDWR); if(fd == -1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Could not open tmp file %s\n", pathname);
255: baseptr = mmap(NULL,size,PROT_READ | PROT_WRITE, MAP_SHARED,fd,0); if (baseptr == MAP_FAILED) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"mmap() failed\n");
256: close(fd);
257: MPI_Barrier(ctrl->omp_comm);
258: }
259: #else
260: size = ctrl->is_omp_master ? sizeof(pthread_barrier_t) : 0;
261: MPI_Win_allocate_shared(size,1,MPI_INFO_NULL,ctrl->omp_comm,&baseptr,&ctrl->omp_win);
262: MPI_Win_shared_query(ctrl->omp_win,0,&size,&disp_unit,&baseptr);
263: #endif
264: ctrl->barrier = (pthread_barrier_t*)baseptr;
266: /* omp master initializes the barrier */
267: if (ctrl->is_omp_master) {
268: MPI_Comm_size(ctrl->omp_comm,&ctrl->omp_comm_size);
269: pthread_barrierattr_init(&attr);
270: pthread_barrierattr_setpshared(&attr,PTHREAD_PROCESS_SHARED); /* make the barrier also work for processes */
271: pthread_barrier_init(ctrl->barrier,&attr,(unsigned int)ctrl->omp_comm_size);
272: pthread_barrierattr_destroy(&attr);
273: }
275: /* this MPI_Barrier is to make sure the omp barrier is initialized before slaves use it */
276: MPI_Barrier(ctrl->omp_comm);
277: return(0);
278: }
280: /* Destroy the pthread barrier in the PETSc OpenMP controller */
281: PETSC_STATIC_INLINE PetscErrorCode PetscOmpCtrlDestroyBarrier(PetscOmpCtrl ctrl)282: {
286: /* this MPI_Barrier is to make sure slaves have finished using the omp barrier before master destroys it */
287: MPI_Barrier(ctrl->omp_comm);
288: if (ctrl->is_omp_master) { pthread_barrier_destroy(ctrl->barrier); }
290: #if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
291: munmap(ctrl->barrier,sizeof(pthread_barrier_t));
292: #else
293: MPI_Win_free(&ctrl->omp_win);
294: #endif
295: return(0);
296: }
298: /*@C
299: PetscOmpCtrlCreate - create a PETSc OpenMP controller, which manages PETSc's interaction with third party libraries using OpenMP
301: Input Parameter:
302: + petsc_comm - a communicator some PETSc object (for example, a matrix) lives in
303: - nthreads - number of threads per MPI rank to spawn in a library using OpenMP. If nthreads = -1, let PETSc decide a suitable value
305: Output Parameter:
306: . pctrl - a PETSc OpenMP controller
308: Level: developer
310: TODO: Possibly use the variable PetscNumOMPThreads to determine the number for threads to use
312: .seealso PetscOmpCtrlDestroy()
313: @*/
314: PetscErrorCodePetscOmpCtrlCreate(MPI_Comm petsc_comm,PetscInt nthreads,PetscOmpCtrl *pctrl)315: {
316: PetscErrorCode ierr;
317: PetscOmpCtrl ctrl;
318: unsigned long *cpu_ulongs=NULL;
319: PetscInt i,nr_cpu_ulongs;
320: PetscShmComm pshmcomm;
321: MPI_Comm shm_comm;
322: PetscMPIInt shm_rank,shm_comm_size,omp_rank,color;
323: PetscInt num_packages,num_cores;
326: PetscNew(&ctrl);
328: /*=================================================================================
329: Init hwloc
330: ==================================================================================*/
331: hwloc_topology_init(&ctrl->topology);
332: #if HWLOC_API_VERSION >= 0x00020000
333: /* to filter out unneeded info and have faster hwloc_topology_load */
334: hwloc_topology_set_all_types_filter(ctrl->topology,HWLOC_TYPE_FILTER_KEEP_NONE);
335: hwloc_topology_set_type_filter(ctrl->topology,HWLOC_OBJ_CORE,HWLOC_TYPE_FILTER_KEEP_ALL);
336: #endif
337: hwloc_topology_load(ctrl->topology);
339: /*=================================================================================
340: Split petsc_comm into multiple omp_comms. Ranks in an omp_comm have access to
341: physically shared memory. Rank 0 of each omp_comm is called an OMP master, and
342: others are called slaves. OMP Masters make up a new comm called omp_master_comm,
343: which is usually passed to third party libraries.
344: ==================================================================================*/
346: /* fetch the stored shared memory communicator */
347: PetscShmCommGet(petsc_comm,&pshmcomm);
348: PetscShmCommGetMpiShmComm(pshmcomm,&shm_comm);
350: MPI_Comm_rank(shm_comm,&shm_rank);
351: MPI_Comm_size(shm_comm,&shm_comm_size);
353: /* PETSc decides nthreads, which is the smaller of shm_comm_size or cores per package(socket) */
354: if (nthreads == -1) {
355: num_packages = hwloc_get_nbobjs_by_type(ctrl->topology,HWLOC_OBJ_PACKAGE) <= 0 ? 1 : hwloc_get_nbobjs_by_type(ctrl->topology,HWLOC_OBJ_PACKAGE);
356: num_cores = hwloc_get_nbobjs_by_type(ctrl->topology,HWLOC_OBJ_CORE) <= 0 ? 1 : hwloc_get_nbobjs_by_type(ctrl->topology,HWLOC_OBJ_CORE);
357: nthreads = num_cores/num_packages;
358: if (nthreads > shm_comm_size) nthreads = shm_comm_size;
359: }
361: if (nthreads < 1 || nthreads > shm_comm_size) SETERRQ2(petsc_comm,PETSC_ERR_ARG_OUTOFRANGE,"number of OpenMP threads %D can not be < 1 or > the MPI shared memory communicator size %d\n",nthreads,shm_comm_size);
362: if (shm_comm_size % nthreads) { PetscPrintf(petsc_comm,"Warning: number of OpenMP threads %D is not a factor of the MPI shared memory communicator size %d, which may cause load-imbalance!\n",nthreads,shm_comm_size); }
364: /* split shm_comm into a set of omp_comms with each of size nthreads. Ex., if
365: shm_comm_size=16, nthreads=8, then ranks 0~7 get color 0 and ranks 8~15 get
366: color 1. They are put in two omp_comms. Note that petsc_ranks may or may not
367: be consecutive in a shm_comm, but shm_ranks always run from 0 to shm_comm_size-1.
368: Use 0 as key so that rank ordering wont change in new comm.
369: */
370: color = shm_rank / nthreads;
371: MPI_Comm_split(shm_comm,color,0/*key*/,&ctrl->omp_comm);
373: /* put rank 0's in omp_comms (i.e., master ranks) into a new comm - omp_master_comm */
374: MPI_Comm_rank(ctrl->omp_comm,&omp_rank);
375: if (!omp_rank) {
376: ctrl->is_omp_master = PETSC_TRUE; /* master */
377: color = 0;
378: } else {
379: ctrl->is_omp_master = PETSC_FALSE; /* slave */
380: color = MPI_UNDEFINED; /* to make slaves get omp_master_comm = MPI_COMM_NULL in MPI_Comm_split */
381: }
382: MPI_Comm_split(petsc_comm,color,0/*key*/,&ctrl->omp_master_comm); /* rank 0 in omp_master_comm is rank 0 in petsc_comm */
384: /*=================================================================================
385: Each omp_comm has a pthread_barrier_t in its shared memory, which is used to put
386: slave ranks in sleep and idle their CPU, so that the master can fork OMP threads
387: and run them on the idle CPUs.
388: ==================================================================================*/
389: PetscOmpCtrlCreateBarrier(ctrl);
391: /*=================================================================================
392: omp master logs its cpu binding (i.e., cpu set) and computes a new binding that
393: is the union of the bindings of all ranks in the omp_comm
394: =================================================================================*/
396: ctrl->cpuset = hwloc_bitmap_alloc(); if (!ctrl->cpuset) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"hwloc_bitmap_alloc() failed\n");
397: hwloc_get_cpubind(ctrl->topology,ctrl->cpuset, HWLOC_CPUBIND_PROCESS);
399: /* hwloc main developer said they will add new APIs hwloc_bitmap_{nr,to,from}_ulongs in 2.1 to help us simplify the following bitmap pack/unpack code */
400: nr_cpu_ulongs = (hwloc_bitmap_last(hwloc_topology_get_topology_cpuset (ctrl->topology))+sizeof(unsigned long)*8)/sizeof(unsigned long)/8;
401: PetscMalloc1(nr_cpu_ulongs,&cpu_ulongs);
402: if (nr_cpu_ulongs == 1) {
403: cpu_ulongs[0] = hwloc_bitmap_to_ulong(ctrl->cpuset);
404: } else {
405: for (i=0; i<nr_cpu_ulongs; i++) cpu_ulongs[i] = hwloc_bitmap_to_ith_ulong(ctrl->cpuset,(unsigned)i);
406: }
408: MPI_Reduce(ctrl->is_omp_master ? MPI_IN_PLACE : cpu_ulongs, cpu_ulongs,nr_cpu_ulongs, MPI_UNSIGNED_LONG,MPI_BOR,0,ctrl->omp_comm);
410: if (ctrl->is_omp_master) {
411: ctrl->omp_cpuset = hwloc_bitmap_alloc(); if (!ctrl->omp_cpuset) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"hwloc_bitmap_alloc() failed\n");
412: if (nr_cpu_ulongs == 1) {
413: #if HWLOC_API_VERSION >= 0x00020000
414: hwloc_bitmap_from_ulong(ctrl->omp_cpuset,cpu_ulongs[0]);
415: #else
416: hwloc_bitmap_from_ulong(ctrl->omp_cpuset,cpu_ulongs[0]);
417: #endif
418: } else {
419: for (i=0; i<nr_cpu_ulongs; i++) {
420: #if HWLOC_API_VERSION >= 0x00020000
421: hwloc_bitmap_set_ith_ulong(ctrl->omp_cpuset,(unsigned)i,cpu_ulongs[i]);
422: #else
423: hwloc_bitmap_set_ith_ulong(ctrl->omp_cpuset,(unsigned)i,cpu_ulongs[i]);
424: #endif
425: }
426: }
427: }
429: PetscFree(cpu_ulongs);
430: *pctrl = ctrl;
431: return(0);
432: }
434: /*@C
435: PetscOmpCtrlDestroy - destroy the PETSc OpenMP controller
437: Input Parameter:
438: . pctrl - a PETSc OpenMP controller
440: Level: developer
442: .seealso PetscOmpCtrlCreate()
443: @*/
444: PetscErrorCodePetscOmpCtrlDestroy(PetscOmpCtrl *pctrl)445: {
446: PetscErrorCode ierr;
447: PetscOmpCtrl ctrl = *pctrl;
450: hwloc_bitmap_free(ctrl->cpuset);
451: hwloc_topology_destroy(ctrl->topology);
452: PetscOmpCtrlDestroyBarrier(ctrl);
453: MPI_Comm_free(&ctrl->omp_comm);
454: if (ctrl->is_omp_master) {
455: hwloc_bitmap_free(ctrl->omp_cpuset);
456: MPI_Comm_free(&ctrl->omp_master_comm);
457: }
458: PetscFree(ctrl);
459: return(0);
460: }
462: /*@C
463: PetscOmpCtrlGetOmpComms - Get MPI communicators from a PETSc OMP controller
465: Input Parameter:
466: . ctrl - a PETSc OMP controller
468: Output Parameter:
469: + omp_comm - a communicator that includes a master rank and slave ranks where master spawns threads
470: . omp_master_comm - on master ranks, return a communicator that include master ranks of each omp_comm;
471: on slave ranks, MPI_COMM_NULL will be return in reality.
472: - is_omp_master - true if the calling process is an OMP master rank.
474: Notes: any output parameter can be NULL. The parameter is just ignored.
476: Level: developer
477: @*/
478: PetscErrorCodePetscOmpCtrlGetOmpComms(PetscOmpCtrl ctrl,MPI_Comm *omp_comm,MPI_Comm *omp_master_comm,PetscBool *is_omp_master)479: {
481: if (omp_comm) *omp_comm = ctrl->omp_comm;
482: if (omp_master_comm) *omp_master_comm = ctrl->omp_master_comm;
483: if (is_omp_master) *is_omp_master = ctrl->is_omp_master;
484: return(0);
485: }
487: /*@C
488: PetscOmpCtrlBarrier - Do barrier on MPI ranks in omp_comm contained by the PETSc OMP controller (to let slave ranks free their CPU)
490: Input Parameter:
491: . ctrl - a PETSc OMP controller
493: Notes:
494: this is a pthread barrier on MPI processes. Using MPI_Barrier instead is conceptually correct. But MPI standard does not
495: require processes blocked by MPI_Barrier free their CPUs to let other processes progress. In practice, to minilize latency,
496: MPI processes stuck in MPI_Barrier keep polling and do not free CPUs. In contrast, pthread_barrier has this requirement.
498: A code using PetscOmpCtrlBarrier() would be like this,
500: if (is_omp_master) {
501: PetscOmpCtrlOmpRegionOnMasterBegin(ctrl);
502: Call the library using OpenMP
503: PetscOmpCtrlOmpRegionOnMasterEnd(ctrl);
504: }
505: PetscOmpCtrlBarrier(ctrl);
507: Level: developer
509: .seealso PetscOmpCtrlOmpRegionOnMasterBegin(), PetscOmpCtrlOmpRegionOnMasterEnd()
510: @*/
511: PetscErrorCodePetscOmpCtrlBarrier(PetscOmpCtrl ctrl)512: {
516: pthread_barrier_wait(ctrl->barrier);
517: if (ierr && ierr != PTHREAD_BARRIER_SERIAL_THREAD) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"pthread_barrier_wait failed within PetscOmpCtrlBarrier with return code %D\n", ierr);
518: return(0);
519: }
521: /*@C
522: PetscOmpCtrlOmpRegionOnMasterBegin - Mark the beginning of an OpenMP library call on master ranks
524: Input Parameter:
525: . ctrl - a PETSc OMP controller
527: Notes:
528: Only master ranks can call this function. Call PetscOmpCtrlGetOmpComms() to know if this is a master rank.
529: This function changes CPU binding of master ranks and nthreads-var of OpenMP runtime
531: Level: developer
533: .seealso: PetscOmpCtrlOmpRegionOnMasterEnd()
534: @*/
535: PetscErrorCodePetscOmpCtrlOmpRegionOnMasterBegin(PetscOmpCtrl ctrl)536: {
540: hwloc_set_cpubind(ctrl->topology,ctrl->omp_cpuset,HWLOC_CPUBIND_PROCESS);
541: omp_set_num_threads(ctrl->omp_comm_size); /* may override the OMP_NUM_THREAD env var */
542: return(0);
543: }
545: /*@C
546: PetscOmpCtrlOmpRegionOnMasterEnd - Mark the end of an OpenMP library call on master ranks
548: Input Parameter:
549: . ctrl - a PETSc OMP controller
551: Notes:
552: Only master ranks can call this function. Call PetscOmpCtrlGetOmpComms() to know if this is a master rank.
553: This function restores the CPU binding of master ranks and set and nthreads-var of OpenMP runtime to 1.
555: Level: developer
557: .seealso: PetscOmpCtrlOmpRegionOnMasterBegin()
558: @*/
559: PetscErrorCodePetscOmpCtrlOmpRegionOnMasterEnd(PetscOmpCtrl ctrl)560: {
564: hwloc_set_cpubind(ctrl->topology,ctrl->cpuset,HWLOC_CPUBIND_PROCESS);
565: omp_set_num_threads(1);
566: return(0);
567: }
569: #undef USE_MMAP_ALLOCATE_SHARED_MEMORY570: #endif /* defined(PETSC_HAVE_OPENMP_SUPPORT) */