Actual source code: mpishm.c
1: #include <petscsys.h>
2: #include <petsc/private/petscimpl.h>
4: struct _n_PetscShmComm {
5: PetscMPIInt *globranks; /* global ranks of each rank in the shared memory communicator */
6: PetscMPIInt shmsize; /* size of the shared memory communicator */
7: MPI_Comm globcomm,shmcomm; /* global communicator and shared memory communicator (a sub-communicator of the former) */
8: };
10: /*
11: Private routine to delete internal shared memory communicator when a communicator is freed.
13: This is called by MPI_Comm_free() when the communicator that has this data as an attribute is freed; it is called by MPI, not directly by users.
15: Note: this is declared extern "C" because it is passed to MPI_Comm_create_keyval()
17: */
18: PETSC_EXTERN PetscMPIInt MPIAPI Petsc_ShmComm_Attr_Delete_Fn(MPI_Comm comm,PetscMPIInt keyval,void *val,void *extra_state)
19: {
20: PetscShmComm p = (PetscShmComm)val;
22: PetscInfo(NULL,"Deleting shared memory subcommunicator in an MPI_Comm %ld\n",(long)comm);
23: MPI_Comm_free(&p->shmcomm);
24: PetscFree(p->globranks);
25: PetscFree(val);
26: return MPI_SUCCESS;
27: }
29: #ifdef PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY
30: /* Data structures to support freeing comms created in PetscShmCommGet().
31: Since communicators passed to PetscShmCommGet() are very likely either a petsc
32: inner communicator or an MPI communicator with a linked petsc inner
33: communicator, a simple static array suffices to store the communicators we
34: dup in the rare remaining cases.
35: */
36: #define MAX_SHMCOMM_DUPPED_COMMS 16
37: static PetscInt num_dupped_comms=0;
38: static MPI_Comm shmcomm_dupped_comms[MAX_SHMCOMM_DUPPED_COMMS];
39: static PetscErrorCode PetscShmCommDestroyDuppedComms(void)
40: {
41: PetscInt i;
42: for (i=0; i<num_dupped_comms; i++) PetscCommDestroy(&shmcomm_dupped_comms[i]);
43: num_dupped_comms = 0; /* reset so that PETSc can be reinitialized */
44: return 0;
45: }
46: #endif
48: /*@C
49: PetscShmCommGet - Given a communicator returns a sub-communicator of all ranks that share a common memory
51: Collective.
53: Input Parameter:
54: . globcomm - MPI_Comm, which can be a user MPI_Comm or a PETSc inner MPI_Comm
56: Output Parameter:
57: . pshmcomm - the PETSc shared memory communicator object
59: Level: developer
61: Notes:
62: When used with MPICH, MPICH must be configured with --download-mpich-device=ch3:nemesis
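
   Example Usage (a minimal sketch; error checking omitted, variable names are only illustrative):
.vb
   PetscShmComm pshmcomm;
   MPI_Comm     shmcomm;
   PetscMPIInt  nodesize;
   PetscShmCommGet(PETSC_COMM_WORLD,&pshmcomm);    // ranks of PETSC_COMM_WORLD that share memory with this process
   PetscShmCommGetMpiShmComm(pshmcomm,&shmcomm);   // the underlying MPI shared memory communicator
   MPI_Comm_size(shmcomm,&nodesize);               // number of ranks on this shared memory node
.ve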
64: @*/
65: PetscErrorCode PetscShmCommGet(MPI_Comm globcomm,PetscShmComm *pshmcomm)
66: {
67: #ifdef PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY
68: MPI_Group globgroup,shmgroup;
69: PetscMPIInt *shmranks,i,flg;
70: PetscCommCounter *counter;
73: /* Get a petsc inner comm, since we always want to stash pshmcomm on petsc inner comms */
74: MPI_Comm_get_attr(globcomm,Petsc_Counter_keyval,&counter,&flg);
75: if (!flg) { /* globcomm is not a petsc comm */
76: union {MPI_Comm comm; void *ptr;} ucomm;
77: /* check if globcomm already has a linked petsc inner comm */
78: MPI_Comm_get_attr(globcomm,Petsc_InnerComm_keyval,&ucomm,&flg);
79: if (!flg) {
80: /* globcomm does not have a linked petsc inner comm, so we create one and replace globcomm with it */
82: PetscCommDuplicate(globcomm,&globcomm,NULL);
83: /* Register a function to free the dupped petsc comms at PetscFinalize() the first time we dup one */
84: if (num_dupped_comms == 0) PetscRegisterFinalize(PetscShmCommDestroyDuppedComms);
85: shmcomm_dupped_comms[num_dupped_comms] = globcomm;
86: num_dupped_comms++;
87: } else {
88: /* otherwise, we pull out the inner comm and use it as globcomm */
89: globcomm = ucomm.comm;
90: }
91: }
93: /* Check if globcomm already has an attached pshmcomm. If no, create one */
94: MPI_Comm_get_attr(globcomm,Petsc_ShmComm_keyval,pshmcomm,&flg);
95: if (flg) return 0;
97: PetscNew(pshmcomm);
98: (*pshmcomm)->globcomm = globcomm;
100: MPI_Comm_split_type(globcomm, MPI_COMM_TYPE_SHARED,0, MPI_INFO_NULL,&(*pshmcomm)->shmcomm);
102: MPI_Comm_size((*pshmcomm)->shmcomm,&(*pshmcomm)->shmsize);
103: MPI_Comm_group(globcomm, &globgroup);
104: MPI_Comm_group((*pshmcomm)->shmcomm, &shmgroup);
105: PetscMalloc1((*pshmcomm)->shmsize,&shmranks);
106: PetscMalloc1((*pshmcomm)->shmsize,&(*pshmcomm)->globranks);
107: for (i=0; i<(*pshmcomm)->shmsize; i++) shmranks[i] = i;
108: MPI_Group_translate_ranks(shmgroup, (*pshmcomm)->shmsize, shmranks, globgroup, (*pshmcomm)->globranks);
109: PetscFree(shmranks);
110: MPI_Group_free(&globgroup);
111: MPI_Group_free(&shmgroup);
113: for (i=0; i<(*pshmcomm)->shmsize; i++) {
114: PetscInfo(NULL,"Shared memory rank %d global rank %d\n",i,(*pshmcomm)->globranks[i]);
115: }
116: MPI_Comm_set_attr(globcomm,Petsc_ShmComm_keyval,*pshmcomm);
117: return 0;
118: #else
119: SETERRQ(globcomm, PETSC_ERR_SUP, "Shared memory communicators need MPI-3 package support.\nPlease upgrade your MPI or reconfigure with --download-mpich.");
120: #endif
121: }
123: /*@C
124: PetscShmCommGlobalToLocal - Given a global rank returns the local rank in the shared memory communicator
126: Input Parameters:
127: + pshmcomm - the shared memory communicator object
128: - grank - the global rank
130: Output Parameter:
131: . lrank - the local rank, or MPI_PROC_NULL if it does not exist
133: Level: developer
135: Developer Notes:
136: Assumes the pshmcomm->globranks[] is sorted
138: It may be better to rewrite this to map multiple global ranks to local in the same function call
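
   Example Usage (a minimal sketch; error checking omitted, assuming pshmcomm was obtained with PetscShmCommGet()):
.vb
   PetscMPIInt lrank;
   PetscShmCommGlobalToLocal(pshmcomm,0,&lrank);   // local rank of global rank 0
   if (lrank == MPI_PROC_NULL) {
     // global rank 0 does not share memory with this process
   }
.ve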
140: @*/
141: PetscErrorCode PetscShmCommGlobalToLocal(PetscShmComm pshmcomm,PetscMPIInt grank,PetscMPIInt *lrank)
142: {
143: PetscMPIInt low,high,t,i;
144: PetscBool flg = PETSC_FALSE;
148: *lrank = MPI_PROC_NULL;
149: if (grank < pshmcomm->globranks[0]) return 0;
150: if (grank > pshmcomm->globranks[pshmcomm->shmsize-1]) return 0;
151: PetscOptionsGetBool(NULL,NULL,"-noshared",&flg,NULL);
152: if (flg) return 0;
153: low = 0;
154: high = pshmcomm->shmsize;
155: while (high-low > 5) {
156: t = (low+high)/2;
157: if (pshmcomm->globranks[t] > grank) high = t;
158: else low = t;
159: }
160: for (i=low; i<high; i++) {
161: if (pshmcomm->globranks[i] > grank) return 0;
162: if (pshmcomm->globranks[i] == grank) {
163: *lrank = i;
164: return 0;
165: }
166: }
167: return 0;
168: }
170: /*@C
171: PetscShmCommLocalToGlobal - Given a local rank in the shared memory communicator returns the global rank
173: Input Parameters:
174: + pshmcomm - the shared memory communicator object
175: - lrank - the local rank in the shared memory communicator
177: Output Parameter:
178: . grank - the global rank in the global communicator where the shared memory communicator is built
180: Level: developer
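
   Example Usage (a minimal sketch; error checking omitted, assuming pshmcomm was obtained with PetscShmCommGet()):
.vb
   PetscMPIInt grank;
   PetscShmCommLocalToGlobal(pshmcomm,0,&grank);   // global rank of local rank 0 in the shared memory communicator
.ve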
182: @*/
183: PetscErrorCode PetscShmCommLocalToGlobal(PetscShmComm pshmcomm,PetscMPIInt lrank,PetscMPIInt *grank)
184: {
188: *grank = pshmcomm->globranks[lrank];
189: return 0;
190: }
192: /*@C
193: PetscShmCommGetMpiShmComm - Returns the MPI communicator that represents all processes with common shared memory
195: Input Parameter:
196: . pshmcomm - PetscShmComm object obtained with PetscShmCommGet()
198: Output Parameter:
199: . comm - the MPI communicator
201: Level: developer
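
   Example Usage (a minimal sketch of a typical use with MPI-3 shared memory windows; error checking omitted):
.vb
   MPI_Comm shmcomm;
   MPI_Win  win;
   void     *baseptr;
   PetscShmCommGetMpiShmComm(pshmcomm,&shmcomm);
   MPI_Win_allocate_shared(4096,1,MPI_INFO_NULL,shmcomm,&baseptr,&win);   // memory shared by the ranks on this node
   // ... use baseptr ...
   MPI_Win_free(&win);
.ve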
203: @*/
204: PetscErrorCode PetscShmCommGetMpiShmComm(PetscShmComm pshmcomm,MPI_Comm *comm)
205: {
208: *comm = pshmcomm->shmcomm;
209: return 0;
210: }
212: #if defined(PETSC_HAVE_OPENMP_SUPPORT)
213: #include <pthread.h>
214: #include <hwloc.h>
215: #include <omp.h>
217: /* Use mmap() to allocate shared memory (for the pthread_barrier_t object) if it is available,
218: otherwise use MPI_Win_allocate_shared. They should have the same effect except MPI-3 is much
219: simpler to use. However, on a Cori Haswell node with Cray MPI, MPI-3 worsened a test's performance
220: by 50%. Until the reason is found, we use mmap() instead.
221: */
222: #define USE_MMAP_ALLOCATE_SHARED_MEMORY
224: #if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
225: #include <sys/mman.h>
226: #include <sys/types.h>
227: #include <sys/stat.h>
228: #include <fcntl.h>
229: #endif
231: struct _n_PetscOmpCtrl {
232: MPI_Comm omp_comm; /* a shared memory communicator to spawn omp threads */
233: MPI_Comm omp_master_comm; /* a communicator to give to third party libraries */
234: PetscMPIInt omp_comm_size; /* size of omp_comm, a kind of OMP_NUM_THREADS */
235: PetscBool is_omp_master; /* true on rank 0 of omp_comm, i.e., the master */
236: MPI_Win omp_win; /* a shared memory window containing a barrier */
237: pthread_barrier_t *barrier; /* pointer to the barrier */
238: hwloc_topology_t topology;
239: hwloc_cpuset_t cpuset; /* cpu bindings of omp master */
240: hwloc_cpuset_t omp_cpuset; /* union of cpu bindings of ranks in omp_comm */
241: };
243: /* Allocate and initialize a pthread_barrier_t object in memory shared by processes in omp_comm
244: contained by the controller.
246: PETSc OpenMP controller users do not call this function directly. This function exists
247: only because we want to separate shared memory allocation methods from other code.
248: */
249: static inline PetscErrorCode PetscOmpCtrlCreateBarrier(PetscOmpCtrl ctrl)
250: {
251: MPI_Aint size;
252: void *baseptr;
253: pthread_barrierattr_t attr;
255: #if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
256: PetscInt fd;
257: PetscChar pathname[PETSC_MAX_PATH_LEN];
258: #else
259: PetscMPIInt disp_unit;
260: #endif
262: #if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
263: size = sizeof(pthread_barrier_t);
264: if (ctrl->is_omp_master) {
265: /* use PETSC_COMM_SELF in PetscGetTmp, since it is a collective call. Using omp_comm would otherwise bcast the partially populated pathname to slaves */
266: PetscGetTmp(PETSC_COMM_SELF,pathname,PETSC_MAX_PATH_LEN);
267: PetscStrlcat(pathname,"/petsc-shm-XXXXXX",PETSC_MAX_PATH_LEN);
268: /* mkstemp replaces XXXXXX with a unique file name and opens the file for us */
269: fd = mkstemp(pathname);
270: ftruncate(fd,size);
271: baseptr = mmap(NULL,size,PROT_READ | PROT_WRITE, MAP_SHARED,fd,0);
272: close(fd);
273: MPI_Bcast(pathname,PETSC_MAX_PATH_LEN,MPI_CHAR,0,ctrl->omp_comm);
274: /* this MPI_Barrier is to wait for slaves to open the file before the master unlinks it */
275: MPI_Barrier(ctrl->omp_comm);
276: unlink(pathname);
277: } else {
278: MPI_Bcast(pathname,PETSC_MAX_PATH_LEN,MPI_CHAR,0,ctrl->omp_comm);
279: fd = open(pathname,O_RDWR);
280: baseptr = mmap(NULL,size,PROT_READ | PROT_WRITE, MAP_SHARED,fd,0);
281: close(fd);
282: MPI_Barrier(ctrl->omp_comm);
283: }
284: #else
285: size = ctrl->is_omp_master ? sizeof(pthread_barrier_t) : 0;
286: MPI_Win_allocate_shared(size,1,MPI_INFO_NULL,ctrl->omp_comm,&baseptr,&ctrl->omp_win);
287: MPI_Win_shared_query(ctrl->omp_win,0,&size,&disp_unit,&baseptr);
288: #endif
289: ctrl->barrier = (pthread_barrier_t*)baseptr;
291: /* omp master initializes the barrier */
292: if (ctrl->is_omp_master) {
293: MPI_Comm_size(ctrl->omp_comm,&ctrl->omp_comm_size);
294: pthread_barrierattr_init(&attr);
295: pthread_barrierattr_setpshared(&attr,PTHREAD_PROCESS_SHARED); /* make the barrier also work for processes */
296: pthread_barrier_init(ctrl->barrier,&attr,(unsigned int)ctrl->omp_comm_size);
297: pthread_barrierattr_destroy(&attr);
298: }
300: /* this MPI_Barrier is to make sure the omp barrier is initialized before slaves use it */
301: MPI_Barrier(ctrl->omp_comm);
302: return 0;
303: }
305: /* Destroy the pthread barrier in the PETSc OpenMP controller */
306: static inline PetscErrorCode PetscOmpCtrlDestroyBarrier(PetscOmpCtrl ctrl)
307: {
308: /* this MPI_Barrier is to make sure slaves have finished using the omp barrier before master destroys it */
309: MPI_Barrier(ctrl->omp_comm);
310: if (ctrl->is_omp_master) pthread_barrier_destroy(ctrl->barrier);
312: #if defined(USE_MMAP_ALLOCATE_SHARED_MEMORY) && defined(PETSC_HAVE_MMAP)
313: munmap(ctrl->barrier,sizeof(pthread_barrier_t));
314: #else
315: MPI_Win_free(&ctrl->omp_win);
316: #endif
317: return 0;
318: }
320: /*@C
321: PetscOmpCtrlCreate - create a PETSc OpenMP controller, which manages PETSc's interaction with third party libraries using OpenMP
323: Input Parameters:
324: + petsc_comm - a communicator some PETSc object (for example, a matrix) lives in
325: - nthreads - number of threads per MPI rank to spawn in a library using OpenMP. If nthreads = -1, let PETSc decide a suitable value
327: Output Parameter:
328: . pctrl - a PETSc OpenMP controller
330: Level: developer
332: TODO: Possibly use the variable PetscNumOMPThreads to determine the number of threads to use
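
   Example Usage (a minimal sketch; error checking omitted and the third party call is only illustrative):
.vb
   PetscOmpCtrl ctrl;
   PetscBool    is_master;
   PetscOmpCtrlCreate(PETSC_COMM_WORLD,-1,&ctrl);   // let PETSc pick a suitable number of threads
   PetscOmpCtrlGetOmpComms(ctrl,NULL,NULL,&is_master);
   if (is_master) {
     PetscOmpCtrlOmpRegionOnMasterBegin(ctrl);
     // call the third party library that uses OpenMP here
     PetscOmpCtrlOmpRegionOnMasterEnd(ctrl);
   }
   PetscOmpCtrlBarrier(ctrl);                       // slaves idle here while the master runs OpenMP
   PetscOmpCtrlDestroy(&ctrl);
.ve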
334: .seealso PetscOmpCtrlDestroy()
335: @*/
336: PetscErrorCode PetscOmpCtrlCreate(MPI_Comm petsc_comm,PetscInt nthreads,PetscOmpCtrl *pctrl)
337: {
338: PetscOmpCtrl ctrl;
339: unsigned long *cpu_ulongs = NULL;
340: PetscInt i,nr_cpu_ulongs;
341: PetscShmComm pshmcomm;
342: MPI_Comm shm_comm;
343: PetscMPIInt shm_rank,shm_comm_size,omp_rank,color;
344: PetscInt num_packages,num_cores;
346: PetscNew(&ctrl);
348: /*=================================================================================
349: Init hwloc
350: ==================================================================================*/
351: hwloc_topology_init(&ctrl->topology);
352: #if HWLOC_API_VERSION >= 0x00020000
353: /* to filter out unneeded info and have faster hwloc_topology_load */
354: hwloc_topology_set_all_types_filter(ctrl->topology,HWLOC_TYPE_FILTER_KEEP_NONE);
355: hwloc_topology_set_type_filter(ctrl->topology,HWLOC_OBJ_CORE,HWLOC_TYPE_FILTER_KEEP_ALL);
356: #endif
357: hwloc_topology_load(ctrl->topology);
359: /*=================================================================================
360: Split petsc_comm into multiple omp_comms. Ranks in an omp_comm have access to
361: physically shared memory. Rank 0 of each omp_comm is called an OMP master, and
362: others are called slaves. OMP Masters make up a new comm called omp_master_comm,
363: which is usually passed to third party libraries.
364: ==================================================================================*/
366: /* fetch the stored shared memory communicator */
367: PetscShmCommGet(petsc_comm,&pshmcomm);
368: PetscShmCommGetMpiShmComm(pshmcomm,&shm_comm);
370: MPI_Comm_rank(shm_comm,&shm_rank);
371: MPI_Comm_size(shm_comm,&shm_comm_size);
373: /* PETSc decides nthreads, which is the smaller of shm_comm_size and the number of cores per package (socket) */
374: if (nthreads == -1) {
375: num_packages = hwloc_get_nbobjs_by_type(ctrl->topology,HWLOC_OBJ_PACKAGE) <= 0 ? 1 : hwloc_get_nbobjs_by_type(ctrl->topology,HWLOC_OBJ_PACKAGE);
376: num_cores = hwloc_get_nbobjs_by_type(ctrl->topology,HWLOC_OBJ_CORE) <= 0 ? 1 : hwloc_get_nbobjs_by_type(ctrl->topology,HWLOC_OBJ_CORE);
377: nthreads = num_cores/num_packages;
378: if (nthreads > shm_comm_size) nthreads = shm_comm_size;
379: }
382: if (shm_comm_size % nthreads) PetscPrintf(petsc_comm,"Warning: number of OpenMP threads %" PetscInt_FMT " is not a factor of the MPI shared memory communicator size %d, which may cause load-imbalance!\n",nthreads,shm_comm_size);
384: /* split shm_comm into a set of omp_comms with each of size nthreads. Ex., if
385: shm_comm_size=16, nthreads=8, then ranks 0~7 get color 0 and ranks 8~15 get
386: color 1. They are put in two omp_comms. Note that petsc_ranks may or may not
387: be consecutive in a shm_comm, but shm_ranks always run from 0 to shm_comm_size-1.
388: Use 0 as the key so that rank ordering won't change in the new comm.
389: */
390: color = shm_rank / nthreads;
391: MPI_Comm_split(shm_comm,color,0/*key*/,&ctrl->omp_comm);
393: /* put rank 0's in omp_comms (i.e., master ranks) into a new comm - omp_master_comm */
394: MPI_Comm_rank(ctrl->omp_comm,&omp_rank);
395: if (!omp_rank) {
396: ctrl->is_omp_master = PETSC_TRUE; /* master */
397: color = 0;
398: } else {
399: ctrl->is_omp_master = PETSC_FALSE; /* slave */
400: color = MPI_UNDEFINED; /* to make slaves get omp_master_comm = MPI_COMM_NULL in MPI_Comm_split */
401: }
402: MPI_Comm_split(petsc_comm,color,0/*key*/,&ctrl->omp_master_comm);
404: /*=================================================================================
405: Each omp_comm has a pthread_barrier_t in its shared memory, which is used to put
406: slave ranks to sleep and idle their CPUs, so that the master can fork OMP threads
407: and run them on the idle CPUs.
408: ==================================================================================*/
409: PetscOmpCtrlCreateBarrier(ctrl);
411: /*=================================================================================
412: omp master logs its cpu binding (i.e., cpu set) and computes a new binding that
413: is the union of the bindings of all ranks in the omp_comm
414: =================================================================================*/
416: ctrl->cpuset = hwloc_bitmap_alloc(); /* holds the master's own binding; freed in PetscOmpCtrlDestroy() */
417: hwloc_get_cpubind(ctrl->topology,ctrl->cpuset, HWLOC_CPUBIND_PROCESS);
419: /* hwloc main developer said they will add new APIs hwloc_bitmap_{nr,to,from}_ulongs in 2.1 to help us simplify the following bitmap pack/unpack code */
420: nr_cpu_ulongs = (hwloc_bitmap_last(hwloc_topology_get_topology_cpuset (ctrl->topology))+sizeof(unsigned long)*8)/sizeof(unsigned long)/8;
421: PetscMalloc1(nr_cpu_ulongs,&cpu_ulongs);
422: if (nr_cpu_ulongs == 1) {
423: cpu_ulongs[0] = hwloc_bitmap_to_ulong(ctrl->cpuset);
424: } else {
425: for (i=0; i<nr_cpu_ulongs; i++) cpu_ulongs[i] = hwloc_bitmap_to_ith_ulong(ctrl->cpuset,(unsigned)i);
426: }
428: MPI_Reduce(ctrl->is_omp_master ? MPI_IN_PLACE : cpu_ulongs, cpu_ulongs,nr_cpu_ulongs, MPI_UNSIGNED_LONG,MPI_BOR,0,ctrl->omp_comm);
430: if (ctrl->is_omp_master) {
431: ctrl->omp_cpuset = hwloc_bitmap_alloc(); /* union of bindings, built only on master ranks; freed in PetscOmpCtrlDestroy() */
432: if (nr_cpu_ulongs == 1) {
434: hwloc_bitmap_from_ulong(ctrl->omp_cpuset,cpu_ulongs[0]);
438: } else {
439: for (i=0; i<nr_cpu_ulongs; i++) {
441: hwloc_bitmap_set_ith_ulong(ctrl->omp_cpuset,(unsigned)i,cpu_ulongs[i]);
445: }
446: }
447: }
449: PetscFree(cpu_ulongs);
450: *pctrl = ctrl;
451: return 0;
452: }
454: /*@C
455: PetscOmpCtrlDestroy - destroy the PETSc OpenMP controller
457: Input Parameter:
458: . pctrl - a PETSc OpenMP controller
460: Level: developer
462: .seealso PetscOmpCtrlCreate()
463: @*/
464: PetscErrorCode PetscOmpCtrlDestroy(PetscOmpCtrl *pctrl)
465: {
466: PetscOmpCtrl ctrl = *pctrl;
468: hwloc_bitmap_free(ctrl->cpuset);
469: hwloc_topology_destroy(ctrl->topology);
470: PetscOmpCtrlDestroyBarrier(ctrl);
471: MPI_Comm_free(&ctrl->omp_comm);
472: if (ctrl->is_omp_master) {
473: hwloc_bitmap_free(ctrl->omp_cpuset);
474: MPI_Comm_free(&ctrl->omp_master_comm);
475: }
476: PetscFree(ctrl);
477: return 0;
478: }
480: /*@C
481: PetscOmpCtrlGetOmpComms - Get MPI communicators from a PETSc OMP controller
483: Input Parameter:
484: . ctrl - a PETSc OMP controller
486: Output Parameters:
487: + omp_comm - a communicator that includes a master rank and slave ranks, where the master spawns threads
488: . omp_master_comm - on master ranks, a communicator that includes the master ranks of each omp_comm;
489: on slave ranks, MPI_COMM_NULL is returned
490: - is_omp_master - true if the calling process is an OMP master rank.
492: Notes: Any output parameter can be NULL; such a parameter is simply ignored.
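
   Example Usage (a minimal sketch; error checking omitted, assuming ctrl was obtained with PetscOmpCtrlCreate()):
.vb
   MPI_Comm  omp_comm,omp_master_comm;
   PetscBool is_master;
   PetscOmpCtrlGetOmpComms(ctrl,&omp_comm,&omp_master_comm,&is_master);
   if (is_master) {
     // omp_master_comm is valid here; give it to the third party library
   } else {
     // omp_master_comm is MPI_COMM_NULL on slave ranks
   }
.ve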
494: Level: developer
495: @*/
496: PetscErrorCode PetscOmpCtrlGetOmpComms(PetscOmpCtrl ctrl,MPI_Comm *omp_comm,MPI_Comm *omp_master_comm,PetscBool *is_omp_master)
497: {
498: if (omp_comm) *omp_comm = ctrl->omp_comm;
499: if (omp_master_comm) *omp_master_comm = ctrl->omp_master_comm;
500: if (is_omp_master) *is_omp_master = ctrl->is_omp_master;
501: return 0;
502: }
504: /*@C
505: PetscOmpCtrlBarrier - Do barrier on MPI ranks in omp_comm contained by the PETSc OMP controller (to let slave ranks free their CPU)
507: Input Parameter:
508: . ctrl - a PETSc OMP controller
510: Notes:
511: This is a pthread barrier on MPI processes. Using MPI_Barrier instead would be conceptually correct, but the MPI standard does not
512: require processes blocked in MPI_Barrier to free their CPUs to let other processes progress. In practice, to minimize latency,
513: MPI processes stuck in MPI_Barrier keep polling and do not free their CPUs; a pthread barrier, in contrast, lets waiting processes sleep.
515: A code using PetscOmpCtrlBarrier() would be like this:
516: .vb
517: if (is_omp_master) {
518: PetscOmpCtrlOmpRegionOnMasterBegin(ctrl);
519: Call the library using OpenMP
520: PetscOmpCtrlOmpRegionOnMasterEnd(ctrl);
521: }
522: PetscOmpCtrlBarrier(ctrl);
523: .ve
524: Level: developer
526: .seealso PetscOmpCtrlOmpRegionOnMasterBegin(), PetscOmpCtrlOmpRegionOnMasterEnd()
527: @*/
528: PetscErrorCode PetscOmpCtrlBarrier(PetscOmpCtrl ctrl)
529: {
530: int err;
532: err = pthread_barrier_wait(ctrl->barrier);
533: if (err && err != PTHREAD_BARRIER_SERIAL_THREAD) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"pthread_barrier_wait() failed");
534: return 0;
535: }
537: /*@C
538: PetscOmpCtrlOmpRegionOnMasterBegin - Mark the beginning of an OpenMP library call on master ranks
540: Input Parameter:
541: . ctrl - a PETSc OMP controller
543: Notes:
544: Only master ranks can call this function. Call PetscOmpCtrlGetOmpComms() to know whether the calling rank is a master rank.
545: This function changes the CPU binding of master ranks and the nthreads-var of the OpenMP runtime.
547: Level: developer
549: .seealso: PetscOmpCtrlOmpRegionOnMasterEnd()
550: @*/
551: PetscErrorCode PetscOmpCtrlOmpRegionOnMasterBegin(PetscOmpCtrl ctrl)
552: {
553: hwloc_set_cpubind(ctrl->topology,ctrl->omp_cpuset,HWLOC_CPUBIND_PROCESS);
554: omp_set_num_threads(ctrl->omp_comm_size); /* may override the OMP_NUM_THREADS env var */
555: return 0;
556: }
558: /*@C
559: PetscOmpCtrlOmpRegionOnMasterEnd - Mark the end of an OpenMP library call on master ranks
561: Input Parameter:
562: . ctrl - a PETSc OMP controller
564: Notes:
565: Only master ranks can call this function. Call PetscOmpCtrlGetOmpComms() to know whether the calling rank is a master rank.
566: This function restores the CPU binding of master ranks and sets the nthreads-var of the OpenMP runtime to 1.
568: Level: developer
570: .seealso: PetscOmpCtrlOmpRegionOnMasterBegin()
571: @*/
572: PetscErrorCode PetscOmpCtrlOmpRegionOnMasterEnd(PetscOmpCtrl ctrl)
573: {
574: hwloc_set_cpubind(ctrl->topology,ctrl->cpuset,HWLOC_CPUBIND_PROCESS);
575: omp_set_num_threads(1);
576: return 0;
577: }
579: #undef USE_MMAP_ALLOCATE_SHARED_MEMORY
580: #endif /* defined(PETSC_HAVE_OPENMP_SUPPORT) */