Actual source code: threadcomm.c
petsc-3.4.5 2014-06-29
1: #include <petsc-private/threadcommimpl.h> /*I "petscthreadcomm.h" I*/
2: #include <petscviewer.h>
3: #if defined(PETSC_HAVE_MALLOC_H)
4: #include <malloc.h>
5: #endif
7: static PetscInt N_CORES = -1;
8: PetscBool PetscThreadCommRegisterAllCalled = PETSC_FALSE;
9: PetscFunctionList PetscThreadCommList = NULL;
10: PetscMPIInt Petsc_ThreadComm_keyval = MPI_KEYVAL_INVALID;
11: PetscThreadCommJobQueue PetscJobQueue = NULL;
12: PetscThreadComm PETSC_THREAD_COMM_WORLD = NULL;
14: /* Logging support */
15: PetscLogEvent ThreadComm_RunKernel, ThreadComm_Barrier;
17: static PetscErrorCode PetscThreadCommRunKernel0_Private(PetscThreadComm tcomm,PetscErrorCode (*func)(PetscInt,...));
21: /*@
22: PetscGetNCores - Gets the number of available cores on the system
24: Not Collective
26: Level: developer
28: Notes
29: Defaults to 1 if the available core count cannot be found
31: @*/
32: PetscErrorCode PetscGetNCores(PetscInt *ncores)
33: {
35: if (N_CORES == -1) {
36: N_CORES = 1; /* Default value if number of cores cannot be found out */
38: #if defined(PETSC_HAVE_SYS_SYSINFO_H) && (PETSC_HAVE_GET_NPROCS) /* Linux */
39: N_CORES = get_nprocs();
40: #elif defined(PETSC_HAVE_SYS_SYSCTL_H) && (PETSC_HAVE_SYSCTLBYNAME) /* MacOS, BSD */
41: {
43: size_t len = sizeof(N_CORES);
44: sysctlbyname("hw.activecpu",&N_CORES,&len,NULL,0); /* osx preferes activecpu over ncpu */
45: if (ierr) { /* freebsd check ncpu */
46: sysctlbyname("hw.ncpu",&N_CORES,&len,NULL,0);
47: /* continue even if there is an error */
48: }
49: }
50: #elif defined(PETSC_HAVE_WINDOWS_H) /* Windows */
51: {
52: SYSTEM_INFO sysinfo;
53: GetSystemInfo(&sysinfo);
54: N_CORES = sysinfo.dwNumberOfProcessors;
55: }
56: #endif
57: }
58: if (ncores) *ncores = N_CORES;
59: return(0);
60: }
62: PetscErrorCode PetscThreadCommWorldInitialize();
65: /*
66: PetscGetThreadCommWorld - Gets the global thread communicator.
67: Creates it if it does not exist already.
69: Not Collective
71: Output Parameters:
72: tcommp - pointer to the global thread communicator
74: Level: Intermediate
75: */
76: PetscErrorCode PetscGetThreadCommWorld(PetscThreadComm *tcommp)
77: {
81: if (!PETSC_THREAD_COMM_WORLD) {
82: PetscThreadCommWorldInitialize();
83: }
84: *tcommp = PETSC_THREAD_COMM_WORLD;
85: return(0);
86: }
90: /*@C
91: PetscCommGetThreadComm - Gets the thread communicator
92: associated with the MPI communicator
94: Not Collective
96: Input Parameters:
97: . comm - the MPI communicator
99: Output Parameters:
100: . tcommp - pointer to the thread communicator
102: Notes: If no thread communicator is on the MPI_Comm then the global thread communicator
103: is returned.
104: Level: Intermediate
106: .seealso: PetscThreadCommCreate(), PetscThreadCommDestroy()
107: @*/
108: PetscErrorCode PetscCommGetThreadComm(MPI_Comm comm,PetscThreadComm *tcommp)
109: {
111: PetscMPIInt flg;
112: void *ptr;
115: MPI_Attr_get(comm,Petsc_ThreadComm_keyval,(PetscThreadComm*)&ptr,&flg);
116: if (!flg) {
117: PetscGetThreadCommWorld(tcommp);
118: } else *tcommp = (PetscThreadComm)ptr;
119: return(0);
120: }
124: /*
125: PetscThreadCommCreate - Allocates a thread communicator object
127: Not Collective
129: Output Parameters:
130: . tcomm - pointer to the thread communicator object
132: Level: developer
134: .seealso: PetscThreadCommDestroy()
135: */
136: PetscErrorCode PetscThreadCommCreate(PetscThreadComm *tcomm)
137: {
138: PetscErrorCode ierr;
139: PetscThreadComm tcommout;
144: *tcomm = NULL;
146: PetscNew(struct _p_PetscThreadComm,&tcommout);
147: tcommout->refct = 0;
148: tcommout->nworkThreads = -1;
149: tcommout->affinities = NULL;
150: PetscNew(struct _PetscThreadCommOps,&tcommout->ops);
151: tcommout->leader = 0;
152: *tcomm = tcommout;
154: return(0);
155: }
157: #if defined(PETSC_USE_DEBUG)
159: static PetscErrorCode PetscThreadCommStackCreate_kernel(PetscInt trank)
160: {
161: if (trank && !PetscStackActive()) {
162: PetscStack *petscstack_in;
163: petscstack_in = (PetscStack*)malloc(sizeof(PetscStack));
164: petscstack_in->currentsize = 0;
165: PetscThreadLocalSetValue((PetscThreadKey*)&petscstack,petscstack_in);
166: }
167: return 0;
168: }
170: /* Creates stack frames for threads other than the main thread */
173: PetscErrorCode PetscThreadCommStackCreate(void)
174: {
176: PetscThreadCommRunKernel0(PETSC_COMM_SELF,(PetscThreadKernel)PetscThreadCommStackCreate_kernel);
177: PetscThreadCommBarrier(PETSC_COMM_SELF);
178: return 0;
179: }
181: static PetscErrorCode PetscThreadCommStackDestroy_kernel(PetscInt trank)
182: {
183: if (trank && PetscStackActive()) {
184: PetscStack *petscstack_in;
185: petscstack_in = (PetscStack*)PetscThreadLocalGetValue(petscstack);
186: free(petscstack_in);
187: PetscThreadLocalSetValue((PetscThreadKey*)&petscstack,(PetscStack*)0);
188: }
189: return 0;
190: }
194: /* Destroy stack frames for threads other than main thread
195: *
196: * The keyval may have been destroyed by the time this function is called, thus we must call
197: * PetscThreadCommRunKernel0_Private so that we never reference an MPI_Comm.
198: */
199: PetscErrorCode PetscThreadCommStackDestroy(void)
200: {
203: PetscThreadCommRunKernel0_Private(PETSC_THREAD_COMM_WORLD,(PetscThreadKernel)PetscThreadCommStackDestroy_kernel);
204: PETSC_THREAD_COMM_WORLD = NULL;
205: return(0);
206: return 0;
207: }
208: #else
211: PetscErrorCode PetscThreadCommStackCreate(void)
212: {
214: return(0);
215: }
219: PetscErrorCode PetscThreadCommStackDestroy(void)
220: {
222: PETSC_THREAD_COMM_WORLD = NULL;
223: return(0);
224: }
226: #endif
230: /*
231: PetscThreadCommDestroy - Frees a thread communicator object
233: Not Collective
235: Input Parameters:
236: . tcomm - the PetscThreadComm object
238: Level: developer
240: .seealso: PetscThreadCommCreate()
241: */
242: PetscErrorCode PetscThreadCommDestroy(PetscThreadComm *tcomm)
243: {
247: if (!*tcomm) return(0);
248: if (!--(*tcomm)->refct) {
249: PetscThreadCommStackDestroy();
250: /* Destroy the implementation specific data struct */
251: if ((*tcomm)->ops->destroy) (*(*tcomm)->ops->destroy)(*tcomm);
253: PetscFree((*tcomm)->affinities);
254: PetscFree((*tcomm)->ops);
255: PetscFree(PetscJobQueue->jobs[0].job_status);
256: PetscFree(PetscJobQueue->jobs);
257: PetscFree(PetscJobQueue);
258: PetscThreadCommReductionDestroy((*tcomm)->red);
259: PetscFree((*tcomm));
260: }
261: *tcomm = NULL;
262: return(0);
263: }
267: /*@C
268: PetscThreadCommView - view a thread communicator
270: Collective on comm
272: Input Parameters:
273: + comm - MPI communicator
274: - viewer - viewer to display, for example PETSC_VIEWER_STDOUT_WORLD
276: Level: developer
278: .seealso: PetscThreadCommCreate()
279: @*/
280: PetscErrorCode PetscThreadCommView(MPI_Comm comm,PetscViewer viewer)
281: {
282: PetscErrorCode ierr;
283: PetscBool iascii;
284: PetscThreadComm tcomm=0;
287: PetscCommGetThreadComm(comm,&tcomm);
288: if (!viewer) {PetscViewerASCIIGetStdout(comm,&viewer);}
289: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
290: if (iascii) {
291: PetscViewerASCIIPrintf(viewer,"Thread Communicator\n");
292: PetscViewerASCIIPushTab(viewer);
293: PetscViewerASCIIPrintf(viewer,"Number of threads = %D\n",tcomm->nworkThreads);
294: PetscViewerASCIIPrintf(viewer,"Type = %s\n",tcomm->type);
295: PetscViewerASCIIPopTab(viewer);
296: if (tcomm->ops->view) {
297: PetscViewerASCIIPushTab(viewer);
298: (*tcomm->ops->view)(tcomm,viewer);
299: PetscViewerASCIIPopTab(viewer);
300: }
301: }
302: return(0);
303: }
307: /*
308: PetscThreadCommSetNThreads - Set the thread count for the thread communicator
310: Not collective
312: Input Parameters:
313: + tcomm - the thread communicator
314: - nthreads - Number of threads
316: Options Database keys:
317: -threadcomm_nthreads <nthreads> Number of threads to use
319: Level: developer
321: Notes:
322: Defaults to using 1 thread.
324: Use nthreads = PETSC_DECIDE or -threadcomm_nthreads PETSC_DECIDE for PETSc to decide the number of threads.
327: .seealso: PetscThreadCommGetNThreads()
328: */
329: PetscErrorCode PetscThreadCommSetNThreads(PetscThreadComm tcomm,PetscInt nthreads)
330: {
332: PetscBool flg;
333: PetscInt nthr;
336: if (nthreads == PETSC_DECIDE) {
337: tcomm->nworkThreads = 1;
338: PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"Thread comm - setting number of threads",NULL);
339: PetscOptionsInt("-threadcomm_nthreads","number of threads to use in the thread communicator","PetscThreadCommSetNThreads",1,&nthr,&flg);
340: PetscOptionsEnd();
341: if (flg) {
342: if (nthr == PETSC_DECIDE) tcomm->nworkThreads = N_CORES;
343: else tcomm->nworkThreads = nthr;
344: }
345: } else tcomm->nworkThreads = nthreads;
346: return(0);
347: }
351: /*@C
352: PetscThreadCommGetNThreads - Gets the thread count from the thread communicator
353: associated with the MPI communicator
355: Not collective
357: Input Parameters:
358: . comm - the MPI communicator
360: Output Parameters:
361: . nthreads - number of threads
363: Level: developer
365: .seealso: PetscThreadCommSetNThreads()
366: @*/
367: PetscErrorCode PetscThreadCommGetNThreads(MPI_Comm comm,PetscInt *nthreads)
368: {
369: PetscErrorCode ierr;
370: PetscThreadComm tcomm=0;
373: PetscCommGetThreadComm(comm,&tcomm);
374: *nthreads = tcomm->nworkThreads;
375: return(0);
376: }
380: /*
381: PetscThreadCommSetAffinities - Sets the core affinity for threads
382: (which threads run on which cores)
384: Not collective
386: Input Parameters:
387: + tcomm - the thread communicator
388: - affinities - array of core affinity for threads
390: Options Database keys:
391: . -threadcomm_affinities <list of thread affinities>
393: Level: developer
395: Notes:
396: Use affinities = NULL for PETSc to decide the affinities.
397: If PETSc decides affinities, then each thread has affinity to
398: a unique core with the main thread on Core 0, thread0 on core 1,
399: and so on. If the thread count is more the number of available
400: cores then multiple threads share a core.
402: The first value is the affinity for the main thread
404: The affinity list can be passed as
405: a comma seperated list: 0,1,2,3,4,5,6,7
406: a range (start-end+1): 0-8
407: a range with given increment (start-end+1:inc): 0-7:2
408: a combination of values and ranges seperated by commas: 0,1-8,8-15:2
410: There must be no intervening spaces between the values.
412: .seealso: PetscThreadCommGetAffinities(), PetscThreadCommSetNThreads()
413: */
414: PetscErrorCode PetscThreadCommSetAffinities(PetscThreadComm tcomm,const PetscInt affinities[])
415: {
417: PetscBool flg;
418: PetscInt nmax=tcomm->nworkThreads;
421: /* Free if affinities set already */
422: PetscFree(tcomm->affinities);
423: PetscMalloc(tcomm->nworkThreads*sizeof(PetscInt),&tcomm->affinities);
425: if (!affinities) {
426: /* Check if option is present in the options database */
427: PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"Thread comm - setting thread affinities",NULL);
428: PetscOptionsIntArray("-threadcomm_affinities","Set core affinities of threads","PetscThreadCommSetAffinities",tcomm->affinities,&nmax,&flg);
429: PetscOptionsEnd();
430: if (flg) {
431: if (nmax != tcomm->nworkThreads) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Must set affinities for all threads, Threads = %D, Core affinities set = %D",tcomm->nworkThreads,nmax);
432: } else {
433: /* PETSc default affinities */
434: PetscInt i;
435: for (i=0; i<tcomm->nworkThreads; i++) tcomm->affinities[i] = i%N_CORES;
436: }
437: } else {
438: PetscMemcpy(tcomm->affinities,affinities,tcomm->nworkThreads*sizeof(PetscInt));
439: }
440: return(0);
441: }
445: /*@C
446: PetscThreadCommGetAffinities - Returns the core affinities set for the
447: thread communicator associated with the MPI_Comm
449: Not collective
451: Input Parameters:
452: . comm - MPI communicator
454: Output Parameters:
455: . affinities - thread affinities
457: Level: developer
459: Notes:
460: The user must allocate space (nthreads PetscInts) for the
461: affinities. Must call PetscThreadCommSetAffinities before.
463: */
464: PetscErrorCode PetscThreadCommGetAffinities(MPI_Comm comm,PetscInt affinities[])
465: {
466: PetscErrorCode ierr;
467: PetscThreadComm tcomm=0;
470: PetscCommGetThreadComm(comm,&tcomm);
472: PetscMemcpy(affinities,tcomm->affinities,tcomm->nworkThreads*sizeof(PetscInt));
473: return(0);
474: }
478: /*
479: PetscThreadCommSetType - Sets the threading model for the thread communicator
481: Logically collective
483: Input Parameters:
484: + tcomm - the thread communicator
485: - type - the type of thread model needed
488: Options Database keys:
489: -threadcomm_type <type>
491: Available types
492: See "petsc/include/petscthreadcomm.h" for available types
494: */
495: PetscErrorCode PetscThreadCommSetType(PetscThreadComm tcomm,PetscThreadCommType type)
496: {
497: PetscErrorCode ierr,(*r)(PetscThreadComm);
498: char ttype[256];
499: PetscBool flg;
503: if (!PetscThreadCommRegisterAllCalled) { PetscThreadCommRegisterAll();}
505: PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"Thread comm - setting threading model",NULL);
506: PetscOptionsList("-threadcomm_type","Thread communicator model","PetscThreadCommSetType",PetscThreadCommList,type,ttype,256,&flg);
507: PetscOptionsEnd();
508: if (!flg) {
509: PetscStrcpy(ttype,type);
510: }
511: PetscFunctionListFind(PetscThreadCommList,ttype,&r);
512: if (!r) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Unable to find requested PetscThreadComm type %s",ttype);
513: (*r)(tcomm);
514: PetscStrcmp(NOTHREAD,tcomm->type,&tcomm->isnothread);
515: return(0);
516: }
520: /* PetscThreadCommBarrier - Apply a barrier on the thread communicator
521: associated with the MPI communicator
523: Not collective
525: Input Parameters:
526: . comm - the MPI communicator
528: Level: developer
530: Notes:
531: This routine provides an interface to put an explicit barrier between
532: successive kernel calls to ensure that the first kernel is executed
533: by all the threads before calling the next one.
535: Called by the main thread only.
537: May not be applicable to all types.
538: */
539: PetscErrorCode PetscThreadCommBarrier(MPI_Comm comm)
540: {
541: PetscErrorCode ierr;
542: PetscThreadComm tcomm=0;
545: PetscLogEventBegin(ThreadComm_Barrier,0,0,0,0);
546: PetscCommGetThreadComm(comm,&tcomm);
547: if (tcomm->ops->barrier) {
548: (*tcomm->ops->barrier)(tcomm);
549: }
550: PetscLogEventEnd(ThreadComm_Barrier,0,0,0,0);
551: return(0);
552: }
556: /*@C
557: PetscThreadCommRegister -
559: Level: advanced
560: @*/
561: PetscErrorCode PetscThreadCommRegister(const char sname[],PetscErrorCode (*function)(PetscThreadComm))
562: {
566: PetscFunctionListAdd(&PetscThreadCommList,sname,function);
567: return(0);
568: }
572: /*@C
573: PetscThreadCommGetScalars - Gets pointers to locations for storing three PetscScalars that may be passed
574: to PetscThreadCommRunKernel to ensure that the scalar values remain valid
575: even after the main thread exits the calling function.
577: Input Parameters:
578: + comm - the MPI communicator having the thread communicator
579: . val1 - pointer to store the first scalar value
580: . val2 - pointer to store the second scalar value
581: - val3 - pointer to store the third scalar value
583: Level: developer
585: Notes:
586: This is a utility function to ensure that any scalars passed to PetscThreadCommRunKernel remain
587: valid even after the main thread exits the calling function. If any scalars need to passed to
588: PetscThreadCommRunKernel then these should be first stored in the locations provided by PetscThreadCommGetScalars()
590: Pass NULL if any pointers are not needed.
592: Called by the main thread only, not from within kernels
594: Typical usage:
596: PetscScalar *valptr;
597: PetscThreadCommGetScalars(comm,&valptr,NULL,NULL);
598: *valptr = alpha; (alpha is the scalar you wish to pass in PetscThreadCommRunKernel)
600: PetscThreadCommRunKernel(comm,(PetscThreadKernel)kernel_func,3,x,y,valptr);
602: .seealso: PetscThreadCommRunKernel()
603: @*/
604: PetscErrorCode PetscThreadCommGetScalars(MPI_Comm comm,PetscScalar **val1, PetscScalar **val2, PetscScalar **val3)
605: {
606: PetscErrorCode ierr;
607: PetscThreadComm tcomm;
608: PetscThreadCommJobCtx job;
609: PetscInt job_num;
612: PetscCommGetThreadComm(comm,&tcomm);
613: job_num = PetscJobQueue->ctr%tcomm->nkernels;
614: job = &PetscJobQueue->jobs[job_num];
615: if (val1) *val1 = &job->scalars[0];
616: if (val2) *val2 = &job->scalars[1];
617: if (val3) *val3 = &job->scalars[2];
618: return(0);
619: }
623: /*@C
624: PetscThreadCommGetInts - Gets pointers to locations for storing three PetscInts that may be passed
625: to PetscThreadCommRunKernel to ensure that the scalar values remain valid
626: even after the main thread exits the calling function.
628: Input Parameters:
629: + comm - the MPI communicator having the thread communicator
630: . val1 - pointer to store the first integer value
631: . val2 - pointer to store the second integer value
632: - val3 - pointer to store the third integer value
634: Level: developer
636: Notes:
637: This is a utility function to ensure that any scalars passed to PetscThreadCommRunKernel remain
638: valid even after the main thread exits the calling function. If any scalars need to passed to
639: PetscThreadCommRunKernel then these should be first stored in the locations provided by PetscThreadCommGetInts()
641: Pass NULL if any pointers are not needed.
643: Called by the main thread only, not from within kernels
645: Typical usage:
647: PetscScalar *valptr;
648: PetscThreadCommGetScalars(comm,&valptr,NULL,NULL);
649: *valptr = alpha; (alpha is the scalar you wish to pass in PetscThreadCommRunKernel)
651: PetscThreadCommRunKernel(comm,(PetscThreadKernel)kernel_func,3,x,y,valptr);
653: .seealso: PetscThreadCommRunKernel()
654: @*/
655: PetscErrorCode PetscThreadCommGetInts(MPI_Comm comm,PetscInt **val1, PetscInt **val2, PetscInt **val3)
656: {
657: PetscErrorCode ierr;
658: PetscThreadComm tcomm;
659: PetscThreadCommJobCtx job;
660: PetscInt job_num;
663: PetscCommGetThreadComm(comm,&tcomm);
664: job_num = PetscJobQueue->ctr%tcomm->nkernels;
665: job = &PetscJobQueue->jobs[job_num];
666: if (val1) *val1 = &job->ints[0];
667: if (val2) *val2 = &job->ints[1];
668: if (val3) *val3 = &job->ints[2];
669: return(0);
670: }
674: /*@C
675: PetscThreadCommRunKernel - Runs the kernel using the thread communicator
676: associated with the MPI communicator
678: Not Collective
680: Input Parameters:
681: + comm - the MPI communicator
682: . func - the kernel (needs to be cast to PetscThreadKernel)
683: . nargs - Number of input arguments for the kernel
684: - ... - variable list of input arguments
686: Level: developer
688: Notes:
689: All input arguments to the kernel must be passed by reference, Petsc objects are
690: inherrently passed by reference so you don't need to additionally & them.
692: Example usage - PetscThreadCommRunKernel(comm,(PetscThreadKernel)kernel_func,3,x,y,z);
693: with kernel_func declared as
694: PetscErrorCode kernel_func(PetscInt thread_id,PetscInt* x, PetscScalar* y, PetscReal* z)
696: The first input argument of kernel_func, thread_id, is the thread rank. This is passed implicitly
697: by PETSc.
699: .seealso: PetscThreadCommCreate(), PetscThreadCommGNThreads()
700: @*/
701: PetscErrorCode PetscThreadCommRunKernel(MPI_Comm comm,PetscErrorCode (*func)(PetscInt,...),PetscInt nargs,...)
702: {
703: PetscErrorCode ierr;
704: va_list argptr;
705: PetscInt i;
706: PetscThreadComm tcomm=0;
707: PetscThreadCommJobCtx job;
710: if (nargs > PETSC_KERNEL_NARGS_MAX) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Requested %D input arguments for kernel, max. limit %D",nargs,PETSC_KERNEL_NARGS_MAX);
711: PetscLogEventBegin(ThreadComm_RunKernel,0,0,0,0);
712: PetscCommGetThreadComm(comm,&tcomm);
713: job = &PetscJobQueue->jobs[PetscJobQueue->ctr]; /* Get the job context from the queue to launch this job */
714: if (job->job_status[0] != THREAD_JOB_NONE) {
715: for (i=0; i<tcomm->nworkThreads; i++) {
716: while (PetscReadOnce(int,job->job_status[i]) != THREAD_JOB_COMPLETED) ;
717: }
718: }
720: job->tcomm = tcomm;
721: job->tcomm->job_ctr = PetscJobQueue->ctr;
722: job->nargs = nargs;
723: job->pfunc = (PetscThreadKernel)func;
724: va_start(argptr,nargs);
725: for (i=0; i < nargs; i++) job->args[i] = va_arg(argptr,void*);
726: va_end(argptr);
727: for (i=0; i<tcomm->nworkThreads; i++) job->job_status[i] = THREAD_JOB_POSTED;
729: PetscJobQueue->ctr = (PetscJobQueue->ctr+1)%tcomm->nkernels; /* Increment the queue ctr to point to the next available slot */
730: PetscJobQueue->kernel_ctr++;
731: if (tcomm->isnothread) {
732: PetscRunKernel(0,job->nargs,job);
733: job->job_status[0] = THREAD_JOB_COMPLETED;
734: } else {
735: (*tcomm->ops->runkernel)(tcomm,job);
736: }
737: PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
738: return(0);
739: }
743: /* The zero-argument kernel needs to be callable with an unwrapped PetscThreadComm after Petsc_ThreadComm_keyval has been freed. */
744: static PetscErrorCode PetscThreadCommRunKernel0_Private(PetscThreadComm tcomm,PetscErrorCode (*func)(PetscInt,...))
745: {
746: PetscErrorCode ierr;
747: PetscInt i;
748: PetscThreadCommJobCtx job;
751: if (tcomm->isnothread) {
752: (*func)(0);
753: return(0);
754: }
756: if (!PetscJobQueue) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Trying to run kernel with no job queue");
757: job = &PetscJobQueue->jobs[PetscJobQueue->ctr]; /* Get the job context from the queue to launch this job */
758: if (job->job_status[0] != THREAD_JOB_NONE) {
759: for (i=0; i<tcomm->nworkThreads; i++) {
760: while (PetscReadOnce(int,job->job_status[i]) != THREAD_JOB_COMPLETED) ;
761: }
762: }
764: job->tcomm = tcomm;
765: job->tcomm->job_ctr = PetscJobQueue->ctr;
766: job->nargs = 1;
767: job->pfunc = (PetscThreadKernel)func;
769: for (i=0; i<tcomm->nworkThreads; i++) job->job_status[i] = THREAD_JOB_POSTED;
771: PetscJobQueue->ctr = (PetscJobQueue->ctr+1)%tcomm->nkernels; /* Increment the queue ctr to point to the next available slot */
772: PetscJobQueue->kernel_ctr++;
774: (*tcomm->ops->runkernel)(tcomm,job);
775: return(0);
776: }
780: /*@C
781: PetscThreadCommRunKernel0 - PetscThreadCommRunKernel version for kernels with no
782: input arguments
784: Input Parameters:
785: + comm - the MPI communicator
786: - func - the kernel (needs to be cast to PetscThreadKernel)
788: Level: developer
790: Notes:
791: All input arguments to the kernel must be passed by reference, Petsc objects are
792: inherrently passed by reference so you don't need to additionally & them.
794: Example usage - PetscThreadCommRunKernel0(comm,(PetscThreadKernel)kernel_func);
795: with kernel_func declared as
796: PetscErrorCode kernel_func(PetscInt thread_id)
798: The first input argument of kernel_func, thread_id, is the thread rank. This is passed implicitly
799: by PETSc.
801: .seealso: PetscThreadCommCreate(), PetscThreadCommGNThreads()
802: @*/
803: PetscErrorCode PetscThreadCommRunKernel0(MPI_Comm comm,PetscErrorCode (*func)(PetscInt,...))
804: {
805: PetscErrorCode ierr;
806: PetscThreadComm tcomm=0;
809: PetscLogEventBegin(ThreadComm_RunKernel,0,0,0,0);
810: PetscCommGetThreadComm(comm,&tcomm);
811: PetscThreadCommRunKernel0_Private(tcomm,func);
812: PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
813: return(0);
814: }
818: /*@C
819: PetscThreadCommRunKernel1 - PetscThreadCommRunKernel version for kernels with 1
820: input argument
822: Input Parameters:
823: + comm - the MPI communicator
824: . func - the kernel (needs to be cast to PetscThreadKernel)
825: - in1 - input argument for the kernel
827: Level: developer
829: Notes:
830: All input arguments to the kernel must be passed by reference, Petsc objects are
831: inherrently passed by reference so you don't need to additionally & them.
833: Example usage - PetscThreadCommRunKernel1(comm,(PetscThreadKernel)kernel_func,x);
834: with kernel_func declared as
835: PetscErrorCode kernel_func(PetscInt thread_id,PetscInt* x)
837: The first input argument of kernel_func, thread_id, is the thread rank. This is passed implicitly
838: by PETSc.
840: .seealso: PetscThreadCommCreate(), PetscThreadCommGNThreads()
841: @*/
842: PetscErrorCode PetscThreadCommRunKernel1(MPI_Comm comm,PetscErrorCode (*func)(PetscInt,...),void *in1)
843: {
844: PetscErrorCode ierr;
845: PetscInt i;
846: PetscThreadComm tcomm=0;
847: PetscThreadCommJobCtx job;
850: PetscLogEventBegin(ThreadComm_RunKernel,0,0,0,0);
851: PetscCommGetThreadComm(comm,&tcomm);
852: if (tcomm->isnothread) {
853: (*func)(0,in1);
854: PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
855: return(0);
856: }
858: if (!PetscJobQueue) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Trying to run kernel with no job queue");
859: job = &PetscJobQueue->jobs[PetscJobQueue->ctr]; /* Get the job context from the queue to launch this job */
860: if (job->job_status[0] != THREAD_JOB_NONE) {
861: for (i=0; i<tcomm->nworkThreads; i++) {
862: while (PetscReadOnce(int,job->job_status[i]) != THREAD_JOB_COMPLETED) ;
863: }
864: }
866: job->tcomm = tcomm;
867: job->tcomm->job_ctr = PetscJobQueue->ctr;
868: job->nargs = 1;
869: job->pfunc = (PetscThreadKernel)func;
870: job->args[0] = in1;
872: for (i=0; i<tcomm->nworkThreads; i++) job->job_status[i] = THREAD_JOB_POSTED;
874: PetscJobQueue->ctr = (PetscJobQueue->ctr+1)%tcomm->nkernels; /* Increment the queue ctr to point to the next available slot */
875: PetscJobQueue->kernel_ctr++;
877: (*tcomm->ops->runkernel)(tcomm,job);
879: PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
880: return(0);
881: }
885: /*@C
886: PetscThreadCommRunKernel2 - PetscThreadCommRunKernel version for kernels with 2
887: input arguments
889: Input Parameters:
890: + comm - the MPI communicator
891: . func - the kernel (needs to be cast to PetscThreadKernel)
892: . in1 - 1st input argument for the kernel
893: - in2 - 2nd input argument for the kernel
895: Level: developer
897: Notes:
898: All input arguments to the kernel must be passed by reference, Petsc objects are
899: inherrently passed by reference so you don't need to additionally & them.
901: Example usage - PetscThreadCommRunKernel1(comm,(PetscThreadKernel)kernel_func,x);
902: with kernel_func declared as
903: PetscErrorCode kernel_func(PetscInt thread_id,PetscInt *x,PetscInt *y)
905: The first input argument of kernel_func, thread_id, is the thread rank. This is passed implicitly
906: by PETSc.
908: .seealso: PetscThreadCommCreate(), PetscThreadCommGNThreads()
909: @*/
910: PetscErrorCode PetscThreadCommRunKernel2(MPI_Comm comm,PetscErrorCode (*func)(PetscInt,...),void *in1,void *in2)
911: {
912: PetscErrorCode ierr;
913: PetscInt i;
914: PetscThreadComm tcomm=0;
915: PetscThreadCommJobCtx job;
918: PetscLogEventBegin(ThreadComm_RunKernel,0,0,0,0);
919: PetscCommGetThreadComm(comm,&tcomm);
920: if (tcomm->isnothread) {
921: (*func)(0,in1,in2);
922: PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
923: return(0);
924: }
926: if (!PetscJobQueue) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Trying to run kernel with no job queue");
927: job = &PetscJobQueue->jobs[PetscJobQueue->ctr]; /* Get the job context from the queue to launch this job */
928: if (job->job_status[0] != THREAD_JOB_NONE) {
929: for (i=0; i<tcomm->nworkThreads; i++) {
930: while (PetscReadOnce(int,job->job_status[i]) != THREAD_JOB_COMPLETED) ;
931: }
932: }
934: job->tcomm = tcomm;
935: job->tcomm->job_ctr = PetscJobQueue->ctr;
936: job->nargs = 2;
937: job->pfunc = (PetscThreadKernel)func;
938: job->args[0] = in1;
939: job->args[1] = in2;
941: for (i=0; i<tcomm->nworkThreads; i++) job->job_status[i] = THREAD_JOB_POSTED;
943: PetscJobQueue->ctr = (PetscJobQueue->ctr+1)%tcomm->nkernels; /* Increment the queue ctr to point to the next available slot */
944: PetscJobQueue->kernel_ctr++;
946: (*tcomm->ops->runkernel)(tcomm,job);
948: PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
949: return(0);
950: }
954: /*@C
955: PetscThreadCommRunKernel3 - PetscThreadCommRunKernel version for kernels with 3
956: input argument
958: Input Parameters:
959: + comm - the MPI communicator
960: . func - the kernel (needs to be cast to PetscThreadKernel)
961: . in1 - first input argument for the kernel
962: . in2 - second input argument for the kernel
963: - in3 - third input argument for the kernel
965: Level: developer
967: Notes:
968: All input arguments to the kernel must be passed by reference, Petsc objects are
969: inherrently passed by reference so you don't need to additionally & them.
971: Example usage - PetscThreadCommRunKernel1(comm,(PetscThreadKernel)kernel_func,x);
972: with kernel_func declared as
973: PetscErrorCode kernel_func(PetscInt thread_id,PetscInt* x)
975: The first input argument of kernel_func, thread_id, is the thread rank. This is passed implicitly
976: by PETSc.
978: .seealso: PetscThreadCommCreate(), PetscThreadCommGNThreads()
979: @*/
980: PetscErrorCode PetscThreadCommRunKernel3(MPI_Comm comm,PetscErrorCode (*func)(PetscInt,...),void *in1,void *in2,void *in3)
981: {
982: PetscErrorCode ierr;
983: PetscInt i;
984: PetscThreadComm tcomm=0;
985: PetscThreadCommJobCtx job;
988: PetscLogEventBegin(ThreadComm_RunKernel,0,0,0,0);
989: PetscCommGetThreadComm(comm,&tcomm);
990: if (tcomm->isnothread) {
991: (*func)(0,in1,in2,in3);
992: PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
993: return(0);
994: }
996: if (!PetscJobQueue) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Trying to run kernel with no job queue");
997: job = &PetscJobQueue->jobs[PetscJobQueue->ctr]; /* Get the job context from the queue to launch this job */
998: if (job->job_status[0] != THREAD_JOB_NONE) {
999: for (i=0; i<tcomm->nworkThreads; i++) {
1000: while (PetscReadOnce(int,job->job_status[i]) != THREAD_JOB_COMPLETED) ;
1001: }
1002: }
1004: job->tcomm = tcomm;
1005: job->tcomm->job_ctr = PetscJobQueue->ctr;
1006: job->nargs = 3;
1007: job->pfunc = (PetscThreadKernel)func;
1008: job->args[0] = in1;
1009: job->args[1] = in2;
1010: job->args[2] = in3;
1012: for (i=0; i<tcomm->nworkThreads; i++) job->job_status[i] = THREAD_JOB_POSTED;
1014: PetscJobQueue->ctr = (PetscJobQueue->ctr+1)%tcomm->nkernels; /* Increment the queue ctr to point to the next available slot */
1015: PetscJobQueue->kernel_ctr++;
1017: (*tcomm->ops->runkernel)(tcomm,job);
1019: PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
1020: return(0);
1021: }
1025: /*@C
1026: PetscThreadCommRunKernel4 - PetscThreadCommRunKernel version for kernels with 4
1027: input argument
1029: Input Parameters:
1030: + comm - the MPI communicator
1031: . func - the kernel (needs to be cast to PetscThreadKernel)
1032: . in1 - first input argument for the kernel
1033: . in2 - second input argument for the kernel
1034: . in3 - third input argument for the kernel
1035: - in4 - fourth input argument for the kernel
1037: Level: developer
1039: Notes:
1040: All input arguments to the kernel must be passed by reference, Petsc objects are
1041: inherrently passed by reference so you don't need to additionally & them.
1043: Example usage - PetscThreadCommRunKernel1(comm,(PetscThreadKernel)kernel_func,x);
1044: with kernel_func declared as
1045: PetscErrorCode kernel_func(PetscInt thread_id,PetscInt* x)
1047: The first input argument of kernel_func, thread_id, is the thread rank. This is passed implicitly
1048: by PETSc.
1050: .seealso: PetscThreadCommCreate(), PetscThreadCommGNThreads()
1051: @*/
1052: PetscErrorCode PetscThreadCommRunKernel4(MPI_Comm comm,PetscErrorCode (*func)(PetscInt,...),void *in1,void *in2,void *in3,void *in4)
1053: {
1054: PetscErrorCode ierr;
1055: PetscInt i;
1056: PetscThreadComm tcomm=0;
1057: PetscThreadCommJobCtx job;
1060: PetscLogEventBegin(ThreadComm_RunKernel,0,0,0,0);
1061: PetscCommGetThreadComm(comm,&tcomm);
1062: if (tcomm->isnothread) {
1063: (*func)(0,in1,in2,in3,in4);
1064: PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
1065: return(0);
1066: }
1068: if (!PetscJobQueue) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Trying to run kernel with no job queue");
1069: job = &PetscJobQueue->jobs[PetscJobQueue->ctr]; /* Get the job context from the queue to launch this job */
1070: if (job->job_status[0] != THREAD_JOB_NONE) {
1071: for (i=0; i<tcomm->nworkThreads; i++) {
1072: while (PetscReadOnce(int,job->job_status[i]) != THREAD_JOB_COMPLETED) ;
1073: }
1074: }
1076: job->tcomm = tcomm;
1077: job->tcomm->job_ctr = PetscJobQueue->ctr;
1078: job->nargs = 4;
1079: job->pfunc = (PetscThreadKernel)func;
1080: job->args[0] = in1;
1081: job->args[1] = in2;
1082: job->args[2] = in3;
1083: job->args[3] = in4;
1085: for (i=0; i<tcomm->nworkThreads; i++) job->job_status[i] = THREAD_JOB_POSTED;
1087: PetscJobQueue->ctr = (PetscJobQueue->ctr+1)%tcomm->nkernels; /* Increment the queue ctr to point to the next available slot */
1088: PetscJobQueue->kernel_ctr++;
1090: (*tcomm->ops->runkernel)(tcomm,job);
1092: PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
1093: return(0);
1094: }
1098: /*@C
1099: PetscThreadCommRunKernel6 - PetscThreadCommRunKernel version for kernels with 6
1100: input arguments
1102: Input Parameters:
1103: + comm - the MPI communicator
1104: . func - the kernel (needs to be cast to PetscThreadKernel)
1105: . in1 - first input argument for the kernel
1106: . in2 - second input argument for the kernel
1107: . in3 - third input argument for the kernel
1108: . in4 - fourth input argument for the kernel
1109: . in5 - fifth input argument for the kernel
1110: - in6 - sixth input argument for the kernel
1112: Level: developer
1114: Notes:
1115: All input arguments to the kernel must be passed by reference, Petsc objects are
1116: inherrently passed by reference so you don't need to additionally & them.
1118: Example usage - PetscThreadCommRunKernel1(comm,(PetscThreadKernel)kernel_func,x);
1119: with kernel_func declared as
1120: PetscErrorCode kernel_func(PetscInt thread_id,PetscInt* x)
1122: The first input argument of kernel_func, thread_id, is the thread rank. This is passed implicitly
1123: by PETSc.
1125: .seealso: PetscThreadCommCreate(), PetscThreadCommGNThreads()
1126: @*/
1127: PetscErrorCode PetscThreadCommRunKernel6(MPI_Comm comm,PetscErrorCode (*func)(PetscInt,...),void *in1,void *in2,void *in3,void *in4,void *in5,void *in6)
1128: {
1129: PetscErrorCode ierr;
1130: PetscInt i;
1131: PetscThreadComm tcomm=0;
1132: PetscThreadCommJobCtx job;
1135: PetscLogEventBegin(ThreadComm_RunKernel,0,0,0,0);
1136: PetscCommGetThreadComm(comm,&tcomm);
1137: if (tcomm->isnothread) {
1138: (*func)(0,in1,in2,in3,in4,in5,in6);
1139: PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
1140: return(0);
1141: }
1143: if (!PetscJobQueue) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Trying to run kernel with no job queue");
1144: job = &PetscJobQueue->jobs[PetscJobQueue->ctr]; /* Get the job context from the queue to launch this job */
1145: if (job->job_status[0] != THREAD_JOB_NONE) {
1146: for (i=0; i<tcomm->nworkThreads; i++) {
1147: while (PetscReadOnce(int,job->job_status[i]) != THREAD_JOB_COMPLETED) ;
1148: }
1149: }
1151: job->tcomm = tcomm;
1152: job->tcomm->job_ctr = PetscJobQueue->ctr;
1153: job->nargs = 6;
1154: job->pfunc = (PetscThreadKernel)func;
1155: job->args[0] = in1;
1156: job->args[1] = in2;
1157: job->args[2] = in3;
1158: job->args[3] = in4;
1159: job->args[4] = in5;
1160: job->args[5] = in6;
1163: for (i=0; i<tcomm->nworkThreads; i++) job->job_status[i] = THREAD_JOB_POSTED;
1165: PetscJobQueue->ctr = (PetscJobQueue->ctr+1)%tcomm->nkernels; /* Increment the queue ctr to point to the next available slot */
1166: PetscJobQueue->kernel_ctr++;
1168: (*tcomm->ops->runkernel)(tcomm,job);
1170: PetscLogEventEnd(ThreadComm_RunKernel,0,0,0,0);
1171: return(0);
1172: }
1174: /*
1175: Detaches the thread communicator from the MPI communicator if it exists
1176: */
1179: PetscErrorCode PetscThreadCommDetach(MPI_Comm comm)
1180: {
1182: PetscMPIInt flg;
1183: void *ptr;
1186: MPI_Attr_get(comm,Petsc_ThreadComm_keyval,&ptr,&flg);
1187: if (flg) {
1188: MPI_Attr_delete(comm,Petsc_ThreadComm_keyval);
1189: }
1190: return(0);
1191: }
1193: /*
1194: This routine attaches the thread communicator to the MPI communicator if it does not
1195: exist already.
1196: */
1199: PetscErrorCode PetscThreadCommAttach(MPI_Comm comm,PetscThreadComm tcomm)
1200: {
1202: PetscMPIInt flg;
1203: void *ptr;
1206: MPI_Attr_get(comm,Petsc_ThreadComm_keyval,&ptr,&flg);
1207: if (!flg) {
1208: tcomm->refct++;
1209: MPI_Attr_put(comm,Petsc_ThreadComm_keyval,tcomm);
1210: }
1211: return(0);
1212: }
1216: /*
1217: PetscThreadCommWorldInitialize - Initializes the global thread communicator object
1219: PetscThreadCommWorldInitialize() defaults to using the nonthreaded communicator.
1220: */
1221: PetscErrorCode PetscThreadCommWorldInitialize(void)
1222: {
1223: PetscErrorCode ierr;
1224: PetscThreadComm tcomm;
1225: PetscInt i,j;
1228: PetscThreadCommCreate(&PETSC_THREAD_COMM_WORLD);
1229: tcomm = PETSC_THREAD_COMM_WORLD;
1230: PetscThreadCommSetNThreads(tcomm,PETSC_DECIDE);
1231: PetscThreadCommSetAffinities(tcomm,NULL);
1232: PetscNew(struct _p_PetscThreadCommJobQueue,&PetscJobQueue);
1234: tcomm->nkernels = 16;
1236: PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"Thread comm - setting number of kernels",NULL);
1237: PetscOptionsInt("-threadcomm_nkernels","number of kernels that can be launched simultaneously","",16,&tcomm->nkernels,NULL);
1238: PetscOptionsEnd();
1240: PetscMalloc(tcomm->nkernels*sizeof(struct _p_PetscThreadCommJobCtx),&PetscJobQueue->jobs);
1241: PetscMalloc(tcomm->nworkThreads*tcomm->nkernels*sizeof(PetscInt),&PetscJobQueue->jobs[0].job_status);
1242: for (i=0; i<tcomm->nkernels; i++) {
1243: PetscJobQueue->jobs[i].job_status = PetscJobQueue->jobs[0].job_status + i*tcomm->nworkThreads;
1244: for (j=0; j<tcomm->nworkThreads; j++) PetscJobQueue->jobs[i].job_status[j] = THREAD_JOB_NONE;
1245: }
1246: PetscJobQueue->ctr = 0;
1247: PetscJobQueue->kernel_ctr = 0;
1248: tcomm->job_ctr = 0;
1250: PetscThreadCommSetType(tcomm,NOTHREAD);
1251: PetscThreadCommReductionCreate(tcomm,&tcomm->red);
1252: PetscThreadCommStackCreate();
1253: tcomm->refct++;
1254: return(0);
1255: }
1259: /*
1260: PetscThreadCommGetOwnershipRanges - Given the global size of an array, computes the local sizes and sets
1261: the starting array indices
1263: Input Parameters:
1264: + comm - the MPI communicator which holds the thread communicator
1265: - N - the global size of the array
1267: Output Parameters:
1268: . trstarts - The starting array indices for each thread. the size of trstarts is nthreads+1
1270: Notes:
1271: trstarts is malloced in this routine
1272: */
1273: PetscErrorCode PetscThreadCommGetOwnershipRanges(MPI_Comm comm,PetscInt N,PetscInt *trstarts[])
1274: {
1275: PetscErrorCode ierr;
1276: PetscInt Q,R;
1277: PetscBool S;
1278: PetscThreadComm tcomm = NULL;
1279: PetscInt *trstarts_out,nloc,i;
1282: PetscCommGetThreadComm(comm,&tcomm);
1284: PetscMalloc((tcomm->nworkThreads+1)*sizeof(PetscInt),&trstarts_out);
1285: trstarts_out[0] = 0;
1286: Q = N/tcomm->nworkThreads;
1287: R = N - Q*tcomm->nworkThreads;
1288: for (i=0; i<tcomm->nworkThreads; i++) {
1289: S = (PetscBool)(i < R);
1290: nloc = S ? Q+1 : Q;
1291: trstarts_out[i+1] = trstarts_out[i] + nloc;
1292: }
1294: *trstarts = trstarts_out;
1295: return(0);
1296: }
1300: /*
1301: PetscThreadCommGetRank - Gets the rank of the calling thread
1303: Input Parameters:
1304: . tcomm - the thread communicator
1306: Output Parameters:
1307: . trank - The rank of the calling thread
1309: */
1310: PetscErrorCode PetscThreadCommGetRank(PetscThreadComm tcomm,PetscInt *trank)
1311: {
1313: PetscInt rank = 0;
1316: if (tcomm->ops->getrank) {
1317: (*tcomm->ops->getrank)(&rank);
1318: }
1319: *trank = rank;
1320: return(0);
1321: }