Actual source code: threadcommred.c
petsc-3.3-p7 2013-05-11
1: /* This file contains code for threaded reductions */
2: #include <petsc-private/threadcommimpl.h> /*I "petscthreadcomm.h" I*/
6: /*@C
7: PetscThreadReductionBegin - Initiates a threaded reduction and returns a
8: reduction object to be passed to PetscThreadCommRunKernel
10: Input Parameters:
11: + comm - the MPI comm
12: . op - the reduction operation
13: . type - the data type for reduction
14: . red - the reduction context
16: Level: developer
18: Notes:
19: See include/petscthreadcomm.h for the available reduction operations
21: To be called from the main thread before calling PetscThreadCommRunKernel
23: .seealso: PetscThreadCommReductionKernelBegin(), PetscThreadCommReductionKernelEnd(), PetscThreadCommReductionEnd()
24: @*/
25: PetscErrorCode PetscThreadReductionBegin(MPI_Comm comm,PetscThreadCommReductionOp op, PetscDataType type,PetscThreadCommRedCtx *red)
26: {
28: PetscThreadComm tcomm;
31: PetscCommGetThreadComm(comm,&tcomm);
32: tcomm->red->op = op;
33: tcomm->red->type = type;
34: tcomm->red->red_status = THREADCOMM_REDUCTION_NEW;
35: tcomm->red->tcomm = tcomm;
36: *red = tcomm->red;
37: return(0);
38: }
39:
42: /*
43: PetscThreadCommReductionDestroy - Destroys the reduction context
45: Input Parameters:
46: . red - the reduction context
48: */
49: PetscErrorCode PetscThreadCommReductionDestroy(PetscThreadCommRedCtx red)
50: {
54: if(!red) return(0);
56: PetscFree(red->thread_status);
57: PetscFree(red->local_red);
58: PetscFree(red);
59: return(0);
60: }
64: /*
65: PetscThreadReductionKernelBegin - Begins a threaded reduction operation
67: Input Parameters:
68: + trank - Rank of the calling thread
69: . red - the reduction context
70: . lred - local contribution from the thread
72: Level: developer
74: Notes:
75: This routine posts the local reduction of each thread in the reduction context and
76: updates its reduction status.
78: Must call PetscThreadReductionBegin before launching the kernel.
79: */
80: PetscErrorCode PetscThreadReductionKernelBegin(PetscInt trank,PetscThreadCommRedCtx red,void* lred)
81: {
82: if(PetscReadOnce(int,red->red_status) != THREADCOMM_REDUCTION_NEW) {
83: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Did not call PetscThreadReductionBegin() before calling PetscThreadCommRunKernel()");
84: }
86: switch(red->type) {
87: case PETSC_INT:
88: ((PetscInt*)red->local_red)[trank] = *(PetscInt*)lred;
89: break;
90: #if defined(PETSC_USE_COMPLEX)
91: case PETSC_REAL:
92: ((PetscReal*)red->local_red)[trank] = *(PetscReal*)lred;
93: break;
94: #endif
95: case PETSC_SCALAR:
96: ((PetscScalar*)red->local_red)[trank] = *(PetscScalar*)lred;
97: break;
98: default:
99: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown datatype provided for kernel reduction");
100: break;
101: }
102: red->thread_status[trank] = THREADCOMM_THREAD_POSTED_LOCALRED;
103: return 0;
104: }
106: /* Completes the given reduction */
109: PetscErrorCode PetscThreadReductionEnd_Private(PetscThreadCommRedCtx red,void * outdata)
110: {
111: /* Check whether all threads have posted their contributions */
112: PetscBool wait=PETSC_TRUE;
113: PetscInt i;
114: while(wait) {
115: for(i=0;i < red->tcomm->nworkThreads;i++) {
116: if(PetscReadOnce(int,red->thread_status[i]) != THREADCOMM_THREAD_POSTED_LOCALRED) {
117: wait = PETSC_TRUE;
118: break;
119: }
120: wait = PETSC_FALSE;
121: }
122: }
123:
124: /* Apply the reduction operation */
125: switch(red->op) {
126: case THREADCOMM_SUM:
127: if(red->type == PETSC_REAL) {
128: PetscReal red_sum=*(PetscReal*)outdata;
129: for(i=0; i < red->tcomm->nworkThreads;i++) {
130: red_sum += ((PetscReal*)red->local_red)[i];
131: }
132: PetscMemcpy(outdata,&red_sum,sizeof(PetscReal));
133: break;
134: }
135: if(red->type == PETSC_SCALAR) {
136: PetscScalar red_sum=*(PetscScalar*)outdata;
137: for(i=0; i < red->tcomm->nworkThreads;i++) {
138: red_sum += ((PetscScalar*)red->local_red)[i];
139: }
140: PetscMemcpy(outdata,&red_sum,sizeof(PetscScalar));
141: break;
142: }
143: if(red->type == PETSC_INT) {
144: PetscInt red_sum=*(PetscInt*)outdata;
145: for(i=0; i < red->tcomm->nworkThreads;i++) {
146: red_sum += ((PetscInt*)red->local_red)[i];
147: }
148: PetscMemcpy(outdata,&red_sum,sizeof(PetscInt));
149: break;
150: }
151: break;
152: case THREADCOMM_PROD:
153: if(red->type == PETSC_REAL) {
154: PetscReal red_prod=*(PetscReal*)outdata;
155: for(i=0; i < red->tcomm->nworkThreads;i++) {
156: red_prod *= ((PetscReal*)red->local_red)[i];
157: }
158: PetscMemcpy(outdata,&red_prod,sizeof(PetscReal));
159: break;
160: }
161: if(red->type == PETSC_SCALAR) {
162: PetscScalar red_prod=*(PetscScalar*)outdata;
163: for(i=0; i < red->tcomm->nworkThreads;i++) {
164: red_prod *= ((PetscScalar*)red->local_red)[i];
165: }
166: PetscMemcpy(outdata,&red_prod,sizeof(PetscScalar));
167: break;
168: }
169: if(red->type == PETSC_INT) {
170: PetscInt red_prod=*(PetscInt*)outdata;
171: for(i=0; i < red->tcomm->nworkThreads;i++) {
172: red_prod *= ((PetscInt*)red->local_red)[i];
173: }
174: PetscMemcpy(outdata,&red_prod,sizeof(PetscInt));
175: break;
176: }
177: break;
178: default:
179: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Undefined thread reduction operation");
180: break;
181: }
182: return 0;
183: }
184:
187: /*@C
188: PetscThreadReductionEnd - Completes the given reduction
190: Input Parameters:
191: + red - the reduction context
192: . outdata - the reduction result
194: Level: developer
196: Notes:
197: To be called by the main thread only
198: @*/
199: PetscErrorCode PetscThreadReductionEnd(PetscThreadCommRedCtx red,void *outdata)
200: {
203: PetscThreadReductionEnd_Private(red,outdata);
204: red->red_status = THREADCOMM_REDUCTION_COMPLETE;
205: return(0);
206: }
210: /*
211: PetscThreadReductionKernelBegin - Finishes a reduction operation
213: Input Parameters:
214: + trank - Rank of the calling thread
215: . red - the reduction context
216: - outdata - the reduction result
218: Level: developer
220: Notes: This should be called only from kernels only if the reduction needs to
221: be completed while in the kernel for some future operation.
223: */
224: PetscErrorCode PetscThreadReductionKernelEnd(PetscInt trank,PetscThreadCommRedCtx red,void *outdata)
225: {
227: if(PetscReadOnce(int,red->tcomm->leader) == trank) {
228: PetscThreadReductionEnd_Private(red,outdata);
229: red->red_status = THREADCOMM_REDUCTION_COMPLETE;
230: }
232: /* Wait till the leader performs the reduction so that the other threads
233: can also see the reduction result */
234: while(PetscReadOnce(int,red->red_status) != THREADCOMM_REDUCTION_COMPLETE)
235: ;
236: red->thread_status[trank] = THREADCOMM_THREAD_WAITING_FOR_NEWRED;
237: return 0;
238: }
242: /*
243: PetscThreadCommReductionCreate - Allocates the reduction context and
244: initializes it
246: Input Parameters:
247: + tcomm - the thread communicator
248: . red - the reduction context
250: */
251: PetscErrorCode PetscThreadCommReductionCreate(PetscThreadComm tcomm,PetscThreadCommRedCtx *red)
252: {
253: PetscErrorCode ierr;
254: PetscThreadCommRedCtx redout;
255: PetscInt i;
256:
258: PetscNew(struct _p_PetscThreadCommRedCtx,&redout);
259: PetscMalloc(tcomm->nworkThreads*sizeof(PetscInt),&redout->thread_status);
260: PetscMalloc(tcomm->nworkThreads*sizeof(PetscScalar),&redout->local_red);
261: redout->nworkThreads = tcomm->nworkThreads;
262: redout->red_status = THREADCOMM_REDUCTION_NONE;
263: for(i=0;i<redout->nworkThreads;i++) redout->thread_status[i] = THREADCOMM_THREAD_WAITING_FOR_NEWRED;
264: *red = redout;
265: return(0);
266: }