Actual source code: threadcommred.c

petsc-3.3-p7 2013-05-11
  1: /* This file contains code for threaded reductions */
  2: #include <petsc-private/threadcommimpl.h>      /*I "petscthreadcomm.h" I*/

  6: /*@C
  7:    PetscThreadReductionBegin - Initiates a threaded reduction and returns a
  8:                                reduction object to be passed to PetscThreadCommRunKernel

 10:    Input Parameters:
 11: +  comm - the MPI comm
 12: .  op   - the reduction operation
 13: .  type - the data type for reduction
 14: .  red  - the reduction context

 16:    Level: developer

 18:    Notes:
 19:    See include/petscthreadcomm.h for the available reduction operations

 21:    To be called from the main thread before calling PetscThreadCommRunKernel

 23: .seealso: PetscThreadCommReductionKernelBegin(), PetscThreadCommReductionKernelEnd(), PetscThreadCommReductionEnd()
 24: @*/
 25: PetscErrorCode PetscThreadReductionBegin(MPI_Comm comm,PetscThreadCommReductionOp op, PetscDataType type,PetscThreadCommRedCtx *red)
 26: {
 28:   PetscThreadComm tcomm;

 31:   PetscCommGetThreadComm(comm,&tcomm);
 32:   tcomm->red->op = op;
 33:   tcomm->red->type = type;
 34:   tcomm->red->red_status = THREADCOMM_REDUCTION_NEW;
 35:   tcomm->red->tcomm = tcomm;
 36:   *red = tcomm->red;
 37:   return(0);
 38: }
 39: 
 42: /* 
 43:    PetscThreadCommReductionDestroy - Destroys the reduction context

 45:    Input Parameters:
 46: .  red - the reduction context

 48: */
 49: PetscErrorCode PetscThreadCommReductionDestroy(PetscThreadCommRedCtx red)
 50: {

 54:   if(!red) return(0);

 56:   PetscFree(red->thread_status);
 57:   PetscFree(red->local_red);
 58:   PetscFree(red);
 59:   return(0);
 60: }

 64: /*
 65:    PetscThreadReductionKernelBegin - Begins a threaded reduction operation

 67:    Input Parameters:
 68: +  trank   - Rank of the calling thread
 69: .  red     - the reduction context 
 70: .  lred    - local contribution from the thread

 72:    Level: developer

 74:    Notes:
 75:    This routine posts the local reduction of each thread in the reduction context and
 76:    updates its reduction status. 

 78:    Must call PetscThreadReductionBegin before launching the kernel.
 79: */
 80: PetscErrorCode PetscThreadReductionKernelBegin(PetscInt trank,PetscThreadCommRedCtx red,void* lred)
 81: {
 82:   if(PetscReadOnce(int,red->red_status) != THREADCOMM_REDUCTION_NEW) {
 83:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Did not call PetscThreadReductionBegin() before calling PetscThreadCommRunKernel()");
 84:   }

 86:   switch(red->type) {
 87:   case PETSC_INT:
 88:     ((PetscInt*)red->local_red)[trank] = *(PetscInt*)lred;
 89:     break;
 90: #if defined(PETSC_USE_COMPLEX)
 91:   case PETSC_REAL:
 92:     ((PetscReal*)red->local_red)[trank] = *(PetscReal*)lred;
 93:     break;
 94: #endif
 95:   case PETSC_SCALAR:
 96:     ((PetscScalar*)red->local_red)[trank] = *(PetscScalar*)lred;
 97:     break;
 98:   default:
 99:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown datatype provided for kernel reduction");
100:     break;
101:   }
102:   red->thread_status[trank] = THREADCOMM_THREAD_POSTED_LOCALRED;
103:   return 0;
104: }

106: /* Completes the given reduction */
109: PetscErrorCode PetscThreadReductionEnd_Private(PetscThreadCommRedCtx red,void * outdata)
110: {
111:   /* Check whether all threads have posted their contributions */
112:   PetscBool wait=PETSC_TRUE;
113:   PetscInt  i;
114:   while(wait) {
115:     for(i=0;i < red->tcomm->nworkThreads;i++) {
116:       if(PetscReadOnce(int,red->thread_status[i]) != THREADCOMM_THREAD_POSTED_LOCALRED) {
117:         wait = PETSC_TRUE;
118:         break;
119:       }
120:       wait = PETSC_FALSE;
121:     }
122:   }
123: 
124:   /* Apply the reduction operation */
125:   switch(red->op) {
126:   case THREADCOMM_SUM:
127:     if(red->type == PETSC_REAL) {
128:       PetscReal red_sum=*(PetscReal*)outdata;
129:       for(i=0; i < red->tcomm->nworkThreads;i++) {
130:         red_sum += ((PetscReal*)red->local_red)[i];
131:       }
132:       PetscMemcpy(outdata,&red_sum,sizeof(PetscReal));
133:       break;
134:     }
135:     if(red->type == PETSC_SCALAR) {
136:       PetscScalar red_sum=*(PetscScalar*)outdata;
137:       for(i=0; i < red->tcomm->nworkThreads;i++) {
138:         red_sum += ((PetscScalar*)red->local_red)[i];
139:       }
140:       PetscMemcpy(outdata,&red_sum,sizeof(PetscScalar));
141:       break;
142:     }
143:     if(red->type == PETSC_INT) {
144:       PetscInt red_sum=*(PetscInt*)outdata;
145:       for(i=0; i < red->tcomm->nworkThreads;i++) {
146:         red_sum += ((PetscInt*)red->local_red)[i];
147:       }
148:       PetscMemcpy(outdata,&red_sum,sizeof(PetscInt));
149:       break;
150:     }
151:     break;
152:   case THREADCOMM_PROD:
153:     if(red->type == PETSC_REAL) {
154:       PetscReal red_prod=*(PetscReal*)outdata;
155:       for(i=0; i < red->tcomm->nworkThreads;i++) {
156:         red_prod *= ((PetscReal*)red->local_red)[i];
157:       }
158:       PetscMemcpy(outdata,&red_prod,sizeof(PetscReal));
159:       break;
160:     }
161:     if(red->type == PETSC_SCALAR) {
162:       PetscScalar red_prod=*(PetscScalar*)outdata;
163:       for(i=0; i < red->tcomm->nworkThreads;i++) {
164:         red_prod *= ((PetscScalar*)red->local_red)[i];
165:       }
166:       PetscMemcpy(outdata,&red_prod,sizeof(PetscScalar));
167:       break;
168:     }
169:     if(red->type == PETSC_INT) {
170:       PetscInt red_prod=*(PetscInt*)outdata;
171:       for(i=0; i < red->tcomm->nworkThreads;i++) {
172:         red_prod *= ((PetscInt*)red->local_red)[i];
173:       }
174:       PetscMemcpy(outdata,&red_prod,sizeof(PetscInt));
175:       break;
176:     }
177:     break;
178:   default:
179:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Undefined thread reduction operation");
180:     break;
181:   }
182:   return 0;
183: }
184: 
187: /*@C 
188:    PetscThreadReductionEnd - Completes the given reduction

190:    Input Parameters:
191: +  red     - the reduction context
192: .  outdata - the reduction result

194:    Level: developer

196:    Notes:
197:    To be called by the main thread only
198: @*/
199: PetscErrorCode PetscThreadReductionEnd(PetscThreadCommRedCtx red,void *outdata)
200: {
203:   PetscThreadReductionEnd_Private(red,outdata);
204:   red->red_status = THREADCOMM_REDUCTION_COMPLETE;
205:   return(0);
206: }

210: /*
211:    PetscThreadReductionKernelBegin - Finishes a reduction operation

213:    Input Parameters:
214: +  trank   - Rank of the calling thread
215: .  red     - the reduction context
216: -  outdata - the reduction result 

218:    Level: developer

220:    Notes: This should be called only from kernels only if the reduction needs to 
221:    be completed while in the kernel for some future operation.

223: */
224: PetscErrorCode PetscThreadReductionKernelEnd(PetscInt trank,PetscThreadCommRedCtx red,void *outdata)
225: {

227:   if(PetscReadOnce(int,red->tcomm->leader) == trank) {
228:     PetscThreadReductionEnd_Private(red,outdata);
229:     red->red_status = THREADCOMM_REDUCTION_COMPLETE;
230:   }

232:   /* Wait till the leader performs the reduction so that the other threads
233:      can also see the reduction result */
234:   while(PetscReadOnce(int,red->red_status) != THREADCOMM_REDUCTION_COMPLETE)
235:     ;
236:   red->thread_status[trank] = THREADCOMM_THREAD_WAITING_FOR_NEWRED;
237:   return 0;
238: }

242: /*
243:    PetscThreadCommReductionCreate - Allocates the reduction context and
244:                                  initializes it

246:    Input Parameters:
247: +  tcomm - the thread communicator
248: .  red   - the reduction context

250: */
251: PetscErrorCode PetscThreadCommReductionCreate(PetscThreadComm tcomm,PetscThreadCommRedCtx *red)
252: {
253:   PetscErrorCode        ierr;
254:   PetscThreadCommRedCtx redout;
255:   PetscInt              i;
256: 
258:   PetscNew(struct _p_PetscThreadCommRedCtx,&redout);
259:   PetscMalloc(tcomm->nworkThreads*sizeof(PetscInt),&redout->thread_status);
260:   PetscMalloc(tcomm->nworkThreads*sizeof(PetscScalar),&redout->local_red);
261:   redout->nworkThreads = tcomm->nworkThreads;
262:   redout->red_status = THREADCOMM_REDUCTION_NONE;
263:   for(i=0;i<redout->nworkThreads;i++) redout->thread_status[i] = THREADCOMM_THREAD_WAITING_FOR_NEWRED;
264:   *red = redout;
265:   return(0);
266: }