Actual source code: comb.c


  2: /*
  3:       Split phase global vector reductions with support for combining the
  4:    communication portion of several operations. Using MPI-1.1 support only

  6:       The idea for this and much of the initial code is contributed by
  7:    Victor Eijkhout.

  9:        Usage:
 10:              VecDotBegin(Vec,Vec,PetscScalar *);
 11:              VecNormBegin(Vec,NormType,PetscReal *);
 12:              ....
 13:              VecDotEnd(Vec,Vec,PetscScalar *);
 14:              VecNormEnd(Vec,NormType,PetscReal *);

 16:        Limitations:
 17:          - The order of the xxxEnd() functions MUST be in the same order
 18:            as the xxxBegin(). There is extensive error checking to try to
 19:            insure that the user calls the routines in the correct order
 20: */

 22: #include <petsc/private/vecimpl.h>

 24: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf,void *recvbuf,PetscMPIInt count,MPI_Datatype datatype,MPI_Op op,MPI_Comm comm,MPI_Request *request)
 25: {

 28: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
 29:   MPI_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
 30: #else
 31:   MPIU_Allreduce(sendbuf,recvbuf,count,datatype,op,comm);
 32:   *request = MPI_REQUEST_NULL;
 33: #endif
 34:   return 0;
 35: }

 37: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction*);

 39: /*
 40:    PetscSplitReductionCreate - Creates a data structure to contain the queued information.
 41: */
 42: static PetscErrorCode  PetscSplitReductionCreate(MPI_Comm comm,PetscSplitReduction **sr)
 43: {
 44:   PetscNew(sr);
 45:   (*sr)->numopsbegin = 0;
 46:   (*sr)->numopsend   = 0;
 47:   (*sr)->state       = STATE_BEGIN;
 48: #define MAXOPS 32
 49:   (*sr)->maxops      = MAXOPS;
 50:   PetscMalloc6(MAXOPS,&(*sr)->lvalues,MAXOPS,&(*sr)->gvalues,MAXOPS,&(*sr)->invecs,MAXOPS,&(*sr)->reducetype,MAXOPS,&(*sr)->lvalues_mix,MAXOPS,&(*sr)->gvalues_mix);
 51: #undef MAXOPS
 52:   (*sr)->comm        = comm;
 53:   (*sr)->request     = MPI_REQUEST_NULL;
 54:   (*sr)->mix         = PETSC_FALSE;
 55:   (*sr)->async       = PETSC_FALSE;
 56: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
 57:   (*sr)->async = PETSC_TRUE;    /* Enable by default */
 58: #endif
 59:   /* always check for option; so that tests that run on systems without support don't warn about unhandled options */
 60:   PetscOptionsGetBool(NULL,NULL,"-splitreduction_async",&(*sr)->async,NULL);
 61:   return 0;
 62: }

 64: /*
 65:        This function is the MPI reduction operation used when there is
 66:    a combination of sums and max in the reduction. The call below to
 67:    MPI_Op_create() converts the function PetscSplitReduction_Local() to the
 68:    MPI operator PetscSplitReduction_Op.
 69: */
 70: MPI_Op PetscSplitReduction_Op = 0;

 72: PETSC_EXTERN void MPIAPI PetscSplitReduction_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
 73: {
 74:   struct PetscScalarInt { PetscScalar v; PetscInt i; };
 75:   struct PetscScalarInt *xin = (struct PetscScalarInt*)in;
 76:   struct PetscScalarInt *xout = (struct PetscScalarInt*)out;
 77:   PetscInt              i,count = (PetscInt)*cnt;

 79:   if (*datatype != MPIU_SCALAR_INT) {
 80:     (*PetscErrorPrintf)("Can only handle MPIU_SCALAR_INT data types");
 81:     PETSCABORT(MPI_COMM_SELF,PETSC_ERR_ARG_WRONG);
 82:   }
 83:   for (i=0; i<count; i++) {
 84:     if      (xin[i].i == PETSC_SR_REDUCE_SUM) xout[i].v += xin[i].v;
 85:     else if (xin[i].i == PETSC_SR_REDUCE_MAX) xout[i].v = PetscMax(PetscRealPart(xout[i].v),PetscRealPart(xin[i].v));
 86:     else if (xin[i].i == PETSC_SR_REDUCE_MIN) xout[i].v = PetscMin(PetscRealPart(xout[i].v),PetscRealPart(xin[i].v));
 87:     else {
 88:       (*PetscErrorPrintf)("Reduction type input is not PETSC_SR_REDUCE_SUM, PETSC_SR_REDUCE_MAX, or PETSC_SR_REDUCE_MIN");
 89:       PETSCABORT(MPI_COMM_SELF,PETSC_ERR_ARG_WRONG);
 90:     }
 91:   }
 92:   return;
 93: }

 95: /*@
 96:    PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction

 98:    Collective but not synchronizing

100:    Input Parameter:
101:    comm - communicator on which split reduction has been queued

103:    Level: advanced

105:    Note:
106:    Calling this function is optional when using split-mode reduction. On supporting hardware, calling this after all
107:    VecXxxBegin() allows the reduction to make asynchronous progress before the result is needed (in VecXxxEnd()).

109: .seealso: VecNormBegin(), VecNormEnd(), VecDotBegin(), VecDotEnd(), VecTDotBegin(), VecTDotEnd(), VecMDotBegin(), VecMDotEnd(), VecMTDotBegin(), VecMTDotEnd()
110: @*/
111: PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
112: {
113:   PetscSplitReduction *sr;

115:   PetscSplitReductionGet(comm,&sr);
117:   if (sr->async) {              /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
118:     PetscInt    i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
119:     PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
120:     PetscInt    sum_flg = 0,max_flg = 0, min_flg = 0;
121:     MPI_Comm    comm = sr->comm;
122:     PetscMPIInt size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);

124:     PetscLogEventBegin(VEC_ReduceBegin,0,0,0,0);
125:     MPI_Comm_size(sr->comm,&size);
126:     if (size == 1) {
127:       PetscArraycpy(gvalues,lvalues,numops);
128:     } else {
129:       /* determine if all reductions are sum, max, or min */
130:       for (i=0; i<numops; i++) {
131:         if      (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
132:         else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
133:         else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
134:         else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
135:       }
137:       if (sum_flg + max_flg + min_flg > 1) {
138:         sr->mix = PETSC_TRUE;
139:         for (i=0; i<numops; i++) { sr->lvalues_mix[i].v = lvalues[i]; sr->lvalues_mix[i].i = reducetype[i]; }
140:         MPIPetsc_Iallreduce(sr->lvalues_mix,sr->gvalues_mix,numops,MPIU_SCALAR_INT,PetscSplitReduction_Op,comm,&sr->request);
141:       } else if (max_flg) {   /* Compute max of real and imag parts separately, presumably only the real part is used */
142:         MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm,&sr->request);
143:       } else if (min_flg) {
144:         MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm,&sr->request);
145:       } else {
146:         MPIPetsc_Iallreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm,&sr->request);
147:       }
148:     }
149:     sr->state     = STATE_PENDING;
150:     sr->numopsend = 0;
151:     PetscLogEventEnd(VEC_ReduceBegin,0,0,0,0);
152:   } else {
153:     PetscSplitReductionApply(sr);
154:   }
155:   return 0;
156: }

158: PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
159: {
160:   switch (sr->state) {
161:   case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
162:     PetscSplitReductionApply(sr);
163:     break;
164:   case STATE_PENDING:
165:     /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
166:     PetscLogEventBegin(VEC_ReduceEnd,0,0,0,0);
167:     if (sr->request != MPI_REQUEST_NULL) {
168:       MPI_Wait(&sr->request,MPI_STATUS_IGNORE);
169:     }
170:     sr->state = STATE_END;
171:     if (sr->mix) {
172:       PetscInt i;
173:       for (i=0; i<sr->numopsbegin; i++) { sr->gvalues[i] = sr->gvalues_mix[i].v; }
174:       sr->mix = PETSC_FALSE;
175:     }
176:     PetscLogEventEnd(VEC_ReduceEnd,0,0,0,0);
177:     break;
178:   default: break;            /* everything is already done */
179:   }
180:   return 0;
181: }

183: /*
184:    PetscSplitReductionApply - Actually do the communication required for a split phase reduction
185: */
186: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
187: {
188:   PetscInt       i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
189:   PetscScalar    *lvalues = sr->lvalues,*gvalues = sr->gvalues;
190:   PetscInt       sum_flg  = 0,max_flg = 0, min_flg = 0;
191:   MPI_Comm       comm     = sr->comm;
192:   PetscMPIInt    size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);

195:   PetscLogEventBegin(VEC_ReduceCommunication,0,0,0,0);
196:   MPI_Comm_size(sr->comm,&size);
197:   if (size == 1) {
198:     PetscArraycpy(gvalues,lvalues,numops);
199:   } else {
200:     /* determine if all reductions are sum, max, or min */
201:     for (i=0; i<numops; i++) {
202:       if      (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
203:       else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
204:       else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
205:       else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
206:     }
207:     if (sum_flg + max_flg + min_flg > 1) {
209:       for (i=0; i<numops; i++) { sr->lvalues_mix[i].v = lvalues[i]; sr->lvalues_mix[i].i = reducetype[i]; }
210:       MPIU_Allreduce(sr->lvalues_mix,sr->gvalues_mix,numops,MPIU_SCALAR_INT,PetscSplitReduction_Op,comm);
211:       for (i=0; i<numops; i++) { sr->gvalues[i] = sr->gvalues_mix[i].v; }
212:     } else if (max_flg) {     /* Compute max of real and imag parts separately, presumably only the real part is used */
213:       MPIU_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm);
214:     } else if (min_flg) {
215:       MPIU_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm);
216:     } else {
217:       MPIU_Allreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm);
218:     }
219:   }
220:   sr->state     = STATE_END;
221:   sr->numopsend = 0;
222:   PetscLogEventEnd(VEC_ReduceCommunication,0,0,0,0);
223:   return 0;
224: }

226: /*
227:    PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
228: */
229: PetscErrorCode  PetscSplitReductionExtend(PetscSplitReduction *sr)
230: {
231:   struct PetscScalarInt { PetscScalar v; PetscInt i; };
232:   PetscInt              maxops   = sr->maxops,*reducetype = sr->reducetype;
233:   PetscScalar           *lvalues = sr->lvalues,*gvalues = sr->gvalues;
234:   struct PetscScalarInt *lvalues_mix = (struct PetscScalarInt*)sr->lvalues_mix;
235:   struct PetscScalarInt *gvalues_mix = (struct PetscScalarInt*)sr->gvalues_mix;
236:   void                  **invecs = sr->invecs;

238:   sr->maxops = 2*maxops;
239:   PetscMalloc6(2*maxops,&sr->lvalues,2*maxops,&sr->gvalues,2*maxops,&sr->reducetype,2*maxops,&sr->invecs,2*maxops,&sr->lvalues_mix,2*maxops,&sr->gvalues_mix);
240:   PetscArraycpy(sr->lvalues,lvalues,maxops);
241:   PetscArraycpy(sr->gvalues,gvalues,maxops);
242:   PetscArraycpy(sr->reducetype,reducetype,maxops);
243:   PetscArraycpy(sr->invecs,invecs,maxops);
244:   PetscArraycpy(sr->lvalues_mix,lvalues_mix,maxops);
245:   PetscArraycpy(sr->gvalues_mix,gvalues_mix,maxops);
246:   PetscFree6(lvalues,gvalues,reducetype,invecs,lvalues_mix,gvalues_mix);
247:   return 0;
248: }

250: PetscErrorCode  PetscSplitReductionDestroy(PetscSplitReduction *sr)
251: {
252:   PetscFree6(sr->lvalues,sr->gvalues,sr->reducetype,sr->invecs,sr->lvalues_mix,sr->gvalues_mix);
253:   PetscFree(sr);
254:   return 0;
255: }

257: PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;

259: /*
260:    Private routine to delete internal storage when a communicator is freed.
261:   This is called by MPI, not by users.

263:   The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
264:   it was MPI_Comm *comm.
265: */
266: PETSC_EXTERN int MPIAPI Petsc_DelReduction(MPI_Comm comm,int keyval,void* attr_val,void* extra_state)
267: {
268:   PetscInfo(0,"Deleting reduction data in an MPI_Comm %ld\n",(long)comm);
269:   PetscSplitReductionDestroy((PetscSplitReduction*)attr_val);
270:   return 0;
271: }

273: /*
274:      PetscSplitReductionGet - Gets the split reduction object from a
275:         PETSc vector, creates if it does not exit.

277: */
278: PetscErrorCode PetscSplitReductionGet(MPI_Comm comm,PetscSplitReduction **sr)
279: {
280:   PetscMPIInt    flag;

282:   if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
283:     /*
284:        The calling sequence of the 2nd argument to this function changed
285:        between MPI Standard 1.0 and the revisions 1.1 Here we match the
286:        new standard, if you are using an MPI implementation that uses
287:        the older version you will get a warning message about the next line;
288:        it is only a warning message and should do no harm.
289:     */
290:     MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,Petsc_DelReduction,&Petsc_Reduction_keyval,NULL);
291:   }
292:   MPI_Comm_get_attr(comm,Petsc_Reduction_keyval,(void**)sr,&flag);
293:   if (!flag) {  /* doesn't exist yet so create it and put it in */
294:     PetscSplitReductionCreate(comm,sr);
295:     MPI_Comm_set_attr(comm,Petsc_Reduction_keyval,*sr);
296:     PetscInfo(0,"Putting reduction data in an MPI_Comm %ld\n",(long)comm);
297:   }
298:   return 0;
299: }

301: /* ----------------------------------------------------------------------------------------------------*/

303: /*@
304:    VecDotBegin - Starts a split phase dot product computation.

306:    Input Parameters:
307: +   x - the first vector
308: .   y - the second vector
309: -   result - where the result will go (can be NULL)

311:    Level: advanced

313:    Notes:
314:    Each call to VecDotBegin() should be paired with a call to VecDotEnd().

316: seealso: VecDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
317:          VecTDotBegin(), VecTDotEnd(), PetscCommSplitReductionBegin()
318: @*/
319: PetscErrorCode  VecDotBegin(Vec x,Vec y,PetscScalar *result)
320: {
321:   PetscSplitReduction *sr;
322:   MPI_Comm            comm;

326:   PetscObjectGetComm((PetscObject)x,&comm);
327:   PetscSplitReductionGet(comm,&sr);
329:   if (sr->numopsbegin >= sr->maxops) {
330:     PetscSplitReductionExtend(sr);
331:   }
332:   sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
333:   sr->invecs[sr->numopsbegin]     = (void*)x;
335:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
336:   (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
337:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
338:   return 0;
339: }

341: /*@
342:    VecDotEnd - Ends a split phase dot product computation.

344:    Input Parameters:
345: +  x - the first vector (can be NULL)
346: .  y - the second vector (can be NULL)
347: -  result - where the result will go

349:    Level: advanced

351:    Notes:
352:    Each call to VecDotBegin() should be paired with a call to VecDotEnd().

354: .seealso: VecDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
355:          VecTDotBegin(),VecTDotEnd(), PetscCommSplitReductionBegin()

357: @*/
358: PetscErrorCode  VecDotEnd(Vec x,Vec y,PetscScalar *result)
359: {
360:   PetscSplitReduction *sr;
361:   MPI_Comm            comm;

363:   PetscObjectGetComm((PetscObject)x,&comm);
364:   PetscSplitReductionGet(comm,&sr);
365:   PetscSplitReductionEnd(sr);

370:   *result = sr->gvalues[sr->numopsend++];

372:   /*
373:      We are finished getting all the results so reset to no outstanding requests
374:   */
375:   if (sr->numopsend == sr->numopsbegin) {
376:     sr->state       = STATE_BEGIN;
377:     sr->numopsend   = 0;
378:     sr->numopsbegin = 0;
379:     sr->mix         = PETSC_FALSE;
380:   }
381:   return 0;
382: }

384: /*@
385:    VecTDotBegin - Starts a split phase transpose dot product computation.

387:    Input Parameters:
388: +  x - the first vector
389: .  y - the second vector
390: -  result - where the result will go (can be NULL)

392:    Level: advanced

394:    Notes:
395:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

397: .seealso: VecTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
398:          VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()

400: @*/
401: PetscErrorCode  VecTDotBegin(Vec x,Vec y,PetscScalar *result)
402: {
403:   PetscSplitReduction *sr;
404:   MPI_Comm            comm;

406:   PetscObjectGetComm((PetscObject)x,&comm);
407:   PetscSplitReductionGet(comm,&sr);
409:   if (sr->numopsbegin >= sr->maxops) {
410:     PetscSplitReductionExtend(sr);
411:   }
412:   sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
413:   sr->invecs[sr->numopsbegin]     = (void*)x;
415:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
416:   (*x->ops->tdot_local)(x,y,sr->lvalues+sr->numopsbegin++);
417:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
418:   return 0;
419: }

421: /*@
422:    VecTDotEnd - Ends a split phase transpose dot product computation.

424:    Input Parameters:
425: +  x - the first vector (can be NULL)
426: .  y - the second vector (can be NULL)
427: -  result - where the result will go

429:    Level: advanced

431:    Notes:
432:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

434: seealso: VecTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
435:          VecDotBegin(), VecDotEnd()
436: @*/
437: PetscErrorCode  VecTDotEnd(Vec x,Vec y,PetscScalar *result)
438: {
439:   /*
440:       TDotEnd() is the same as DotEnd() so reuse the code
441:   */
442:   VecDotEnd(x,y,result);
443:   return 0;
444: }

446: /* -------------------------------------------------------------------------*/

448: /*@
449:    VecNormBegin - Starts a split phase norm computation.

451:    Input Parameters:
452: +  x - the first vector
453: .  ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
454: -  result - where the result will go (can be NULL)

456:    Level: advanced

458:    Notes:
459:    Each call to VecNormBegin() should be paired with a call to VecNormEnd().

461: .seealso: VecNormEnd(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()

463: @*/
464: PetscErrorCode  VecNormBegin(Vec x,NormType ntype,PetscReal *result)
465: {
466:   PetscSplitReduction *sr;
467:   PetscReal           lresult[2];
468:   MPI_Comm            comm;

471:   PetscObjectGetComm((PetscObject)x,&comm);
472:   PetscSplitReductionGet(comm,&sr);
474:   if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops-1 && ntype == NORM_1_AND_2)) {
475:     PetscSplitReductionExtend(sr);
476:   }

478:   sr->invecs[sr->numopsbegin] = (void*)x;
480:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
481:   (*x->ops->norm_local)(x,ntype,lresult);
482:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
483:   if (ntype == NORM_2)         lresult[0]                = lresult[0]*lresult[0];
484:   if (ntype == NORM_1_AND_2)   lresult[1]                = lresult[1]*lresult[1];
485:   if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_MAX;
486:   else                   sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
487:   sr->lvalues[sr->numopsbegin++] = lresult[0];
488:   if (ntype == NORM_1_AND_2) {
489:     sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
490:     sr->lvalues[sr->numopsbegin++]  = lresult[1];
491:   }
492:   return 0;
493: }

495: /*@
496:    VecNormEnd - Ends a split phase norm computation.

498:    Input Parameters:
499: +  x - the first vector
500: .  ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
501: -  result - where the result will go

503:    Level: advanced

505:    Notes:
506:    Each call to VecNormBegin() should be paired with a call to VecNormEnd().

508:    The x vector is not allowed to be NULL, otherwise the vector would not have its correctly cached norm value

510: .seealso: VecNormBegin(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()

512: @*/
513: PetscErrorCode  VecNormEnd(Vec x,NormType ntype,PetscReal *result)
514: {
515:   PetscSplitReduction *sr;
516:   MPI_Comm            comm;

519:   PetscObjectGetComm((PetscObject)x,&comm);
520:   PetscSplitReductionGet(comm,&sr);
521:   PetscSplitReductionEnd(sr);

526:   result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);

528:   if (ntype == NORM_2) result[0] = PetscSqrtReal(result[0]);
529:   else if (ntype == NORM_1_AND_2) {
530:     result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
531:     result[1] = PetscSqrtReal(result[1]);
532:   }
533:   if (ntype!=NORM_1_AND_2) {
534:     PetscObjectComposedDataSetReal((PetscObject)x,NormIds[ntype],result[0]);
535:   }

537:   if (sr->numopsend == sr->numopsbegin) {
538:     sr->state       = STATE_BEGIN;
539:     sr->numopsend   = 0;
540:     sr->numopsbegin = 0;
541:   }
542:   return 0;
543: }

545: /*
546:    Possibly add

548:      PetscReductionSumBegin/End()
549:      PetscReductionMaxBegin/End()
550:      PetscReductionMinBegin/End()
551:    or have more like MPI with a single function with flag for Op? Like first better
552: */

554: /*@
555:    VecMDotBegin - Starts a split phase multiple dot product computation.

557:    Input Parameters:
558: +   x - the first vector
559: .   nv - number of vectors
560: .   y - array of vectors
561: -   result - where the result will go (can be NULL)

563:    Level: advanced

565:    Notes:
566:    Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().

568: .seealso: VecMDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
569:          VecTDotBegin(), VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()
570: @*/
571: PetscErrorCode  VecMDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
572: {
573:   PetscSplitReduction *sr;
574:   MPI_Comm            comm;
575:   int                 i;

577:   PetscObjectGetComm((PetscObject)x,&comm);
578:   PetscSplitReductionGet(comm,&sr);
580:   for (i=0; i<nv; i++) {
581:     if (sr->numopsbegin+i >= sr->maxops) {
582:       PetscSplitReductionExtend(sr);
583:     }
584:     sr->reducetype[sr->numopsbegin+i] = PETSC_SR_REDUCE_SUM;
585:     sr->invecs[sr->numopsbegin+i]     = (void*)x;
586:   }
588:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
589:   (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
590:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
591:   sr->numopsbegin += nv;
592:   return 0;
593: }

595: /*@
596:    VecMDotEnd - Ends a split phase multiple dot product computation.

598:    Input Parameters:
599: +   x - the first vector (can be NULL)
600: .   nv - number of vectors
601: -   y - array of vectors (can be NULL)

603:    Output Parameters:
604: .   result - where the result will go

606:    Level: advanced

608:    Notes:
609:    Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().

611: .seealso: VecMDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
612:          VecTDotBegin(),VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()

614: @*/
615: PetscErrorCode  VecMDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
616: {
617:   PetscSplitReduction *sr;
618:   MPI_Comm            comm;
619:   int                 i;

621:   PetscObjectGetComm((PetscObject)x,&comm);
622:   PetscSplitReductionGet(comm,&sr);
623:   PetscSplitReductionEnd(sr);

628:   for (i=0;i<nv;i++) result[i] = sr->gvalues[sr->numopsend++];

630:   /*
631:      We are finished getting all the results so reset to no outstanding requests
632:   */
633:   if (sr->numopsend == sr->numopsbegin) {
634:     sr->state       = STATE_BEGIN;
635:     sr->numopsend   = 0;
636:     sr->numopsbegin = 0;
637:   }
638:   return 0;
639: }

641: /*@
642:    VecMTDotBegin - Starts a split phase transpose multiple dot product computation.

644:    Input Parameters:
645: +  x - the first vector
646: .  nv - number of vectors
647: .  y - array of  vectors
648: -  result - where the result will go (can be NULL)

650:    Level: advanced

652:    Notes:
653:    Each call to VecMTDotBegin() should be paired with a call to VecMTDotEnd().

655: .seealso: VecMTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
656:          VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()

658: @*/
659: PetscErrorCode  VecMTDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
660: {
661:   PetscSplitReduction *sr;
662:   MPI_Comm            comm;
663:   int                 i;

665:   PetscObjectGetComm((PetscObject)x,&comm);
666:   PetscSplitReductionGet(comm,&sr);
668:   for (i=0; i<nv; i++) {
669:     if (sr->numopsbegin+i >= sr->maxops) {
670:       PetscSplitReductionExtend(sr);
671:     }
672:     sr->reducetype[sr->numopsbegin+i] = PETSC_SR_REDUCE_SUM;
673:     sr->invecs[sr->numopsbegin+i]     = (void*)x;
674:   }
676:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
677:   (*x->ops->mtdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
678:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
679:   sr->numopsbegin += nv;
680:   return 0;
681: }

683: /*@
684:    VecMTDotEnd - Ends a split phase transpose multiple dot product computation.

686:    Input Parameters:
687: +  x - the first vector (can be NULL)
688: .  nv - number of vectors
689: -  y - array of  vectors (can be NULL)

691:    Output Parameters:
692: .  result - where the result will go

694:    Level: advanced

696:    Notes:
697:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

699: .seealso: VecMTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
700:          VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()
701: @*/
702: PetscErrorCode  VecMTDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
703: {
704:   /*
705:       MTDotEnd() is the same as MDotEnd() so reuse the code
706:   */
707:   VecMDotEnd(x,nv,y,result);
708:   return 0;
709: }