Actual source code: comb.c

petsc-3.11.4 2019-09-28
Report Typos and Errors

  2: /*
  3:       Split phase global vector reductions with support for combining the
  4:    communication portion of several operations. Using MPI-1.1 support only

  6:       The idea for this and much of the initial code is contributed by
  7:    Victor Eijkhout.

  9:        Usage:
 10:              VecDotBegin(Vec,Vec,PetscScalar *);
 11:              VecNormBegin(Vec,NormType,PetscReal *);
 12:              ....
 13:              VecDotEnd(Vec,Vec,PetscScalar *);
 14:              VecNormEnd(Vec,NormType,PetscReal *);

 16:        Limitations:
 17:          - The order of the xxxEnd() functions MUST be in the same order
 18:            as the xxxBegin(). There is extensive error checking to try to
 19:            insure that the user calls the routines in the correct order
 20: */

 22:  #include <petsc/private/vecimpl.h>

 24: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf,void *recvbuf,PetscMPIInt count,MPI_Datatype datatype,MPI_Op op,MPI_Comm comm,MPI_Request *request)
 25: {

 29: #if defined(PETSC_HAVE_MPI_IALLREDUCE)
 30:   MPI_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
 31: #elif defined(PETSC_HAVE_MPIX_IALLREDUCE)
 32:   MPIX_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
 33: #else
 34:   MPIU_Allreduce(sendbuf,recvbuf,count,datatype,op,comm);
 35:   *request = MPI_REQUEST_NULL;
 36: #endif
 37:   return(0);
 38: }

 40: /*
 41:    Note: the lvalues and gvalues are twice as long as maxops, this is to allow the second half of
 42: the entries to have a flag indicating if they are PETSC_SR_REDUCE_SUM, PETSC_SR_REDUCE_MAX, or PETSC_SR_REDUCE_MIN these are used by
 43: the custom reduction operation that replaces MPI_SUM, MPI_MAX, or MPI_MIN in the case when a reduction involves
 44: some of each.
 45: */

 47: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction*);

 49: /*
 50:    PetscSplitReductionCreate - Creates a data structure to contain the queued information.
 51: */
 52: static PetscErrorCode  PetscSplitReductionCreate(MPI_Comm comm,PetscSplitReduction **sr)
 53: {

 57:   PetscNew(sr);
 58:   (*sr)->numopsbegin = 0;
 59:   (*sr)->numopsend   = 0;
 60:   (*sr)->state       = STATE_BEGIN;
 61:   (*sr)->maxops      = 32;
 62:   PetscMalloc1(2*32,&(*sr)->lvalues);
 63:   PetscMalloc1(2*32,&(*sr)->gvalues);
 64:   PetscMalloc1(32,&(*sr)->invecs);
 65:   (*sr)->comm        = comm;
 66:   (*sr)->request     = MPI_REQUEST_NULL;
 67:   PetscMalloc1(32,&(*sr)->reducetype);
 68:   (*sr)->async       = PETSC_FALSE;
 69: #if defined(PETSC_HAVE_MPI_IALLREDUCE) || defined(PETSC_HAVE_MPIX_IALLREDUCE)
 70:   (*sr)->async = PETSC_TRUE;    /* Enable by default */
 71: #endif
 72:   /* always check for option; so that tests that run on systems without support don't warn about unhandled options */
 73:   PetscOptionsGetBool(NULL,NULL,"-splitreduction_async",&(*sr)->async,NULL);
 74:   return(0);
 75: }

 77: /*
 78:        This function is the MPI reduction operation used when there is
 79:    a combination of sums and max in the reduction. The call below to
 80:    MPI_Op_create() converts the function PetscSplitReduction_Local() to the
 81:    MPI operator PetscSplitReduction_Op.
 82: */
 83: MPI_Op PetscSplitReduction_Op = 0;

 85: PETSC_EXTERN void MPIAPI PetscSplitReduction_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
 86: {
 87:   PetscScalar *xin = (PetscScalar*)in,*xout = (PetscScalar*)out;
 88:   PetscInt    i,count = (PetscInt)*cnt;

 91:   if (*datatype != MPIU_SCALAR) {
 92:     (*PetscErrorPrintf)("Can only handle MPIU_SCALAR data types");
 93:     MPI_Abort(MPI_COMM_SELF,1);
 94:   }
 95:   count = count/2;
 96:   for (i=0; i<count; i++) {
 97:      /* second half of xin[] is flags for reduction type */
 98:     if      ((PetscInt)PetscRealPart(xin[count+i]) == PETSC_SR_REDUCE_SUM) xout[i] += xin[i];
 99:     else if ((PetscInt)PetscRealPart(xin[count+i]) == PETSC_SR_REDUCE_MAX) xout[i] = PetscMax(*(PetscReal*)(xout+i),*(PetscReal*)(xin+i));
100:     else if ((PetscInt)PetscRealPart(xin[count+i]) == PETSC_SR_REDUCE_MIN) xout[i] = PetscMin(*(PetscReal*)(xout+i),*(PetscReal*)(xin+i));
101:     else {
102:       (*PetscErrorPrintf)("Reduction type input is not PETSC_SR_REDUCE_SUM, PETSC_SR_REDUCE_MAX, or PETSC_SR_REDUCE_MIN");
103:       MPI_Abort(MPI_COMM_SELF,1);
104:     }
105:   }
106:   PetscFunctionReturnVoid();
107: }

109: /*@
110:    PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction

112:    Collective but not synchronizing

114:    Input Arguments:
115:    comm - communicator on which split reduction has been queued

117:    Level: advanced

119:    Note:
120:    Calling this function is optional when using split-mode reduction. On supporting hardware, calling this after all
121:    VecXxxBegin() allows the reduction to make asynchronous progress before the result is needed (in VecXxxEnd()).

123: .seealso: VecNormBegin(), VecNormEnd(), VecDotBegin(), VecDotEnd(), VecTDotBegin(), VecTDotEnd(), VecMDotBegin(), VecMDotEnd(), VecMTDotBegin(), VecMTDotEnd()
124: @*/
125: PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
126: {
127:   PetscErrorCode      ierr;
128:   PetscSplitReduction *sr;

131:   PetscSplitReductionGet(comm,&sr);
132:   if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
133:   if (sr->async) {              /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
134:     PetscInt       i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
135:     PetscScalar    *lvalues = sr->lvalues,*gvalues = sr->gvalues;
136:     PetscInt       sum_flg = 0,max_flg = 0, min_flg = 0;
137:     MPI_Comm       comm = sr->comm;
138:     PetscMPIInt    size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);;
139:     PetscLogEventBegin(VEC_ReduceBegin,0,0,0,0);
140:     MPI_Comm_size(sr->comm,&size);
141:     if (size == 1) {
142:       PetscMemcpy(gvalues,lvalues,numops*sizeof(PetscScalar));
143:     } else {
144:       /* determine if all reductions are sum, max, or min */
145:       for (i=0; i<numops; i++) {
146:         if      (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
147:         else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
148:         else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
149:         else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
150:       }
151:       if (sum_flg + max_flg + min_flg > 1) {
152:         /*
153:          after all the entires in lvalues we store the reducetype flags to indicate
154:          to the reduction operations what are sums and what are max
155:          */
156:         for (i=0; i<numops; i++) lvalues[numops+i] = reducetype[i];

158:         MPIPetsc_Iallreduce(lvalues,gvalues,2*numops,MPIU_SCALAR,PetscSplitReduction_Op,comm,&sr->request);
159:       } else if (max_flg) {   /* Compute max of real and imag parts separately, presumably only the real part is used */
160:         MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm,&sr->request);
161:       } else if (min_flg) {
162:         MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm,&sr->request);
163:       } else {
164:         MPIPetsc_Iallreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm,&sr->request);
165:       }
166:     }
167:     sr->state     = STATE_PENDING;
168:     sr->numopsend = 0;
169:     PetscLogEventEnd(VEC_ReduceBegin,0,0,0,0);
170:   } else {
171:     PetscSplitReductionApply(sr);
172:   }
173:   return(0);
174: }

176: PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
177: {

181:   switch (sr->state) {
182:   case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
183:     PetscSplitReductionApply(sr);
184:     break;
185:   case STATE_PENDING:
186:     /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
187:     PetscLogEventBegin(VEC_ReduceEnd,0,0,0,0);
188:     if (sr->request != MPI_REQUEST_NULL) {
189:       MPI_Wait(&sr->request,MPI_STATUS_IGNORE);
190:     }
191:     sr->state = STATE_END;
192:     PetscLogEventEnd(VEC_ReduceEnd,0,0,0,0);
193:     break;
194:   default: break;            /* everything is already done */
195:   }
196:   return(0);
197: }

199: /*
200:    PetscSplitReductionApply - Actually do the communication required for a split phase reduction
201: */
202: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
203: {
205:   PetscInt       i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
206:   PetscScalar    *lvalues = sr->lvalues,*gvalues = sr->gvalues;
207:   PetscInt       sum_flg  = 0,max_flg = 0, min_flg = 0;
208:   MPI_Comm       comm     = sr->comm;
209:   PetscMPIInt    size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);

212:   if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
213:   PetscLogEventBegin(VEC_ReduceCommunication,0,0,0,0);
214:   MPI_Comm_size(sr->comm,&size);
215:   if (size == 1) {
216:     PetscMemcpy(gvalues,lvalues,numops*sizeof(PetscScalar));
217:   } else {
218:     /* determine if all reductions are sum, max, or min */
219:     for (i=0; i<numops; i++) {
220:       if      (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
221:       else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
222:       else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
223:       else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
224:     }
225:     if (sum_flg + max_flg + min_flg > 1) {
226:       /*
227:          after all the entires in lvalues we store the reducetype flags to indicate
228:          to the reduction operations what are sums and what are max
229:       */
230:       for (i=0; i<numops; i++) lvalues[numops+i] = reducetype[i];
231:       MPIU_Allreduce(lvalues,gvalues,2*numops,MPIU_SCALAR,PetscSplitReduction_Op,comm);
232:     } else if (max_flg) {     /* Compute max of real and imag parts separately, presumably only the real part is used */
233:       MPIU_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm);
234:     } else if (min_flg) {
235:       MPIU_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm);
236:     } else {
237:       MPIU_Allreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm);
238:     }
239:   }
240:   sr->state     = STATE_END;
241:   sr->numopsend = 0;
242:   PetscLogEventEnd(VEC_ReduceCommunication,0,0,0,0);
243:   return(0);
244: }

246: /*
247:    PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
248: */
249: PetscErrorCode  PetscSplitReductionExtend(PetscSplitReduction *sr)
250: {
252:   PetscInt       maxops   = sr->maxops,*reducetype = sr->reducetype;
253:   PetscScalar    *lvalues = sr->lvalues,*gvalues = sr->gvalues;
254:   void           *invecs  = sr->invecs;

257:   sr->maxops     = 2*maxops;
258:   PetscMalloc1(2*2*maxops,&sr->lvalues);
259:   PetscMalloc1(2*2*maxops,&sr->gvalues);
260:   PetscMalloc1(2*maxops,&sr->reducetype);
261:   PetscMalloc1(2*maxops,&sr->invecs);
262:   PetscMemcpy(sr->lvalues,lvalues,maxops*sizeof(PetscScalar));
263:   PetscMemcpy(sr->gvalues,gvalues,maxops*sizeof(PetscScalar));
264:   PetscMemcpy(sr->reducetype,reducetype,maxops*sizeof(PetscInt));
265:   PetscMemcpy(sr->invecs,invecs,maxops*sizeof(void*));
266:   PetscFree(lvalues);
267:   PetscFree(gvalues);
268:   PetscFree(reducetype);
269:   PetscFree(invecs);
270:   return(0);
271: }

273: PetscErrorCode  PetscSplitReductionDestroy(PetscSplitReduction *sr)
274: {

278:   PetscFree(sr->lvalues);
279:   PetscFree(sr->gvalues);
280:   PetscFree(sr->reducetype);
281:   PetscFree(sr->invecs);
282:   PetscFree(sr);
283:   return(0);
284: }

286: PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;

288: /*
289:    Private routine to delete internal storage when a communicator is freed.
290:   This is called by MPI, not by users.

292:   The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
293:   it was MPI_Comm *comm.
294: */
295: PETSC_EXTERN int MPIAPI Petsc_DelReduction(MPI_Comm comm,int keyval,void* attr_val,void* extra_state)
296: {

300:   PetscInfo1(0,"Deleting reduction data in an MPI_Comm %ld\n",(long)comm);CHKERRMPI(ierr);
301:   PetscSplitReductionDestroy((PetscSplitReduction*)attr_val);CHKERRMPI(ierr);
302:   return(0);
303: }

305: /*
306:      PetscSplitReductionGet - Gets the split reduction object from a
307:         PETSc vector, creates if it does not exit.

309: */
310: PetscErrorCode PetscSplitReductionGet(MPI_Comm comm,PetscSplitReduction **sr)
311: {
313:   PetscMPIInt    flag;

316:   if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
317:     /*
318:        The calling sequence of the 2nd argument to this function changed
319:        between MPI Standard 1.0 and the revisions 1.1 Here we match the
320:        new standard, if you are using an MPI implementation that uses
321:        the older version you will get a warning message about the next line;
322:        it is only a warning message and should do no harm.
323:     */
324:     MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,Petsc_DelReduction,&Petsc_Reduction_keyval,0);
325:   }
326:   MPI_Comm_get_attr(comm,Petsc_Reduction_keyval,(void**)sr,&flag);
327:   if (!flag) {  /* doesn't exist yet so create it and put it in */
328:     PetscSplitReductionCreate(comm,sr);
329:     MPI_Comm_set_attr(comm,Petsc_Reduction_keyval,*sr);
330:     PetscInfo1(0,"Putting reduction data in an MPI_Comm %ld\n",(long)comm);
331:   }
332:   return(0);
333: }

335: /* ----------------------------------------------------------------------------------------------------*/

337: /*@
338:    VecDotBegin - Starts a split phase dot product computation.

340:    Input Parameters:
341: +   x - the first vector
342: .   y - the second vector
343: -   result - where the result will go (can be NULL)

345:    Level: advanced

347:    Notes:
348:    Each call to VecDotBegin() should be paired with a call to VecDotEnd().

350: seealso: VecDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
351:          VecTDotBegin(), VecTDotEnd(), PetscCommSplitReductionBegin()
352: @*/
353: PetscErrorCode  VecDotBegin(Vec x,Vec y,PetscScalar *result)
354: {
355:   PetscErrorCode      ierr;
356:   PetscSplitReduction *sr;
357:   MPI_Comm            comm;

362:   PetscObjectGetComm((PetscObject)x,&comm);
363:   PetscSplitReductionGet(comm,&sr);
364:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
365:   if (sr->numopsbegin >= sr->maxops) {
366:     PetscSplitReductionExtend(sr);
367:   }
368:   sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
369:   sr->invecs[sr->numopsbegin]     = (void*)x;
370:   if (!x->ops->dot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local dots");
371:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
372:   (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
373:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
374:   return(0);
375: }

377: /*@
378:    VecDotEnd - Ends a split phase dot product computation.

380:    Input Parameters:
381: +  x - the first vector (can be NULL)
382: .  y - the second vector (can be NULL)
383: -  result - where the result will go

385:    Level: advanced

387:    Notes:
388:    Each call to VecDotBegin() should be paired with a call to VecDotEnd().

390: .seealso: VecDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
391:          VecTDotBegin(),VecTDotEnd(), PetscCommSplitReductionBegin()

393: @*/
394: PetscErrorCode  VecDotEnd(Vec x,Vec y,PetscScalar *result)
395: {
396:   PetscErrorCode      ierr;
397:   PetscSplitReduction *sr;
398:   MPI_Comm            comm;

401:   PetscObjectGetComm((PetscObject)x,&comm);
402:   PetscSplitReductionGet(comm,&sr);
403:   PetscSplitReductionEnd(sr);

405:   if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
406:   if (x && (void*) x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
407:   if (sr->reducetype[sr->numopsend] != PETSC_SR_REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
408:   *result = sr->gvalues[sr->numopsend++];

410:   /*
411:      We are finished getting all the results so reset to no outstanding requests
412:   */
413:   if (sr->numopsend == sr->numopsbegin) {
414:     sr->state       = STATE_BEGIN;
415:     sr->numopsend   = 0;
416:     sr->numopsbegin = 0;
417:   }
418:   return(0);
419: }

421: /*@
422:    VecTDotBegin - Starts a split phase transpose dot product computation.

424:    Input Parameters:
425: +  x - the first vector
426: .  y - the second vector
427: -  result - where the result will go (can be NULL)

429:    Level: advanced

431:    Notes:
432:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

434: .seealso: VecTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
435:          VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()

437: @*/
438: PetscErrorCode  VecTDotBegin(Vec x,Vec y,PetscScalar *result)
439: {
440:   PetscErrorCode      ierr;
441:   PetscSplitReduction *sr;
442:   MPI_Comm            comm;

445:   PetscObjectGetComm((PetscObject)x,&comm);
446:   PetscSplitReductionGet(comm,&sr);
447:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
448:   if (sr->numopsbegin >= sr->maxops) {
449:     PetscSplitReductionExtend(sr);
450:   }
451:   sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
452:   sr->invecs[sr->numopsbegin]     = (void*)x;
453:   if (!x->ops->tdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local dots");
454:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
455:   (*x->ops->tdot_local)(x,y,sr->lvalues+sr->numopsbegin++);
456:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
457:   return(0);
458: }

460: /*@
461:    VecTDotEnd - Ends a split phase transpose dot product computation.

463:    Input Parameters:
464: +  x - the first vector (can be NULL)
465: .  y - the second vector (can be NULL)
466: -  result - where the result will go

468:    Level: advanced

470:    Notes:
471:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

473: seealso: VecTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
474:          VecDotBegin(), VecDotEnd()
475: @*/
476: PetscErrorCode  VecTDotEnd(Vec x,Vec y,PetscScalar *result)
477: {

481:   /*
482:       TDotEnd() is the same as DotEnd() so reuse the code
483:   */
484:   VecDotEnd(x,y,result);
485:   return(0);
486: }

488: /* -------------------------------------------------------------------------*/

490: /*@
491:    VecNormBegin - Starts a split phase norm computation.

493:    Input Parameters:
494: +  x - the first vector
495: .  ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
496: -  result - where the result will go (can be NULL)

498:    Level: advanced

500:    Notes:
501:    Each call to VecNormBegin() should be paired with a call to VecNormEnd().

503: .seealso: VecNormEnd(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()

505: @*/
506: PetscErrorCode  VecNormBegin(Vec x,NormType ntype,PetscReal *result)
507: {
508:   PetscErrorCode      ierr;
509:   PetscSplitReduction *sr;
510:   PetscReal           lresult[2];
511:   MPI_Comm            comm;

515:   PetscObjectGetComm((PetscObject)x,&comm);
516:   PetscSplitReductionGet(comm,&sr);
517:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
518:   if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops-1 && ntype == NORM_1_AND_2)) {
519:     PetscSplitReductionExtend(sr);
520:   }

522:   sr->invecs[sr->numopsbegin] = (void*)x;
523:   if (!x->ops->norm_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local norms");
524:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
525:   (*x->ops->norm_local)(x,ntype,lresult);
526:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
527:   if (ntype == NORM_2)         lresult[0]                = lresult[0]*lresult[0];
528:   if (ntype == NORM_1_AND_2)   lresult[1]                = lresult[1]*lresult[1];
529:   if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_MAX;
530:   else                   sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
531:   sr->lvalues[sr->numopsbegin++] = lresult[0];
532:   if (ntype == NORM_1_AND_2) {
533:     sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
534:     sr->lvalues[sr->numopsbegin++]  = lresult[1];
535:   }
536:   return(0);
537: }

539: /*@
540:    VecNormEnd - Ends a split phase norm computation.

542:    Input Parameters:
543: +  x - the first vector 
544: .  ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
545: -  result - where the result will go

547:    Level: advanced

549:    Notes:
550:    Each call to VecNormBegin() should be paired with a call to VecNormEnd().

552:    The x vector is not allowed to be NULL, otherwise the vector would not have its correctly cached norm value

554: .seealso: VecNormBegin(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()

556: @*/
557: PetscErrorCode  VecNormEnd(Vec x,NormType ntype,PetscReal *result)
558: {
559:   PetscErrorCode      ierr;
560:   PetscSplitReduction *sr;
561:   MPI_Comm            comm;

565:   PetscObjectGetComm((PetscObject)x,&comm);
566:   PetscSplitReductionGet(comm,&sr);
567:   PetscSplitReductionEnd(sr);

569:   if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
570:   if ((void*)x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
571:   if (sr->reducetype[sr->numopsend] != PETSC_SR_REDUCE_MAX && ntype == NORM_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
572:   result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);

574:   if (ntype == NORM_2) result[0] = PetscSqrtReal(result[0]);
575:   else if (ntype == NORM_1_AND_2) {
576:     result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
577:     result[1] = PetscSqrtReal(result[1]);
578:   }
579:   if (ntype!=NORM_1_AND_2) {
580:     PetscObjectComposedDataSetReal((PetscObject)x,NormIds[ntype],result[0]);
581:   }

583:   if (sr->numopsend == sr->numopsbegin) {
584:     sr->state       = STATE_BEGIN;
585:     sr->numopsend   = 0;
586:     sr->numopsbegin = 0;
587:   }
588:   return(0);
589: }

591: /*
592:    Possibly add

594:      PetscReductionSumBegin/End()
595:      PetscReductionMaxBegin/End()
596:      PetscReductionMinBegin/End()
597:    or have more like MPI with a single function with flag for Op? Like first better
598: */

600: /*@
601:    VecMDotBegin - Starts a split phase multiple dot product computation.

603:    Input Parameters:
604: +   x - the first vector
605: .   nv - number of vectors
606: .   y - array of vectors
607: -   result - where the result will go (can be NULL)

609:    Level: advanced

611:    Notes:
612:    Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().

614: .seealso: VecMDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
615:          VecTDotBegin(), VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()
616: @*/
617: PetscErrorCode  VecMDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
618: {
619:   PetscErrorCode      ierr;
620:   PetscSplitReduction *sr;
621:   MPI_Comm            comm;
622:   int                 i;

625:   PetscObjectGetComm((PetscObject)x,&comm);
626:   PetscSplitReductionGet(comm,&sr);
627:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
628:   for (i=0; i<nv; i++) {
629:     if (sr->numopsbegin+i >= sr->maxops) {
630:       PetscSplitReductionExtend(sr);
631:     }
632:     sr->reducetype[sr->numopsbegin+i] = PETSC_SR_REDUCE_SUM;
633:     sr->invecs[sr->numopsbegin+i]     = (void*)x;
634:   }
635:   if (!x->ops->mdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local mdots");
636:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
637:   (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
638:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
639:   sr->numopsbegin += nv;
640:   return(0);
641: }

643: /*@
644:    VecMDotEnd - Ends a split phase multiple dot product computation.

646:    Input Parameters:
647: +   x - the first vector (can be NULL)
648: .   nv - number of vectors
649: -   y - array of vectors (can be NULL)

651:    Output Parameters:
652: .   result - where the result will go

654:    Level: advanced

656:    Notes:
657:    Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().

659: .seealso: VecMDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
660:          VecTDotBegin(),VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()

662: @*/
663: PetscErrorCode  VecMDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
664: {
665:   PetscErrorCode      ierr;
666:   PetscSplitReduction *sr;
667:   MPI_Comm            comm;
668:   int                 i;

671:   PetscObjectGetComm((PetscObject)x,&comm);
672:   PetscSplitReductionGet(comm,&sr);
673:   PetscSplitReductionEnd(sr);

675:   if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
676:   if (x && (void*) x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
677:   if (sr->reducetype[sr->numopsend] != PETSC_SR_REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
678:   for (i=0;i<nv;i++) result[i] = sr->gvalues[sr->numopsend++];

680:   /*
681:      We are finished getting all the results so reset to no outstanding requests
682:   */
683:   if (sr->numopsend == sr->numopsbegin) {
684:     sr->state       = STATE_BEGIN;
685:     sr->numopsend   = 0;
686:     sr->numopsbegin = 0;
687:   }
688:   return(0);
689: }

691: /*@
692:    VecMTDotBegin - Starts a split phase transpose multiple dot product computation.

694:    Input Parameters:
695: +  x - the first vector
696: .  nv - number of vectors
697: .  y - array of  vectors
698: -  result - where the result will go (can be NULL)

700:    Level: advanced

702:    Notes:
703:    Each call to VecMTDotBegin() should be paired with a call to VecMTDotEnd().

705: .seealso: VecMTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
706:          VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()

708: @*/
709: PetscErrorCode  VecMTDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
710: {
711:   PetscErrorCode      ierr;
712:   PetscSplitReduction *sr;
713:   MPI_Comm            comm;
714:   int                 i;

717:   PetscObjectGetComm((PetscObject)x,&comm);
718:   PetscSplitReductionGet(comm,&sr);
719:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
720:   for (i=0; i<nv; i++) {
721:     if (sr->numopsbegin+i >= sr->maxops) {
722:       PetscSplitReductionExtend(sr);
723:     }
724:     sr->reducetype[sr->numopsbegin+i] = PETSC_SR_REDUCE_SUM;
725:     sr->invecs[sr->numopsbegin+i]     = (void*)x;
726:   }
727:   if (!x->ops->mtdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local mdots");
728:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
729:   (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
730:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
731:   sr->numopsbegin += nv;
732:   return(0);
733: }

735: /*@
736:    VecMTDotEnd - Ends a split phase transpose multiple dot product computation.

738:    Input Parameters:
739: +  x - the first vector (can be NULL)
740: .  nv - number of vectors
741: -  y - array of  vectors (can be NULL)

743:    Output Parameters
744: .  result - where the result will go

746:    Level: advanced

748:    Notes:
749:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

751: .seealso: VecMTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
752:          VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()
753: @*/
754: PetscErrorCode  VecMTDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
755: {

759:   /*
760:       MTDotEnd() is the same as MDotEnd() so reuse the code
761:   */
762:   VecMDotEnd(x,nv,y,result);
763:   return(0);
764: }