Actual source code: comb.c

petsc-3.6.4 2016-04-12
Report Typos and Errors
  2: /*
  3:       Split phase global vector reductions with support for combining the
  4:    communication portion of several operations. Using MPI-1.1 support only

  6:       The idea for this and much of the initial code is contributed by
  7:    Victor Eijkhout.

  9:        Usage:
 10:              VecDotBegin(Vec,Vec,PetscScalar *);
 11:              VecNormBegin(Vec,NormType,PetscReal *);
 12:              ....
 13:              VecDotEnd(Vec,Vec,PetscScalar *);
 14:              VecNormEnd(Vec,NormType,PetscReal *);

 16:        Limitations:
 17:          - The order of the xxxEnd() functions MUST be in the same order
 18:            as the xxxBegin(). There is extensive error checking to try to
 19:            insure that the user calls the routines in the correct order
 20: */

 22: #include <petsc/private/vecimpl.h>    /*I   "petscvec.h"    I*/

 26: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf,void *recvbuf,PetscMPIInt count,MPI_Datatype datatype,MPI_Op op,MPI_Comm comm,MPI_Request *request)
 27: {

 31: #if defined(PETSC_HAVE_MPI_IALLREDUCE)
 32:   MPI_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
 33: #elif defined(PETSC_HAVE_MPIX_IALLREDUCE)
 34:   MPIX_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
 35: #else
 36:   MPI_Allreduce(sendbuf,recvbuf,count,datatype,op,comm);
 37:   *request = MPI_REQUEST_NULL;
 38: #endif
 39:   return(0);
 40: }

 42: /*
 43:    Note: the lvalues and gvalues are twice as long as maxops, this is to allow the second half of
 44: the entries to have a flag indicating if they are REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN these are used by
 45: the custom reduction operation that replaces MPI_SUM, MPI_MAX, or MPI_MIN in the case when a reduction involves
 46: some of each.
 47: */

 49: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction*);

 53: /*
 54:    PetscSplitReductionCreate - Creates a data structure to contain the queued information.
 55: */
 56: static PetscErrorCode  PetscSplitReductionCreate(MPI_Comm comm,PetscSplitReduction **sr)
 57: {

 61:   PetscNew(sr);
 62:   (*sr)->numopsbegin = 0;
 63:   (*sr)->numopsend   = 0;
 64:   (*sr)->state       = STATE_BEGIN;
 65:   (*sr)->maxops      = 32;
 66:   PetscMalloc1(2*32,&(*sr)->lvalues);
 67:   PetscMalloc1(2*32,&(*sr)->gvalues);
 68:   PetscMalloc1(32,&(*sr)->invecs);
 69:   (*sr)->comm        = comm;
 70:   (*sr)->request     = MPI_REQUEST_NULL;
 71:   PetscMalloc1(32,&(*sr)->reducetype);
 72:   (*sr)->async       = PETSC_FALSE;
 73: #if defined(PETSC_HAVE_MPI_IALLREDUCE) || defined(PETSC_HAVE_MPIX_IALLREDUCE)
 74:   (*sr)->async = PETSC_TRUE;    /* Enable by default */
 75:   PetscOptionsGetBool(NULL,"-splitreduction_async",&(*sr)->async,NULL);
 76: #endif
 77:   return(0);
 78: }

 80: /*
 81:        This function is the MPI reduction operation used when there is
 82:    a combination of sums and max in the reduction. The call below to
 83:    MPI_Op_create() converts the function PetscSplitReduction_Local() to the
 84:    MPI operator PetscSplitReduction_Op.
 85: */
 86: MPI_Op PetscSplitReduction_Op = 0;

 90: PETSC_EXTERN void MPIAPI PetscSplitReduction_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
 91: {
 92:   PetscScalar *xin = (PetscScalar*)in,*xout = (PetscScalar*)out;
 93:   PetscInt    i,count = (PetscInt)*cnt;

 96:   if (*datatype != MPIU_SCALAR) {
 97:     (*PetscErrorPrintf)("Can only handle MPIU_SCALAR data types");
 98:     MPI_Abort(MPI_COMM_SELF,1);
 99:   }
100:   count = count/2;
101:   for (i=0; i<count; i++) {
102:      /* second half of xin[] is flags for reduction type */
103:     if      ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_SUM) xout[i] += xin[i];
104:     else if ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_MAX) xout[i] = PetscMax(*(PetscReal*)(xout+i),*(PetscReal*)(xin+i));
105:     else if ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_MIN) xout[i] = PetscMin(*(PetscReal*)(xout+i),*(PetscReal*)(xin+i));
106:     else {
107:       (*PetscErrorPrintf)("Reduction type input is not REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN");
108:       MPI_Abort(MPI_COMM_SELF,1);
109:     }
110:   }
111:   PetscFunctionReturnVoid();
112: }

116: /*@
117:    PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction

119:    Collective but not synchronizing

121:    Input Arguments:
122:    comm - communicator on which split reduction has been queued

124:    Level: advanced

126:    Note:
127:    Calling this function is optional when using split-mode reduction. On supporting hardware, calling this after all
128:    VecXxxBegin() allows the reduction to make asynchronous progress before the result is needed (in VecXxxEnd()).

130: .seealso: VecNormBegin(), VecNormEnd(), VecDotBegin(), VecDotEnd(), VecTDotBegin(), VecTDotEnd(), VecMDotBegin(), VecMDotEnd(), VecMTDotBegin(), VecMTDotEnd()
131: @*/
132: PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
133: {
134:   PetscErrorCode      ierr;
135:   PetscSplitReduction *sr;

138:   PetscSplitReductionGet(comm,&sr);
139:   if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
140:   if (sr->async) {              /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
141:     PetscInt       i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
142:     PetscScalar    *lvalues = sr->lvalues,*gvalues = sr->gvalues;
143:     PetscInt       sum_flg = 0,max_flg = 0, min_flg = 0;
144:     MPI_Comm       comm = sr->comm;
145:     PetscMPIInt    size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);;
146:     PetscLogEventBegin(VEC_ReduceBegin,0,0,0,0);
147:     MPI_Comm_size(sr->comm,&size);
148:     if (size == 1) {
149:       PetscMemcpy(gvalues,lvalues,numops*sizeof(PetscScalar));
150:     } else {
151:       /* determine if all reductions are sum, max, or min */
152:       for (i=0; i<numops; i++) {
153:         if      (reducetype[i] == REDUCE_MAX) max_flg = 1;
154:         else if (reducetype[i] == REDUCE_SUM) sum_flg = 1;
155:         else if (reducetype[i] == REDUCE_MIN) min_flg = 1;
156:         else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
157:       }
158:       if (sum_flg + max_flg + min_flg > 1) {
159:         /*
160:          after all the entires in lvalues we store the reducetype flags to indicate
161:          to the reduction operations what are sums and what are max
162:          */
163:         for (i=0; i<numops; i++) lvalues[numops+i] = reducetype[i];

165:         MPIPetsc_Iallreduce(lvalues,gvalues,2*numops,MPIU_SCALAR,PetscSplitReduction_Op,comm,&sr->request);
166:       } else if (max_flg) {   /* Compute max of real and imag parts separately, presumably only the real part is used */
167:         MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm,&sr->request);
168:       } else if (min_flg) {
169:         MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm,&sr->request);
170:       } else {
171:         MPIPetsc_Iallreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm,&sr->request);
172:       }
173:     }
174:     sr->state     = STATE_PENDING;
175:     sr->numopsend = 0;
176:     PetscLogEventEnd(VEC_ReduceBegin,0,0,0,0);
177:   } else {
178:     PetscSplitReductionApply(sr);
179:   }
180:   return(0);
181: }

185: PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
186: {

190:   switch (sr->state) {
191:   case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
192:     PetscSplitReductionApply(sr);
193:     break;
194:   case STATE_PENDING:
195:     /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
196:     PetscLogEventBegin(VEC_ReduceEnd,0,0,0,0);
197:     if (sr->request != MPI_REQUEST_NULL) {
198:       MPI_Wait(&sr->request,MPI_STATUS_IGNORE);
199:     }
200:     sr->state = STATE_END;
201:     PetscLogEventEnd(VEC_ReduceEnd,0,0,0,0);
202:     break;
203:   default: break;            /* everything is already done */
204:   }
205:   return(0);
206: }

210: /*
211:    PetscSplitReductionApply - Actually do the communication required for a split phase reduction
212: */
213: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
214: {
216:   PetscInt       i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
217:   PetscScalar    *lvalues = sr->lvalues,*gvalues = sr->gvalues;
218:   PetscInt       sum_flg  = 0,max_flg = 0, min_flg = 0;
219:   MPI_Comm       comm     = sr->comm;
220:   PetscMPIInt    size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);

223:   if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
224:   PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,comm);
225:   MPI_Comm_size(sr->comm,&size);
226:   if (size == 1) {
227:     PetscMemcpy(gvalues,lvalues,numops*sizeof(PetscScalar));
228:   } else {
229:     /* determine if all reductions are sum, max, or min */
230:     for (i=0; i<numops; i++) {
231:       if      (reducetype[i] == REDUCE_MAX) max_flg = 1;
232:       else if (reducetype[i] == REDUCE_SUM) sum_flg = 1;
233:       else if (reducetype[i] == REDUCE_MIN) min_flg = 1;
234:       else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
235:     }
236:     if (sum_flg + max_flg + min_flg > 1) {
237:       /*
238:          after all the entires in lvalues we store the reducetype flags to indicate
239:          to the reduction operations what are sums and what are max
240:       */
241:       for (i=0; i<numops; i++) lvalues[numops+i] = reducetype[i];
242:       MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_SCALAR,PetscSplitReduction_Op,comm);
243:     } else if (max_flg) {     /* Compute max of real and imag parts separately, presumably only the real part is used */
244:       MPI_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm);
245:     } else if (min_flg) {
246:       MPI_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm);
247:     } else {
248:       MPI_Allreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm);
249:     }
250:   }
251:   sr->state     = STATE_END;
252:   sr->numopsend = 0;
253:   PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,comm);
254:   return(0);
255: }

259: /*
260:    PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
261: */
262: PetscErrorCode  PetscSplitReductionExtend(PetscSplitReduction *sr)
263: {
265:   PetscInt       maxops   = sr->maxops,*reducetype = sr->reducetype;
266:   PetscScalar    *lvalues = sr->lvalues,*gvalues = sr->gvalues;
267:   void           *invecs  = sr->invecs;

270:   sr->maxops     = 2*maxops;
271:   PetscMalloc1(2*2*maxops,&sr->lvalues);
272:   PetscMalloc1(2*2*maxops,&sr->gvalues);
273:   PetscMalloc1(2*maxops,&sr->reducetype);
274:   PetscMalloc1(2*maxops,&sr->invecs);
275:   PetscMemcpy(sr->lvalues,lvalues,maxops*sizeof(PetscScalar));
276:   PetscMemcpy(sr->gvalues,gvalues,maxops*sizeof(PetscScalar));
277:   PetscMemcpy(sr->reducetype,reducetype,maxops*sizeof(PetscInt));
278:   PetscMemcpy(sr->invecs,invecs,maxops*sizeof(void*));
279:   PetscFree(lvalues);
280:   PetscFree(gvalues);
281:   PetscFree(reducetype);
282:   PetscFree(invecs);
283:   return(0);
284: }

288: PetscErrorCode  PetscSplitReductionDestroy(PetscSplitReduction *sr)
289: {

293:   PetscFree(sr->lvalues);
294:   PetscFree(sr->gvalues);
295:   PetscFree(sr->reducetype);
296:   PetscFree(sr->invecs);
297:   PetscFree(sr);
298:   return(0);
299: }

301: static PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;

305: /*
306:    Private routine to delete internal storage when a communicator is freed.
307:   This is called by MPI, not by users.

309:   The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
310:   it was MPI_Comm *comm.
311: */
312: PETSC_EXTERN int MPIAPI Petsc_DelReduction(MPI_Comm comm,int keyval,void* attr_val,void* extra_state)
313: {

317:   PetscInfo1(0,"Deleting reduction data in an MPI_Comm %ld\n",(long)comm);
318:   PetscSplitReductionDestroy((PetscSplitReduction*)attr_val);
319:   return(0);
320: }

322: /*
323:      PetscSplitReductionGet - Gets the split reduction object from a
324:         PETSc vector, creates if it does not exit.

326: */
329: PetscErrorCode PetscSplitReductionGet(MPI_Comm comm,PetscSplitReduction **sr)
330: {
332:   PetscMPIInt    flag;

335:   if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
336:     /*
337:        The calling sequence of the 2nd argument to this function changed
338:        between MPI Standard 1.0 and the revisions 1.1 Here we match the
339:        new standard, if you are using an MPI implementation that uses
340:        the older version you will get a warning message about the next line;
341:        it is only a warning message and should do no harm.
342:     */
343:     MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelReduction,&Petsc_Reduction_keyval,0);
344:   }
345:   MPI_Attr_get(comm,Petsc_Reduction_keyval,(void**)sr,&flag);
346:   if (!flag) {  /* doesn't exist yet so create it and put it in */
347:     PetscSplitReductionCreate(comm,sr);
348:     MPI_Attr_put(comm,Petsc_Reduction_keyval,*sr);
349:     PetscInfo1(0,"Putting reduction data in an MPI_Comm %ld\n",(long)comm);
350:   }
351:   return(0);
352: }

354: /* ----------------------------------------------------------------------------------------------------*/

358: /*@
359:    VecDotBegin - Starts a split phase dot product computation.

361:    Input Parameters:
362: +   x - the first vector
363: .   y - the second vector
364: -   result - where the result will go (can be NULL)

366:    Level: advanced

368:    Notes:
369:    Each call to VecDotBegin() should be paired with a call to VecDotEnd().

371: seealso: VecDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
372:          VecTDotBegin(), VecTDotEnd(), PetscCommSplitReductionBegin()
373: @*/
374: PetscErrorCode  VecDotBegin(Vec x,Vec y,PetscScalar *result)
375: {
376:   PetscErrorCode      ierr;
377:   PetscSplitReduction *sr;
378:   MPI_Comm            comm;

381:   PetscObjectGetComm((PetscObject)x,&comm);
382:   PetscSplitReductionGet(comm,&sr);
383:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
384:   if (sr->numopsbegin >= sr->maxops) {
385:     PetscSplitReductionExtend(sr);
386:   }
387:   sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
388:   sr->invecs[sr->numopsbegin]     = (void*)x;
389:   if (!x->ops->tdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local dots");
390:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
391:   (*x->ops->tdot_local)(x,y,sr->lvalues+sr->numopsbegin++);
392:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
393:   return(0);
394: }

398: /*@
399:    VecDotEnd - Ends a split phase dot product computation.

401:    Input Parameters:
402: +  x - the first vector (can be NULL)
403: .  y - the second vector (can be NULL)
404: -  result - where the result will go

406:    Level: advanced

408:    Notes:
409:    Each call to VecDotBegin() should be paired with a call to VecDotEnd().

411: .seealso: VecDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
412:          VecTDotBegin(),VecTDotEnd(), PetscCommSplitReductionBegin()

414: @*/
415: PetscErrorCode  VecDotEnd(Vec x,Vec y,PetscScalar *result)
416: {
417:   PetscErrorCode      ierr;
418:   PetscSplitReduction *sr;
419:   MPI_Comm            comm;

422:   PetscObjectGetComm((PetscObject)x,&comm);
423:   PetscSplitReductionGet(comm,&sr);
424:   PetscSplitReductionEnd(sr);

426:   if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
427:   if (x && (void*) x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
428:   if (sr->reducetype[sr->numopsend] != REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
429:   *result = sr->gvalues[sr->numopsend++];

431:   /*
432:      We are finished getting all the results so reset to no outstanding requests
433:   */
434:   if (sr->numopsend == sr->numopsbegin) {
435:     sr->state       = STATE_BEGIN;
436:     sr->numopsend   = 0;
437:     sr->numopsbegin = 0;
438:   }
439:   return(0);
440: }

444: /*@
445:    VecTDotBegin - Starts a split phase transpose dot product computation.

447:    Input Parameters:
448: +  x - the first vector
449: .  y - the second vector
450: -  result - where the result will go (can be NULL)

452:    Level: advanced

454:    Notes:
455:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

457: .seealso: VecTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
458:          VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()

460: @*/
461: PetscErrorCode  VecTDotBegin(Vec x,Vec y,PetscScalar *result)
462: {
463:   PetscErrorCode      ierr;
464:   PetscSplitReduction *sr;
465:   MPI_Comm            comm;

468:   PetscObjectGetComm((PetscObject)x,&comm);
469:   PetscSplitReductionGet(comm,&sr);
470:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
471:   if (sr->numopsbegin >= sr->maxops) {
472:     PetscSplitReductionExtend(sr);
473:   }
474:   sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
475:   sr->invecs[sr->numopsbegin]     = (void*)x;
476:   if (!x->ops->tdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local dots");
477:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
478:   (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
479:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
480:   return(0);
481: }

485: /*@
486:    VecTDotEnd - Ends a split phase transpose dot product computation.

488:    Input Parameters:
489: +  x - the first vector (can be NULL)
490: .  y - the second vector (can be NULL)
491: -  result - where the result will go

493:    Level: advanced

495:    Notes:
496:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

498: seealso: VecTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
499:          VecDotBegin(), VecDotEnd()
500: @*/
501: PetscErrorCode  VecTDotEnd(Vec x,Vec y,PetscScalar *result)
502: {

506:   /*
507:       TDotEnd() is the same as DotEnd() so reuse the code
508:   */
509:   VecDotEnd(x,y,result);
510:   return(0);
511: }

513: /* -------------------------------------------------------------------------*/

517: /*@
518:    VecNormBegin - Starts a split phase norm computation.

520:    Input Parameters:
521: +  x - the first vector
522: .  ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
523: -  result - where the result will go (can be NULL)

525:    Level: advanced

527:    Notes:
528:    Each call to VecNormBegin() should be paired with a call to VecNormEnd().

530: .seealso: VecNormEnd(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()

532: @*/
533: PetscErrorCode  VecNormBegin(Vec x,NormType ntype,PetscReal *result)
534: {
535:   PetscErrorCode      ierr;
536:   PetscSplitReduction *sr;
537:   PetscReal           lresult[2];
538:   MPI_Comm            comm;

541:   PetscObjectGetComm((PetscObject)x,&comm);
542:   PetscSplitReductionGet(comm,&sr);
543:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
544:   if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops-1 && ntype == NORM_1_AND_2)) {
545:     PetscSplitReductionExtend(sr);
546:   }

548:   sr->invecs[sr->numopsbegin] = (void*)x;
549:   if (!x->ops->norm_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local norms");
550:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
551:   (*x->ops->norm_local)(x,ntype,lresult);
552:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
553:   if (ntype == NORM_2)         lresult[0]                = lresult[0]*lresult[0];
554:   if (ntype == NORM_1_AND_2)   lresult[1]                = lresult[1]*lresult[1];
555:   if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = REDUCE_MAX;
556:   else                   sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
557:   sr->lvalues[sr->numopsbegin++] = lresult[0];
558:   if (ntype == NORM_1_AND_2) {
559:     sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
560:     sr->lvalues[sr->numopsbegin++]  = lresult[1];
561:   }
562:   return(0);
563: }

567: /*@
568:    VecNormEnd - Ends a split phase norm computation.

570:    Input Parameters:
571: +  x - the first vector (can be NULL)
572: .  ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
573: -  result - where the result will go

575:    Level: advanced

577:    Notes:
578:    Each call to VecNormBegin() should be paired with a call to VecNormEnd().

580: .seealso: VecNormBegin(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()

582: @*/
583: PetscErrorCode  VecNormEnd(Vec x,NormType ntype,PetscReal *result)
584: {
585:   PetscErrorCode      ierr;
586:   PetscSplitReduction *sr;
587:   MPI_Comm            comm;

590:   PetscObjectGetComm((PetscObject)x,&comm);
591:   PetscSplitReductionGet(comm,&sr);
592:   PetscSplitReductionEnd(sr);

594:   if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
595:   if (x && (void*)x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
596:   if (sr->reducetype[sr->numopsend] != REDUCE_MAX && ntype == NORM_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
597:   result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);

599:   if (ntype == NORM_2) result[0] = PetscSqrtReal(result[0]);
600:   else if (ntype == NORM_1_AND_2) {
601:     result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
602:     result[1] = PetscSqrtReal(result[1]);
603:   }
604:   if (ntype!=NORM_1_AND_2) {
605:     PetscObjectComposedDataSetReal((PetscObject)x,NormIds[ntype],result[0]);
606:   }

608:   if (sr->numopsend == sr->numopsbegin) {
609:     sr->state       = STATE_BEGIN;
610:     sr->numopsend   = 0;
611:     sr->numopsbegin = 0;
612:   }
613:   return(0);
614: }

616: /*
617:    Possibly add

619:      PetscReductionSumBegin/End()
620:      PetscReductionMaxBegin/End()
621:      PetscReductionMinBegin/End()
622:    or have more like MPI with a single function with flag for Op? Like first better
623: */

627: /*@
628:    VecMDotBegin - Starts a split phase multiple dot product computation.

630:    Input Parameters:
631: +   x - the first vector
632: .   nv - number of vectors
633: .   y - array of vectors
634: -   result - where the result will go (can be NULL)

636:    Level: advanced

638:    Notes:
639:    Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().

641: .seealso: VecMDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
642:          VecTDotBegin(), VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()
643: @*/
644: PetscErrorCode  VecMDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
645: {
646:   PetscErrorCode      ierr;
647:   PetscSplitReduction *sr;
648:   MPI_Comm            comm;
649:   int                 i;

652:   PetscObjectGetComm((PetscObject)x,&comm);
653:   PetscSplitReductionGet(comm,&sr);
654:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
655:   for (i=0; i<nv; i++) {
656:     if (sr->numopsbegin+i >= sr->maxops) {
657:       PetscSplitReductionExtend(sr);
658:     }
659:     sr->reducetype[sr->numopsbegin+i] = REDUCE_SUM;
660:     sr->invecs[sr->numopsbegin+i]     = (void*)x;
661:   }
662:   if (!x->ops->mdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local mdots");
663:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
664:   (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
665:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
666:   sr->numopsbegin += nv;
667:   return(0);
668: }

672: /*@
673:    VecMDotEnd - Ends a split phase multiple dot product computation.

675:    Input Parameters:
676: +   x - the first vector (can be NULL)
677: .   nv - number of vectors
678: -   y - array of vectors (can be NULL)

680:    Output Parameters:
681: .   result - where the result will go

683:    Level: advanced

685:    Notes:
686:    Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().

688: .seealso: VecMDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
689:          VecTDotBegin(),VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()

691: @*/
692: PetscErrorCode  VecMDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
693: {
694:   PetscErrorCode      ierr;
695:   PetscSplitReduction *sr;
696:   MPI_Comm            comm;
697:   int                 i;

700:   PetscObjectGetComm((PetscObject)x,&comm);
701:   PetscSplitReductionGet(comm,&sr);
702:   PetscSplitReductionEnd(sr);

704:   if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
705:   if (x && (void*) x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
706:   if (sr->reducetype[sr->numopsend] != REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
707:   for (i=0;i<nv;i++) result[i] = sr->gvalues[sr->numopsend++];

709:   /*
710:      We are finished getting all the results so reset to no outstanding requests
711:   */
712:   if (sr->numopsend == sr->numopsbegin) {
713:     sr->state       = STATE_BEGIN;
714:     sr->numopsend   = 0;
715:     sr->numopsbegin = 0;
716:   }
717:   return(0);
718: }

722: /*@
723:    VecMTDotBegin - Starts a split phase transpose multiple dot product computation.

725:    Input Parameters:
726: +  x - the first vector
727: .  nv - number of vectors
728: .  y - array of  vectors
729: -  result - where the result will go (can be NULL)

731:    Level: advanced

733:    Notes:
734:    Each call to VecMTDotBegin() should be paired with a call to VecMTDotEnd().

736: .seealso: VecMTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
737:          VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()

739: @*/
740: PetscErrorCode  VecMTDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
741: {
742:   PetscErrorCode      ierr;
743:   PetscSplitReduction *sr;
744:   MPI_Comm            comm;
745:   int                 i;

748:   PetscObjectGetComm((PetscObject)x,&comm);
749:   PetscSplitReductionGet(comm,&sr);
750:   if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
751:   for (i=0; i<nv; i++) {
752:     if (sr->numopsbegin+i >= sr->maxops) {
753:       PetscSplitReductionExtend(sr);
754:     }
755:     sr->reducetype[sr->numopsbegin+i] = REDUCE_SUM;
756:     sr->invecs[sr->numopsbegin+i]     = (void*)x;
757:   }
758:   if (!x->ops->mtdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local mdots");
759:   PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
760:   (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
761:   PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
762:   sr->numopsbegin += nv;
763:   return(0);
764: }

768: /*@
769:    VecMTDotEnd - Ends a split phase transpose multiple dot product computation.

771:    Input Parameters:
772: +  x - the first vector (can be NULL)
773: .  nv - number of vectors
774: -  y - array of  vectors (can be NULL)

776:    Output Parameters
777: .  result - where the result will go

779:    Level: advanced

781:    Notes:
782:    Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().

784: .seealso: VecMTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
785:          VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()
786: @*/
787: PetscErrorCode  VecMTDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
788: {

792:   /*
793:       MTDotEnd() is the same as MDotEnd() so reuse the code
794:   */
795:   VecMDotEnd(x,nv,y,result);
796:   return(0);
797: }