Actual source code: comb.c
petsc-3.6.4 2016-04-12
2: /*
3: Split phase global vector reductions with support for combining the
4: communication portion of several operations. Using MPI-1.1 support only
6: The idea for this and much of the initial code is contributed by
7: Victor Eijkhout.
9: Usage:
10: VecDotBegin(Vec,Vec,PetscScalar *);
11: VecNormBegin(Vec,NormType,PetscReal *);
12: ....
13: VecDotEnd(Vec,Vec,PetscScalar *);
14: VecNormEnd(Vec,NormType,PetscReal *);
16: Limitations:
17: - The order of the xxxEnd() functions MUST be in the same order
18: as the xxxBegin(). There is extensive error checking to try to
19: insure that the user calls the routines in the correct order
20: */
22: #include <petsc/private/vecimpl.h> /*I "petscvec.h" I*/
26: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf,void *recvbuf,PetscMPIInt count,MPI_Datatype datatype,MPI_Op op,MPI_Comm comm,MPI_Request *request)
27: {
31: #if defined(PETSC_HAVE_MPI_IALLREDUCE)
32: MPI_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
33: #elif defined(PETSC_HAVE_MPIX_IALLREDUCE)
34: MPIX_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
35: #else
36: MPI_Allreduce(sendbuf,recvbuf,count,datatype,op,comm);
37: *request = MPI_REQUEST_NULL;
38: #endif
39: return(0);
40: }
42: /*
43: Note: the lvalues and gvalues are twice as long as maxops, this is to allow the second half of
44: the entries to have a flag indicating if they are REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN these are used by
45: the custom reduction operation that replaces MPI_SUM, MPI_MAX, or MPI_MIN in the case when a reduction involves
46: some of each.
47: */
49: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction*);
53: /*
54: PetscSplitReductionCreate - Creates a data structure to contain the queued information.
55: */
56: static PetscErrorCode PetscSplitReductionCreate(MPI_Comm comm,PetscSplitReduction **sr)
57: {
61: PetscNew(sr);
62: (*sr)->numopsbegin = 0;
63: (*sr)->numopsend = 0;
64: (*sr)->state = STATE_BEGIN;
65: (*sr)->maxops = 32;
66: PetscMalloc1(2*32,&(*sr)->lvalues);
67: PetscMalloc1(2*32,&(*sr)->gvalues);
68: PetscMalloc1(32,&(*sr)->invecs);
69: (*sr)->comm = comm;
70: (*sr)->request = MPI_REQUEST_NULL;
71: PetscMalloc1(32,&(*sr)->reducetype);
72: (*sr)->async = PETSC_FALSE;
73: #if defined(PETSC_HAVE_MPI_IALLREDUCE) || defined(PETSC_HAVE_MPIX_IALLREDUCE)
74: (*sr)->async = PETSC_TRUE; /* Enable by default */
75: PetscOptionsGetBool(NULL,"-splitreduction_async",&(*sr)->async,NULL);
76: #endif
77: return(0);
78: }
80: /*
81: This function is the MPI reduction operation used when there is
82: a combination of sums and max in the reduction. The call below to
83: MPI_Op_create() converts the function PetscSplitReduction_Local() to the
84: MPI operator PetscSplitReduction_Op.
85: */
86: MPI_Op PetscSplitReduction_Op = 0;
90: PETSC_EXTERN void MPIAPI PetscSplitReduction_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
91: {
92: PetscScalar *xin = (PetscScalar*)in,*xout = (PetscScalar*)out;
93: PetscInt i,count = (PetscInt)*cnt;
96: if (*datatype != MPIU_SCALAR) {
97: (*PetscErrorPrintf)("Can only handle MPIU_SCALAR data types");
98: MPI_Abort(MPI_COMM_SELF,1);
99: }
100: count = count/2;
101: for (i=0; i<count; i++) {
102: /* second half of xin[] is flags for reduction type */
103: if ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_SUM) xout[i] += xin[i];
104: else if ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_MAX) xout[i] = PetscMax(*(PetscReal*)(xout+i),*(PetscReal*)(xin+i));
105: else if ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_MIN) xout[i] = PetscMin(*(PetscReal*)(xout+i),*(PetscReal*)(xin+i));
106: else {
107: (*PetscErrorPrintf)("Reduction type input is not REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN");
108: MPI_Abort(MPI_COMM_SELF,1);
109: }
110: }
111: PetscFunctionReturnVoid();
112: }
116: /*@
117: PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction
119: Collective but not synchronizing
121: Input Arguments:
122: comm - communicator on which split reduction has been queued
124: Level: advanced
126: Note:
127: Calling this function is optional when using split-mode reduction. On supporting hardware, calling this after all
128: VecXxxBegin() allows the reduction to make asynchronous progress before the result is needed (in VecXxxEnd()).
130: .seealso: VecNormBegin(), VecNormEnd(), VecDotBegin(), VecDotEnd(), VecTDotBegin(), VecTDotEnd(), VecMDotBegin(), VecMDotEnd(), VecMTDotBegin(), VecMTDotEnd()
131: @*/
132: PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
133: {
134: PetscErrorCode ierr;
135: PetscSplitReduction *sr;
138: PetscSplitReductionGet(comm,&sr);
139: if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
140: if (sr->async) { /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
141: PetscInt i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
142: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
143: PetscInt sum_flg = 0,max_flg = 0, min_flg = 0;
144: MPI_Comm comm = sr->comm;
145: PetscMPIInt size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);;
146: PetscLogEventBegin(VEC_ReduceBegin,0,0,0,0);
147: MPI_Comm_size(sr->comm,&size);
148: if (size == 1) {
149: PetscMemcpy(gvalues,lvalues,numops*sizeof(PetscScalar));
150: } else {
151: /* determine if all reductions are sum, max, or min */
152: for (i=0; i<numops; i++) {
153: if (reducetype[i] == REDUCE_MAX) max_flg = 1;
154: else if (reducetype[i] == REDUCE_SUM) sum_flg = 1;
155: else if (reducetype[i] == REDUCE_MIN) min_flg = 1;
156: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
157: }
158: if (sum_flg + max_flg + min_flg > 1) {
159: /*
160: after all the entires in lvalues we store the reducetype flags to indicate
161: to the reduction operations what are sums and what are max
162: */
163: for (i=0; i<numops; i++) lvalues[numops+i] = reducetype[i];
165: MPIPetsc_Iallreduce(lvalues,gvalues,2*numops,MPIU_SCALAR,PetscSplitReduction_Op,comm,&sr->request);
166: } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
167: MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm,&sr->request);
168: } else if (min_flg) {
169: MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm,&sr->request);
170: } else {
171: MPIPetsc_Iallreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm,&sr->request);
172: }
173: }
174: sr->state = STATE_PENDING;
175: sr->numopsend = 0;
176: PetscLogEventEnd(VEC_ReduceBegin,0,0,0,0);
177: } else {
178: PetscSplitReductionApply(sr);
179: }
180: return(0);
181: }
185: PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
186: {
190: switch (sr->state) {
191: case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
192: PetscSplitReductionApply(sr);
193: break;
194: case STATE_PENDING:
195: /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
196: PetscLogEventBegin(VEC_ReduceEnd,0,0,0,0);
197: if (sr->request != MPI_REQUEST_NULL) {
198: MPI_Wait(&sr->request,MPI_STATUS_IGNORE);
199: }
200: sr->state = STATE_END;
201: PetscLogEventEnd(VEC_ReduceEnd,0,0,0,0);
202: break;
203: default: break; /* everything is already done */
204: }
205: return(0);
206: }
210: /*
211: PetscSplitReductionApply - Actually do the communication required for a split phase reduction
212: */
213: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
214: {
216: PetscInt i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
217: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
218: PetscInt sum_flg = 0,max_flg = 0, min_flg = 0;
219: MPI_Comm comm = sr->comm;
220: PetscMPIInt size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);
223: if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
224: PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,comm);
225: MPI_Comm_size(sr->comm,&size);
226: if (size == 1) {
227: PetscMemcpy(gvalues,lvalues,numops*sizeof(PetscScalar));
228: } else {
229: /* determine if all reductions are sum, max, or min */
230: for (i=0; i<numops; i++) {
231: if (reducetype[i] == REDUCE_MAX) max_flg = 1;
232: else if (reducetype[i] == REDUCE_SUM) sum_flg = 1;
233: else if (reducetype[i] == REDUCE_MIN) min_flg = 1;
234: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
235: }
236: if (sum_flg + max_flg + min_flg > 1) {
237: /*
238: after all the entires in lvalues we store the reducetype flags to indicate
239: to the reduction operations what are sums and what are max
240: */
241: for (i=0; i<numops; i++) lvalues[numops+i] = reducetype[i];
242: MPI_Allreduce(lvalues,gvalues,2*numops,MPIU_SCALAR,PetscSplitReduction_Op,comm);
243: } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
244: MPI_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm);
245: } else if (min_flg) {
246: MPI_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm);
247: } else {
248: MPI_Allreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm);
249: }
250: }
251: sr->state = STATE_END;
252: sr->numopsend = 0;
253: PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,comm);
254: return(0);
255: }
259: /*
260: PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
261: */
262: PetscErrorCode PetscSplitReductionExtend(PetscSplitReduction *sr)
263: {
265: PetscInt maxops = sr->maxops,*reducetype = sr->reducetype;
266: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
267: void *invecs = sr->invecs;
270: sr->maxops = 2*maxops;
271: PetscMalloc1(2*2*maxops,&sr->lvalues);
272: PetscMalloc1(2*2*maxops,&sr->gvalues);
273: PetscMalloc1(2*maxops,&sr->reducetype);
274: PetscMalloc1(2*maxops,&sr->invecs);
275: PetscMemcpy(sr->lvalues,lvalues,maxops*sizeof(PetscScalar));
276: PetscMemcpy(sr->gvalues,gvalues,maxops*sizeof(PetscScalar));
277: PetscMemcpy(sr->reducetype,reducetype,maxops*sizeof(PetscInt));
278: PetscMemcpy(sr->invecs,invecs,maxops*sizeof(void*));
279: PetscFree(lvalues);
280: PetscFree(gvalues);
281: PetscFree(reducetype);
282: PetscFree(invecs);
283: return(0);
284: }
288: PetscErrorCode PetscSplitReductionDestroy(PetscSplitReduction *sr)
289: {
293: PetscFree(sr->lvalues);
294: PetscFree(sr->gvalues);
295: PetscFree(sr->reducetype);
296: PetscFree(sr->invecs);
297: PetscFree(sr);
298: return(0);
299: }
301: static PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;
305: /*
306: Private routine to delete internal storage when a communicator is freed.
307: This is called by MPI, not by users.
309: The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
310: it was MPI_Comm *comm.
311: */
312: PETSC_EXTERN int MPIAPI Petsc_DelReduction(MPI_Comm comm,int keyval,void* attr_val,void* extra_state)
313: {
317: PetscInfo1(0,"Deleting reduction data in an MPI_Comm %ld\n",(long)comm);
318: PetscSplitReductionDestroy((PetscSplitReduction*)attr_val);
319: return(0);
320: }
322: /*
323: PetscSplitReductionGet - Gets the split reduction object from a
324: PETSc vector, creates if it does not exit.
326: */
329: PetscErrorCode PetscSplitReductionGet(MPI_Comm comm,PetscSplitReduction **sr)
330: {
332: PetscMPIInt flag;
335: if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
336: /*
337: The calling sequence of the 2nd argument to this function changed
338: between MPI Standard 1.0 and the revisions 1.1 Here we match the
339: new standard, if you are using an MPI implementation that uses
340: the older version you will get a warning message about the next line;
341: it is only a warning message and should do no harm.
342: */
343: MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelReduction,&Petsc_Reduction_keyval,0);
344: }
345: MPI_Attr_get(comm,Petsc_Reduction_keyval,(void**)sr,&flag);
346: if (!flag) { /* doesn't exist yet so create it and put it in */
347: PetscSplitReductionCreate(comm,sr);
348: MPI_Attr_put(comm,Petsc_Reduction_keyval,*sr);
349: PetscInfo1(0,"Putting reduction data in an MPI_Comm %ld\n",(long)comm);
350: }
351: return(0);
352: }
354: /* ----------------------------------------------------------------------------------------------------*/
358: /*@
359: VecDotBegin - Starts a split phase dot product computation.
361: Input Parameters:
362: + x - the first vector
363: . y - the second vector
364: - result - where the result will go (can be NULL)
366: Level: advanced
368: Notes:
369: Each call to VecDotBegin() should be paired with a call to VecDotEnd().
371: seealso: VecDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
372: VecTDotBegin(), VecTDotEnd(), PetscCommSplitReductionBegin()
373: @*/
374: PetscErrorCode VecDotBegin(Vec x,Vec y,PetscScalar *result)
375: {
376: PetscErrorCode ierr;
377: PetscSplitReduction *sr;
378: MPI_Comm comm;
381: PetscObjectGetComm((PetscObject)x,&comm);
382: PetscSplitReductionGet(comm,&sr);
383: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
384: if (sr->numopsbegin >= sr->maxops) {
385: PetscSplitReductionExtend(sr);
386: }
387: sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
388: sr->invecs[sr->numopsbegin] = (void*)x;
389: if (!x->ops->tdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local dots");
390: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
391: (*x->ops->tdot_local)(x,y,sr->lvalues+sr->numopsbegin++);
392: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
393: return(0);
394: }
398: /*@
399: VecDotEnd - Ends a split phase dot product computation.
401: Input Parameters:
402: + x - the first vector (can be NULL)
403: . y - the second vector (can be NULL)
404: - result - where the result will go
406: Level: advanced
408: Notes:
409: Each call to VecDotBegin() should be paired with a call to VecDotEnd().
411: .seealso: VecDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
412: VecTDotBegin(),VecTDotEnd(), PetscCommSplitReductionBegin()
414: @*/
415: PetscErrorCode VecDotEnd(Vec x,Vec y,PetscScalar *result)
416: {
417: PetscErrorCode ierr;
418: PetscSplitReduction *sr;
419: MPI_Comm comm;
422: PetscObjectGetComm((PetscObject)x,&comm);
423: PetscSplitReductionGet(comm,&sr);
424: PetscSplitReductionEnd(sr);
426: if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
427: if (x && (void*) x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
428: if (sr->reducetype[sr->numopsend] != REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
429: *result = sr->gvalues[sr->numopsend++];
431: /*
432: We are finished getting all the results so reset to no outstanding requests
433: */
434: if (sr->numopsend == sr->numopsbegin) {
435: sr->state = STATE_BEGIN;
436: sr->numopsend = 0;
437: sr->numopsbegin = 0;
438: }
439: return(0);
440: }
444: /*@
445: VecTDotBegin - Starts a split phase transpose dot product computation.
447: Input Parameters:
448: + x - the first vector
449: . y - the second vector
450: - result - where the result will go (can be NULL)
452: Level: advanced
454: Notes:
455: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
457: .seealso: VecTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
458: VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()
460: @*/
461: PetscErrorCode VecTDotBegin(Vec x,Vec y,PetscScalar *result)
462: {
463: PetscErrorCode ierr;
464: PetscSplitReduction *sr;
465: MPI_Comm comm;
468: PetscObjectGetComm((PetscObject)x,&comm);
469: PetscSplitReductionGet(comm,&sr);
470: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
471: if (sr->numopsbegin >= sr->maxops) {
472: PetscSplitReductionExtend(sr);
473: }
474: sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
475: sr->invecs[sr->numopsbegin] = (void*)x;
476: if (!x->ops->tdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local dots");
477: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
478: (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
479: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
480: return(0);
481: }
485: /*@
486: VecTDotEnd - Ends a split phase transpose dot product computation.
488: Input Parameters:
489: + x - the first vector (can be NULL)
490: . y - the second vector (can be NULL)
491: - result - where the result will go
493: Level: advanced
495: Notes:
496: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
498: seealso: VecTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
499: VecDotBegin(), VecDotEnd()
500: @*/
501: PetscErrorCode VecTDotEnd(Vec x,Vec y,PetscScalar *result)
502: {
506: /*
507: TDotEnd() is the same as DotEnd() so reuse the code
508: */
509: VecDotEnd(x,y,result);
510: return(0);
511: }
513: /* -------------------------------------------------------------------------*/
517: /*@
518: VecNormBegin - Starts a split phase norm computation.
520: Input Parameters:
521: + x - the first vector
522: . ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
523: - result - where the result will go (can be NULL)
525: Level: advanced
527: Notes:
528: Each call to VecNormBegin() should be paired with a call to VecNormEnd().
530: .seealso: VecNormEnd(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()
532: @*/
533: PetscErrorCode VecNormBegin(Vec x,NormType ntype,PetscReal *result)
534: {
535: PetscErrorCode ierr;
536: PetscSplitReduction *sr;
537: PetscReal lresult[2];
538: MPI_Comm comm;
541: PetscObjectGetComm((PetscObject)x,&comm);
542: PetscSplitReductionGet(comm,&sr);
543: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
544: if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops-1 && ntype == NORM_1_AND_2)) {
545: PetscSplitReductionExtend(sr);
546: }
548: sr->invecs[sr->numopsbegin] = (void*)x;
549: if (!x->ops->norm_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local norms");
550: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
551: (*x->ops->norm_local)(x,ntype,lresult);
552: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
553: if (ntype == NORM_2) lresult[0] = lresult[0]*lresult[0];
554: if (ntype == NORM_1_AND_2) lresult[1] = lresult[1]*lresult[1];
555: if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = REDUCE_MAX;
556: else sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
557: sr->lvalues[sr->numopsbegin++] = lresult[0];
558: if (ntype == NORM_1_AND_2) {
559: sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
560: sr->lvalues[sr->numopsbegin++] = lresult[1];
561: }
562: return(0);
563: }
567: /*@
568: VecNormEnd - Ends a split phase norm computation.
570: Input Parameters:
571: + x - the first vector (can be NULL)
572: . ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
573: - result - where the result will go
575: Level: advanced
577: Notes:
578: Each call to VecNormBegin() should be paired with a call to VecNormEnd().
580: .seealso: VecNormBegin(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()
582: @*/
583: PetscErrorCode VecNormEnd(Vec x,NormType ntype,PetscReal *result)
584: {
585: PetscErrorCode ierr;
586: PetscSplitReduction *sr;
587: MPI_Comm comm;
590: PetscObjectGetComm((PetscObject)x,&comm);
591: PetscSplitReductionGet(comm,&sr);
592: PetscSplitReductionEnd(sr);
594: if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
595: if (x && (void*)x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
596: if (sr->reducetype[sr->numopsend] != REDUCE_MAX && ntype == NORM_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
597: result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);
599: if (ntype == NORM_2) result[0] = PetscSqrtReal(result[0]);
600: else if (ntype == NORM_1_AND_2) {
601: result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
602: result[1] = PetscSqrtReal(result[1]);
603: }
604: if (ntype!=NORM_1_AND_2) {
605: PetscObjectComposedDataSetReal((PetscObject)x,NormIds[ntype],result[0]);
606: }
608: if (sr->numopsend == sr->numopsbegin) {
609: sr->state = STATE_BEGIN;
610: sr->numopsend = 0;
611: sr->numopsbegin = 0;
612: }
613: return(0);
614: }
616: /*
617: Possibly add
619: PetscReductionSumBegin/End()
620: PetscReductionMaxBegin/End()
621: PetscReductionMinBegin/End()
622: or have more like MPI with a single function with flag for Op? Like first better
623: */
627: /*@
628: VecMDotBegin - Starts a split phase multiple dot product computation.
630: Input Parameters:
631: + x - the first vector
632: . nv - number of vectors
633: . y - array of vectors
634: - result - where the result will go (can be NULL)
636: Level: advanced
638: Notes:
639: Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().
641: .seealso: VecMDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
642: VecTDotBegin(), VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()
643: @*/
644: PetscErrorCode VecMDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
645: {
646: PetscErrorCode ierr;
647: PetscSplitReduction *sr;
648: MPI_Comm comm;
649: int i;
652: PetscObjectGetComm((PetscObject)x,&comm);
653: PetscSplitReductionGet(comm,&sr);
654: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
655: for (i=0; i<nv; i++) {
656: if (sr->numopsbegin+i >= sr->maxops) {
657: PetscSplitReductionExtend(sr);
658: }
659: sr->reducetype[sr->numopsbegin+i] = REDUCE_SUM;
660: sr->invecs[sr->numopsbegin+i] = (void*)x;
661: }
662: if (!x->ops->mdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local mdots");
663: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
664: (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
665: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
666: sr->numopsbegin += nv;
667: return(0);
668: }
672: /*@
673: VecMDotEnd - Ends a split phase multiple dot product computation.
675: Input Parameters:
676: + x - the first vector (can be NULL)
677: . nv - number of vectors
678: - y - array of vectors (can be NULL)
680: Output Parameters:
681: . result - where the result will go
683: Level: advanced
685: Notes:
686: Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().
688: .seealso: VecMDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
689: VecTDotBegin(),VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()
691: @*/
692: PetscErrorCode VecMDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
693: {
694: PetscErrorCode ierr;
695: PetscSplitReduction *sr;
696: MPI_Comm comm;
697: int i;
700: PetscObjectGetComm((PetscObject)x,&comm);
701: PetscSplitReductionGet(comm,&sr);
702: PetscSplitReductionEnd(sr);
704: if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
705: if (x && (void*) x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
706: if (sr->reducetype[sr->numopsend] != REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
707: for (i=0;i<nv;i++) result[i] = sr->gvalues[sr->numopsend++];
709: /*
710: We are finished getting all the results so reset to no outstanding requests
711: */
712: if (sr->numopsend == sr->numopsbegin) {
713: sr->state = STATE_BEGIN;
714: sr->numopsend = 0;
715: sr->numopsbegin = 0;
716: }
717: return(0);
718: }
722: /*@
723: VecMTDotBegin - Starts a split phase transpose multiple dot product computation.
725: Input Parameters:
726: + x - the first vector
727: . nv - number of vectors
728: . y - array of vectors
729: - result - where the result will go (can be NULL)
731: Level: advanced
733: Notes:
734: Each call to VecMTDotBegin() should be paired with a call to VecMTDotEnd().
736: .seealso: VecMTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
737: VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()
739: @*/
740: PetscErrorCode VecMTDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
741: {
742: PetscErrorCode ierr;
743: PetscSplitReduction *sr;
744: MPI_Comm comm;
745: int i;
748: PetscObjectGetComm((PetscObject)x,&comm);
749: PetscSplitReductionGet(comm,&sr);
750: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
751: for (i=0; i<nv; i++) {
752: if (sr->numopsbegin+i >= sr->maxops) {
753: PetscSplitReductionExtend(sr);
754: }
755: sr->reducetype[sr->numopsbegin+i] = REDUCE_SUM;
756: sr->invecs[sr->numopsbegin+i] = (void*)x;
757: }
758: if (!x->ops->mtdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local mdots");
759: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
760: (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
761: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
762: sr->numopsbegin += nv;
763: return(0);
764: }
768: /*@
769: VecMTDotEnd - Ends a split phase transpose multiple dot product computation.
771: Input Parameters:
772: + x - the first vector (can be NULL)
773: . nv - number of vectors
774: - y - array of vectors (can be NULL)
776: Output Parameters
777: . result - where the result will go
779: Level: advanced
781: Notes:
782: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
784: .seealso: VecMTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
785: VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()
786: @*/
787: PetscErrorCode VecMTDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
788: {
792: /*
793: MTDotEnd() is the same as MDotEnd() so reuse the code
794: */
795: VecMDotEnd(x,nv,y,result);
796: return(0);
797: }