Actual source code: comb.c
petsc-3.14.6 2021-03-30
2: /*
3: Split phase global vector reductions with support for combining the
4: communication portion of several operations. Using MPI-1.1 support only
6: The idea for this and much of the initial code is contributed by
7: Victor Eijkhout.
9: Usage:
10: VecDotBegin(Vec,Vec,PetscScalar *);
11: VecNormBegin(Vec,NormType,PetscReal *);
12: ....
13: VecDotEnd(Vec,Vec,PetscScalar *);
14: VecNormEnd(Vec,NormType,PetscReal *);
16: Limitations:
17: - The order of the xxxEnd() functions MUST be in the same order
18: as the xxxBegin(). There is extensive error checking to try to
19: insure that the user calls the routines in the correct order
20: */
22: #include <petsc/private/vecimpl.h>
24: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf,void *recvbuf,PetscMPIInt count,MPI_Datatype datatype,MPI_Op op,MPI_Comm comm,MPI_Request *request)
25: {
29: #if defined(PETSC_HAVE_MPI_IALLREDUCE)
30: MPI_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
31: #elif defined(PETSC_HAVE_MPIX_IALLREDUCE)
32: MPIX_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
33: #else
34: MPIU_Allreduce(sendbuf,recvbuf,count,datatype,op,comm);
35: *request = MPI_REQUEST_NULL;
36: #endif
37: return(0);
38: }
40: /*
41: Note: the lvalues and gvalues are twice as long as maxops, this is to allow the second half of
42: the entries to have a flag indicating if they are PETSC_SR_REDUCE_SUM, PETSC_SR_REDUCE_MAX, or PETSC_SR_REDUCE_MIN these are used by
43: the custom reduction operation that replaces MPI_SUM, MPI_MAX, or MPI_MIN in the case when a reduction involves
44: some of each.
45: */
47: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction*);
49: /*
50: PetscSplitReductionCreate - Creates a data structure to contain the queued information.
51: */
52: static PetscErrorCode PetscSplitReductionCreate(MPI_Comm comm,PetscSplitReduction **sr)
53: {
57: PetscNew(sr);
58: (*sr)->numopsbegin = 0;
59: (*sr)->numopsend = 0;
60: (*sr)->state = STATE_BEGIN;
61: #define MAXOPS 32
62: (*sr)->maxops = MAXOPS;
63: PetscMalloc4(2*MAXOPS,&(*sr)->lvalues,2*MAXOPS,&(*sr)->gvalues,MAXOPS,&(*sr)->invecs,MAXOPS,&(*sr)->reducetype);
64: #undef MAXOPS
65: (*sr)->comm = comm;
66: (*sr)->request = MPI_REQUEST_NULL;
67: (*sr)->async = PETSC_FALSE;
68: #if defined(PETSC_HAVE_MPI_IALLREDUCE) || defined(PETSC_HAVE_MPIX_IALLREDUCE)
69: (*sr)->async = PETSC_TRUE; /* Enable by default */
70: #endif
71: /* always check for option; so that tests that run on systems without support don't warn about unhandled options */
72: PetscOptionsGetBool(NULL,NULL,"-splitreduction_async",&(*sr)->async,NULL);
73: return(0);
74: }
76: /*
77: This function is the MPI reduction operation used when there is
78: a combination of sums and max in the reduction. The call below to
79: MPI_Op_create() converts the function PetscSplitReduction_Local() to the
80: MPI operator PetscSplitReduction_Op.
81: */
82: MPI_Op PetscSplitReduction_Op = 0;
84: PETSC_EXTERN void MPIAPI PetscSplitReduction_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
85: {
86: PetscScalar *xin = (PetscScalar*)in,*xout = (PetscScalar*)out;
87: PetscInt i,count = (PetscInt)*cnt;
90: if (*datatype != MPIU_SCALAR) {
91: (*PetscErrorPrintf)("Can only handle MPIU_SCALAR data types");
92: PETSCABORT(MPI_COMM_SELF,PETSC_ERR_ARG_WRONG);
93: }
94: count = count/2;
95: for (i=0; i<count; i++) {
96: /* second half of xin[] is flags for reduction type */
97: if ((PetscInt)PetscRealPart(xin[count+i]) == PETSC_SR_REDUCE_SUM) xout[i] += xin[i];
98: else if ((PetscInt)PetscRealPart(xin[count+i]) == PETSC_SR_REDUCE_MAX) xout[i] = PetscMax(*(PetscReal*)(xout+i),*(PetscReal*)(xin+i));
99: else if ((PetscInt)PetscRealPart(xin[count+i]) == PETSC_SR_REDUCE_MIN) xout[i] = PetscMin(*(PetscReal*)(xout+i),*(PetscReal*)(xin+i));
100: else {
101: (*PetscErrorPrintf)("Reduction type input is not PETSC_SR_REDUCE_SUM, PETSC_SR_REDUCE_MAX, or PETSC_SR_REDUCE_MIN");
102: PETSCABORT(MPI_COMM_SELF,PETSC_ERR_ARG_WRONG);
103: }
104: }
105: PetscFunctionReturnVoid();
106: }
108: /*@
109: PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction
111: Collective but not synchronizing
113: Input Arguments:
114: comm - communicator on which split reduction has been queued
116: Level: advanced
118: Note:
119: Calling this function is optional when using split-mode reduction. On supporting hardware, calling this after all
120: VecXxxBegin() allows the reduction to make asynchronous progress before the result is needed (in VecXxxEnd()).
122: .seealso: VecNormBegin(), VecNormEnd(), VecDotBegin(), VecDotEnd(), VecTDotBegin(), VecTDotEnd(), VecMDotBegin(), VecMDotEnd(), VecMTDotBegin(), VecMTDotEnd()
123: @*/
124: PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
125: {
126: PetscErrorCode ierr;
127: PetscSplitReduction *sr;
130: PetscSplitReductionGet(comm,&sr);
131: if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
132: if (sr->async) { /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
133: PetscInt i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
134: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
135: PetscInt sum_flg = 0,max_flg = 0, min_flg = 0;
136: MPI_Comm comm = sr->comm;
137: PetscMPIInt size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);
138: PetscLogEventBegin(VEC_ReduceBegin,0,0,0,0);
139: MPI_Comm_size(sr->comm,&size);
140: if (size == 1) {
141: PetscArraycpy(gvalues,lvalues,numops);
142: } else {
143: /* determine if all reductions are sum, max, or min */
144: for (i=0; i<numops; i++) {
145: if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
146: else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
147: else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
148: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
149: }
150: if (sum_flg + max_flg + min_flg > 1) {
151: /*
152: after all the entires in lvalues we store the reducetype flags to indicate
153: to the reduction operations what are sums and what are max
154: */
155: for (i=0; i<numops; i++) lvalues[numops+i] = reducetype[i];
157: MPIPetsc_Iallreduce(lvalues,gvalues,2*numops,MPIU_SCALAR,PetscSplitReduction_Op,comm,&sr->request);
158: } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
159: MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm,&sr->request);
160: } else if (min_flg) {
161: MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm,&sr->request);
162: } else {
163: MPIPetsc_Iallreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm,&sr->request);
164: }
165: }
166: sr->state = STATE_PENDING;
167: sr->numopsend = 0;
168: PetscLogEventEnd(VEC_ReduceBegin,0,0,0,0);
169: } else {
170: PetscSplitReductionApply(sr);
171: }
172: return(0);
173: }
175: PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
176: {
180: switch (sr->state) {
181: case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
182: PetscSplitReductionApply(sr);
183: break;
184: case STATE_PENDING:
185: /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
186: PetscLogEventBegin(VEC_ReduceEnd,0,0,0,0);
187: if (sr->request != MPI_REQUEST_NULL) {
188: MPI_Wait(&sr->request,MPI_STATUS_IGNORE);
189: }
190: sr->state = STATE_END;
191: PetscLogEventEnd(VEC_ReduceEnd,0,0,0,0);
192: break;
193: default: break; /* everything is already done */
194: }
195: return(0);
196: }
198: /*
199: PetscSplitReductionApply - Actually do the communication required for a split phase reduction
200: */
201: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
202: {
204: PetscInt i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
205: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
206: PetscInt sum_flg = 0,max_flg = 0, min_flg = 0;
207: MPI_Comm comm = sr->comm;
208: PetscMPIInt size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);
211: if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
212: PetscLogEventBegin(VEC_ReduceCommunication,0,0,0,0);
213: MPI_Comm_size(sr->comm,&size);
214: if (size == 1) {
215: PetscArraycpy(gvalues,lvalues,numops);
216: } else {
217: /* determine if all reductions are sum, max, or min */
218: for (i=0; i<numops; i++) {
219: if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
220: else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
221: else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
222: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
223: }
224: if (sum_flg + max_flg + min_flg > 1) {
225: /*
226: after all the entires in lvalues we store the reducetype flags to indicate
227: to the reduction operations what are sums and what are max
228: */
229: for (i=0; i<numops; i++) lvalues[numops+i] = reducetype[i];
230: MPIU_Allreduce(lvalues,gvalues,2*numops,MPIU_SCALAR,PetscSplitReduction_Op,comm);
231: } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
232: MPIU_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm);
233: } else if (min_flg) {
234: MPIU_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm);
235: } else {
236: MPIU_Allreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm);
237: }
238: }
239: sr->state = STATE_END;
240: sr->numopsend = 0;
241: PetscLogEventEnd(VEC_ReduceCommunication,0,0,0,0);
242: return(0);
243: }
245: /*
246: PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
247: */
248: PetscErrorCode PetscSplitReductionExtend(PetscSplitReduction *sr)
249: {
251: PetscInt maxops = sr->maxops,*reducetype = sr->reducetype;
252: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
253: void **invecs = sr->invecs;
256: sr->maxops = 2*maxops;
257: PetscMalloc4(2*2*maxops,&sr->lvalues,2*2*maxops,&sr->gvalues,2*maxops,&sr->reducetype,2*maxops,&sr->invecs);
258: PetscArraycpy(sr->lvalues,lvalues,maxops);
259: PetscArraycpy(sr->gvalues,gvalues,maxops);
260: PetscArraycpy(sr->reducetype,reducetype,maxops);
261: PetscArraycpy(sr->invecs,invecs,maxops);
262: PetscFree4(lvalues,gvalues,reducetype,invecs);
263: return(0);
264: }
266: PetscErrorCode PetscSplitReductionDestroy(PetscSplitReduction *sr)
267: {
271: PetscFree4(sr->lvalues,sr->gvalues,sr->reducetype,sr->invecs);
272: PetscFree(sr);
273: return(0);
274: }
276: PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;
278: /*
279: Private routine to delete internal storage when a communicator is freed.
280: This is called by MPI, not by users.
282: The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
283: it was MPI_Comm *comm.
284: */
285: PETSC_EXTERN int MPIAPI Petsc_DelReduction(MPI_Comm comm,int keyval,void* attr_val,void* extra_state)
286: {
290: PetscInfo1(0,"Deleting reduction data in an MPI_Comm %ld\n",(long)comm);CHKERRMPI(ierr);
291: PetscSplitReductionDestroy((PetscSplitReduction*)attr_val);CHKERRMPI(ierr);
292: return(0);
293: }
295: /*
296: PetscSplitReductionGet - Gets the split reduction object from a
297: PETSc vector, creates if it does not exit.
299: */
300: PetscErrorCode PetscSplitReductionGet(MPI_Comm comm,PetscSplitReduction **sr)
301: {
303: PetscMPIInt flag;
306: if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
307: /*
308: The calling sequence of the 2nd argument to this function changed
309: between MPI Standard 1.0 and the revisions 1.1 Here we match the
310: new standard, if you are using an MPI implementation that uses
311: the older version you will get a warning message about the next line;
312: it is only a warning message and should do no harm.
313: */
314: MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,Petsc_DelReduction,&Petsc_Reduction_keyval,NULL);
315: }
316: MPI_Comm_get_attr(comm,Petsc_Reduction_keyval,(void**)sr,&flag);
317: if (!flag) { /* doesn't exist yet so create it and put it in */
318: PetscSplitReductionCreate(comm,sr);
319: MPI_Comm_set_attr(comm,Petsc_Reduction_keyval,*sr);
320: PetscInfo1(0,"Putting reduction data in an MPI_Comm %ld\n",(long)comm);
321: }
322: return(0);
323: }
325: /* ----------------------------------------------------------------------------------------------------*/
327: /*@
328: VecDotBegin - Starts a split phase dot product computation.
330: Input Parameters:
331: + x - the first vector
332: . y - the second vector
333: - result - where the result will go (can be NULL)
335: Level: advanced
337: Notes:
338: Each call to VecDotBegin() should be paired with a call to VecDotEnd().
340: seealso: VecDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
341: VecTDotBegin(), VecTDotEnd(), PetscCommSplitReductionBegin()
342: @*/
343: PetscErrorCode VecDotBegin(Vec x,Vec y,PetscScalar *result)
344: {
345: PetscErrorCode ierr;
346: PetscSplitReduction *sr;
347: MPI_Comm comm;
352: PetscObjectGetComm((PetscObject)x,&comm);
353: PetscSplitReductionGet(comm,&sr);
354: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
355: if (sr->numopsbegin >= sr->maxops) {
356: PetscSplitReductionExtend(sr);
357: }
358: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
359: sr->invecs[sr->numopsbegin] = (void*)x;
360: if (!x->ops->dot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local dots");
361: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
362: (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
363: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
364: return(0);
365: }
367: /*@
368: VecDotEnd - Ends a split phase dot product computation.
370: Input Parameters:
371: + x - the first vector (can be NULL)
372: . y - the second vector (can be NULL)
373: - result - where the result will go
375: Level: advanced
377: Notes:
378: Each call to VecDotBegin() should be paired with a call to VecDotEnd().
380: .seealso: VecDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
381: VecTDotBegin(),VecTDotEnd(), PetscCommSplitReductionBegin()
383: @*/
384: PetscErrorCode VecDotEnd(Vec x,Vec y,PetscScalar *result)
385: {
386: PetscErrorCode ierr;
387: PetscSplitReduction *sr;
388: MPI_Comm comm;
391: PetscObjectGetComm((PetscObject)x,&comm);
392: PetscSplitReductionGet(comm,&sr);
393: PetscSplitReductionEnd(sr);
395: if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
396: if (x && (void*)x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
397: if (sr->reducetype[sr->numopsend] != PETSC_SR_REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
398: *result = sr->gvalues[sr->numopsend++];
400: /*
401: We are finished getting all the results so reset to no outstanding requests
402: */
403: if (sr->numopsend == sr->numopsbegin) {
404: sr->state = STATE_BEGIN;
405: sr->numopsend = 0;
406: sr->numopsbegin = 0;
407: }
408: return(0);
409: }
411: /*@
412: VecTDotBegin - Starts a split phase transpose dot product computation.
414: Input Parameters:
415: + x - the first vector
416: . y - the second vector
417: - result - where the result will go (can be NULL)
419: Level: advanced
421: Notes:
422: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
424: .seealso: VecTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
425: VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()
427: @*/
428: PetscErrorCode VecTDotBegin(Vec x,Vec y,PetscScalar *result)
429: {
430: PetscErrorCode ierr;
431: PetscSplitReduction *sr;
432: MPI_Comm comm;
435: PetscObjectGetComm((PetscObject)x,&comm);
436: PetscSplitReductionGet(comm,&sr);
437: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
438: if (sr->numopsbegin >= sr->maxops) {
439: PetscSplitReductionExtend(sr);
440: }
441: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
442: sr->invecs[sr->numopsbegin] = (void*)x;
443: if (!x->ops->tdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local dots");
444: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
445: (*x->ops->tdot_local)(x,y,sr->lvalues+sr->numopsbegin++);
446: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
447: return(0);
448: }
450: /*@
451: VecTDotEnd - Ends a split phase transpose dot product computation.
453: Input Parameters:
454: + x - the first vector (can be NULL)
455: . y - the second vector (can be NULL)
456: - result - where the result will go
458: Level: advanced
460: Notes:
461: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
463: seealso: VecTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
464: VecDotBegin(), VecDotEnd()
465: @*/
466: PetscErrorCode VecTDotEnd(Vec x,Vec y,PetscScalar *result)
467: {
471: /*
472: TDotEnd() is the same as DotEnd() so reuse the code
473: */
474: VecDotEnd(x,y,result);
475: return(0);
476: }
478: /* -------------------------------------------------------------------------*/
480: /*@
481: VecNormBegin - Starts a split phase norm computation.
483: Input Parameters:
484: + x - the first vector
485: . ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
486: - result - where the result will go (can be NULL)
488: Level: advanced
490: Notes:
491: Each call to VecNormBegin() should be paired with a call to VecNormEnd().
493: .seealso: VecNormEnd(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()
495: @*/
496: PetscErrorCode VecNormBegin(Vec x,NormType ntype,PetscReal *result)
497: {
498: PetscErrorCode ierr;
499: PetscSplitReduction *sr;
500: PetscReal lresult[2];
501: MPI_Comm comm;
505: PetscObjectGetComm((PetscObject)x,&comm);
506: PetscSplitReductionGet(comm,&sr);
507: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
508: if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops-1 && ntype == NORM_1_AND_2)) {
509: PetscSplitReductionExtend(sr);
510: }
512: sr->invecs[sr->numopsbegin] = (void*)x;
513: if (!x->ops->norm_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local norms");
514: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
515: (*x->ops->norm_local)(x,ntype,lresult);
516: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
517: if (ntype == NORM_2) lresult[0] = lresult[0]*lresult[0];
518: if (ntype == NORM_1_AND_2) lresult[1] = lresult[1]*lresult[1];
519: if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_MAX;
520: else sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
521: sr->lvalues[sr->numopsbegin++] = lresult[0];
522: if (ntype == NORM_1_AND_2) {
523: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
524: sr->lvalues[sr->numopsbegin++] = lresult[1];
525: }
526: return(0);
527: }
529: /*@
530: VecNormEnd - Ends a split phase norm computation.
532: Input Parameters:
533: + x - the first vector
534: . ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
535: - result - where the result will go
537: Level: advanced
539: Notes:
540: Each call to VecNormBegin() should be paired with a call to VecNormEnd().
542: The x vector is not allowed to be NULL, otherwise the vector would not have its correctly cached norm value
544: .seealso: VecNormBegin(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()
546: @*/
547: PetscErrorCode VecNormEnd(Vec x,NormType ntype,PetscReal *result)
548: {
549: PetscErrorCode ierr;
550: PetscSplitReduction *sr;
551: MPI_Comm comm;
555: PetscObjectGetComm((PetscObject)x,&comm);
556: PetscSplitReductionGet(comm,&sr);
557: PetscSplitReductionEnd(sr);
559: if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
560: if ((void*)x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
561: if (sr->reducetype[sr->numopsend] != PETSC_SR_REDUCE_MAX && ntype == NORM_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
562: result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);
564: if (ntype == NORM_2) result[0] = PetscSqrtReal(result[0]);
565: else if (ntype == NORM_1_AND_2) {
566: result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
567: result[1] = PetscSqrtReal(result[1]);
568: }
569: if (ntype!=NORM_1_AND_2) {
570: PetscObjectComposedDataSetReal((PetscObject)x,NormIds[ntype],result[0]);
571: }
573: if (sr->numopsend == sr->numopsbegin) {
574: sr->state = STATE_BEGIN;
575: sr->numopsend = 0;
576: sr->numopsbegin = 0;
577: }
578: return(0);
579: }
581: /*
582: Possibly add
584: PetscReductionSumBegin/End()
585: PetscReductionMaxBegin/End()
586: PetscReductionMinBegin/End()
587: or have more like MPI with a single function with flag for Op? Like first better
588: */
590: /*@
591: VecMDotBegin - Starts a split phase multiple dot product computation.
593: Input Parameters:
594: + x - the first vector
595: . nv - number of vectors
596: . y - array of vectors
597: - result - where the result will go (can be NULL)
599: Level: advanced
601: Notes:
602: Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().
604: .seealso: VecMDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
605: VecTDotBegin(), VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()
606: @*/
607: PetscErrorCode VecMDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
608: {
609: PetscErrorCode ierr;
610: PetscSplitReduction *sr;
611: MPI_Comm comm;
612: int i;
615: PetscObjectGetComm((PetscObject)x,&comm);
616: PetscSplitReductionGet(comm,&sr);
617: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
618: for (i=0; i<nv; i++) {
619: if (sr->numopsbegin+i >= sr->maxops) {
620: PetscSplitReductionExtend(sr);
621: }
622: sr->reducetype[sr->numopsbegin+i] = PETSC_SR_REDUCE_SUM;
623: sr->invecs[sr->numopsbegin+i] = (void*)x;
624: }
625: if (!x->ops->mdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local mdots");
626: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
627: (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
628: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
629: sr->numopsbegin += nv;
630: return(0);
631: }
633: /*@
634: VecMDotEnd - Ends a split phase multiple dot product computation.
636: Input Parameters:
637: + x - the first vector (can be NULL)
638: . nv - number of vectors
639: - y - array of vectors (can be NULL)
641: Output Parameters:
642: . result - where the result will go
644: Level: advanced
646: Notes:
647: Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().
649: .seealso: VecMDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
650: VecTDotBegin(),VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()
652: @*/
653: PetscErrorCode VecMDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
654: {
655: PetscErrorCode ierr;
656: PetscSplitReduction *sr;
657: MPI_Comm comm;
658: int i;
661: PetscObjectGetComm((PetscObject)x,&comm);
662: PetscSplitReductionGet(comm,&sr);
663: PetscSplitReductionEnd(sr);
665: if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
666: if (x && (void*)x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
667: if (sr->reducetype[sr->numopsend] != PETSC_SR_REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
668: for (i=0;i<nv;i++) result[i] = sr->gvalues[sr->numopsend++];
670: /*
671: We are finished getting all the results so reset to no outstanding requests
672: */
673: if (sr->numopsend == sr->numopsbegin) {
674: sr->state = STATE_BEGIN;
675: sr->numopsend = 0;
676: sr->numopsbegin = 0;
677: }
678: return(0);
679: }
681: /*@
682: VecMTDotBegin - Starts a split phase transpose multiple dot product computation.
684: Input Parameters:
685: + x - the first vector
686: . nv - number of vectors
687: . y - array of vectors
688: - result - where the result will go (can be NULL)
690: Level: advanced
692: Notes:
693: Each call to VecMTDotBegin() should be paired with a call to VecMTDotEnd().
695: .seealso: VecMTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
696: VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()
698: @*/
699: PetscErrorCode VecMTDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
700: {
701: PetscErrorCode ierr;
702: PetscSplitReduction *sr;
703: MPI_Comm comm;
704: int i;
707: PetscObjectGetComm((PetscObject)x,&comm);
708: PetscSplitReductionGet(comm,&sr);
709: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
710: for (i=0; i<nv; i++) {
711: if (sr->numopsbegin+i >= sr->maxops) {
712: PetscSplitReductionExtend(sr);
713: }
714: sr->reducetype[sr->numopsbegin+i] = PETSC_SR_REDUCE_SUM;
715: sr->invecs[sr->numopsbegin+i] = (void*)x;
716: }
717: if (!x->ops->mtdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local mdots");
718: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
719: (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
720: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
721: sr->numopsbegin += nv;
722: return(0);
723: }
725: /*@
726: VecMTDotEnd - Ends a split phase transpose multiple dot product computation.
728: Input Parameters:
729: + x - the first vector (can be NULL)
730: . nv - number of vectors
731: - y - array of vectors (can be NULL)
733: Output Parameters:
734: . result - where the result will go
736: Level: advanced
738: Notes:
739: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
741: .seealso: VecMTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
742: VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()
743: @*/
744: PetscErrorCode VecMTDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
745: {
749: /*
750: MTDotEnd() is the same as MDotEnd() so reuse the code
751: */
752: VecMDotEnd(x,nv,y,result);
753: return(0);
754: }