Actual source code: comb.c
petsc-3.8.4 2018-03-24
2: /*
3: Split phase global vector reductions with support for combining the
4: communication portion of several operations. Using MPI-1.1 support only
6: The idea for this and much of the initial code is contributed by
7: Victor Eijkhout.
9: Usage:
10: VecDotBegin(Vec,Vec,PetscScalar *);
11: VecNormBegin(Vec,NormType,PetscReal *);
12: ....
13: VecDotEnd(Vec,Vec,PetscScalar *);
14: VecNormEnd(Vec,NormType,PetscReal *);
16: Limitations:
17: - The order of the xxxEnd() functions MUST be in the same order
18: as the xxxBegin(). There is extensive error checking to try to
19: insure that the user calls the routines in the correct order
20: */
22: #include <petsc/private/vecimpl.h>
24: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf,void *recvbuf,PetscMPIInt count,MPI_Datatype datatype,MPI_Op op,MPI_Comm comm,MPI_Request *request)
25: {
29: #if defined(PETSC_HAVE_MPI_IALLREDUCE)
30: MPI_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
31: #elif defined(PETSC_HAVE_MPIX_IALLREDUCE)
32: MPIX_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
33: #else
34: MPIU_Allreduce(sendbuf,recvbuf,count,datatype,op,comm);
35: *request = MPI_REQUEST_NULL;
36: #endif
37: return(0);
38: }
40: /*
41: Note: the lvalues and gvalues are twice as long as maxops, this is to allow the second half of
42: the entries to have a flag indicating if they are REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN these are used by
43: the custom reduction operation that replaces MPI_SUM, MPI_MAX, or MPI_MIN in the case when a reduction involves
44: some of each.
45: */
47: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction*);
49: /*
50: PetscSplitReductionCreate - Creates a data structure to contain the queued information.
51: */
52: static PetscErrorCode PetscSplitReductionCreate(MPI_Comm comm,PetscSplitReduction **sr)
53: {
57: PetscNew(sr);
58: (*sr)->numopsbegin = 0;
59: (*sr)->numopsend = 0;
60: (*sr)->state = STATE_BEGIN;
61: (*sr)->maxops = 32;
62: PetscMalloc1(2*32,&(*sr)->lvalues);
63: PetscMalloc1(2*32,&(*sr)->gvalues);
64: PetscMalloc1(32,&(*sr)->invecs);
65: (*sr)->comm = comm;
66: (*sr)->request = MPI_REQUEST_NULL;
67: PetscMalloc1(32,&(*sr)->reducetype);
68: (*sr)->async = PETSC_FALSE;
69: #if defined(PETSC_HAVE_MPI_IALLREDUCE) || defined(PETSC_HAVE_MPIX_IALLREDUCE)
70: (*sr)->async = PETSC_TRUE; /* Enable by default */
71: #endif
72: /* always check for option; so that tests that run on systems without support don't warn about unhandled options */
73: PetscOptionsGetBool(NULL,NULL,"-splitreduction_async",&(*sr)->async,NULL);
74: return(0);
75: }
77: /*
78: This function is the MPI reduction operation used when there is
79: a combination of sums and max in the reduction. The call below to
80: MPI_Op_create() converts the function PetscSplitReduction_Local() to the
81: MPI operator PetscSplitReduction_Op.
82: */
83: MPI_Op PetscSplitReduction_Op = 0;
85: PETSC_EXTERN void MPIAPI PetscSplitReduction_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
86: {
87: PetscScalar *xin = (PetscScalar*)in,*xout = (PetscScalar*)out;
88: PetscInt i,count = (PetscInt)*cnt;
91: if (*datatype != MPIU_SCALAR) {
92: (*PetscErrorPrintf)("Can only handle MPIU_SCALAR data types");
93: MPI_Abort(MPI_COMM_SELF,1);
94: }
95: count = count/2;
96: for (i=0; i<count; i++) {
97: /* second half of xin[] is flags for reduction type */
98: if ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_SUM) xout[i] += xin[i];
99: else if ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_MAX) xout[i] = PetscMax(*(PetscReal*)(xout+i),*(PetscReal*)(xin+i));
100: else if ((PetscInt)PetscRealPart(xin[count+i]) == REDUCE_MIN) xout[i] = PetscMin(*(PetscReal*)(xout+i),*(PetscReal*)(xin+i));
101: else {
102: (*PetscErrorPrintf)("Reduction type input is not REDUCE_SUM, REDUCE_MAX, or REDUCE_MIN");
103: MPI_Abort(MPI_COMM_SELF,1);
104: }
105: }
106: PetscFunctionReturnVoid();
107: }
109: /*@
110: PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction
112: Collective but not synchronizing
114: Input Arguments:
115: comm - communicator on which split reduction has been queued
117: Level: advanced
119: Note:
120: Calling this function is optional when using split-mode reduction. On supporting hardware, calling this after all
121: VecXxxBegin() allows the reduction to make asynchronous progress before the result is needed (in VecXxxEnd()).
123: .seealso: VecNormBegin(), VecNormEnd(), VecDotBegin(), VecDotEnd(), VecTDotBegin(), VecTDotEnd(), VecMDotBegin(), VecMDotEnd(), VecMTDotBegin(), VecMTDotEnd()
124: @*/
125: PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
126: {
127: PetscErrorCode ierr;
128: PetscSplitReduction *sr;
131: PetscSplitReductionGet(comm,&sr);
132: if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
133: if (sr->async) { /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
134: PetscInt i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
135: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
136: PetscInt sum_flg = 0,max_flg = 0, min_flg = 0;
137: MPI_Comm comm = sr->comm;
138: PetscMPIInt size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);;
139: PetscLogEventBegin(VEC_ReduceBegin,0,0,0,0);
140: MPI_Comm_size(sr->comm,&size);
141: if (size == 1) {
142: PetscMemcpy(gvalues,lvalues,numops*sizeof(PetscScalar));
143: } else {
144: /* determine if all reductions are sum, max, or min */
145: for (i=0; i<numops; i++) {
146: if (reducetype[i] == REDUCE_MAX) max_flg = 1;
147: else if (reducetype[i] == REDUCE_SUM) sum_flg = 1;
148: else if (reducetype[i] == REDUCE_MIN) min_flg = 1;
149: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
150: }
151: if (sum_flg + max_flg + min_flg > 1) {
152: /*
153: after all the entires in lvalues we store the reducetype flags to indicate
154: to the reduction operations what are sums and what are max
155: */
156: for (i=0; i<numops; i++) lvalues[numops+i] = reducetype[i];
158: MPIPetsc_Iallreduce(lvalues,gvalues,2*numops,MPIU_SCALAR,PetscSplitReduction_Op,comm,&sr->request);
159: } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
160: MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm,&sr->request);
161: } else if (min_flg) {
162: MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm,&sr->request);
163: } else {
164: MPIPetsc_Iallreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm,&sr->request);
165: }
166: }
167: sr->state = STATE_PENDING;
168: sr->numopsend = 0;
169: PetscLogEventEnd(VEC_ReduceBegin,0,0,0,0);
170: } else {
171: PetscSplitReductionApply(sr);
172: }
173: return(0);
174: }
176: PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
177: {
181: switch (sr->state) {
182: case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
183: PetscSplitReductionApply(sr);
184: break;
185: case STATE_PENDING:
186: /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
187: PetscLogEventBegin(VEC_ReduceEnd,0,0,0,0);
188: if (sr->request != MPI_REQUEST_NULL) {
189: MPI_Wait(&sr->request,MPI_STATUS_IGNORE);
190: }
191: sr->state = STATE_END;
192: PetscLogEventEnd(VEC_ReduceEnd,0,0,0,0);
193: break;
194: default: break; /* everything is already done */
195: }
196: return(0);
197: }
199: /*
200: PetscSplitReductionApply - Actually do the communication required for a split phase reduction
201: */
202: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
203: {
205: PetscInt i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
206: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
207: PetscInt sum_flg = 0,max_flg = 0, min_flg = 0;
208: MPI_Comm comm = sr->comm;
209: PetscMPIInt size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);
212: if (sr->numopsend > 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Cannot call this after VecxxxEnd() has been called");
213: PetscLogEventBarrierBegin(VEC_ReduceBarrier,0,0,0,0,comm);
214: MPI_Comm_size(sr->comm,&size);
215: if (size == 1) {
216: PetscMemcpy(gvalues,lvalues,numops*sizeof(PetscScalar));
217: } else {
218: /* determine if all reductions are sum, max, or min */
219: for (i=0; i<numops; i++) {
220: if (reducetype[i] == REDUCE_MAX) max_flg = 1;
221: else if (reducetype[i] == REDUCE_SUM) sum_flg = 1;
222: else if (reducetype[i] == REDUCE_MIN) min_flg = 1;
223: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
224: }
225: if (sum_flg + max_flg + min_flg > 1) {
226: /*
227: after all the entires in lvalues we store the reducetype flags to indicate
228: to the reduction operations what are sums and what are max
229: */
230: for (i=0; i<numops; i++) lvalues[numops+i] = reducetype[i];
231: MPIU_Allreduce(lvalues,gvalues,2*numops,MPIU_SCALAR,PetscSplitReduction_Op,comm);
232: } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
233: MPIU_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm);
234: } else if (min_flg) {
235: MPIU_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm);
236: } else {
237: MPIU_Allreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm);
238: }
239: }
240: sr->state = STATE_END;
241: sr->numopsend = 0;
242: PetscLogEventBarrierEnd(VEC_ReduceBarrier,0,0,0,0,comm);
243: return(0);
244: }
246: /*
247: PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
248: */
249: PetscErrorCode PetscSplitReductionExtend(PetscSplitReduction *sr)
250: {
252: PetscInt maxops = sr->maxops,*reducetype = sr->reducetype;
253: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
254: void *invecs = sr->invecs;
257: sr->maxops = 2*maxops;
258: PetscMalloc1(2*2*maxops,&sr->lvalues);
259: PetscMalloc1(2*2*maxops,&sr->gvalues);
260: PetscMalloc1(2*maxops,&sr->reducetype);
261: PetscMalloc1(2*maxops,&sr->invecs);
262: PetscMemcpy(sr->lvalues,lvalues,maxops*sizeof(PetscScalar));
263: PetscMemcpy(sr->gvalues,gvalues,maxops*sizeof(PetscScalar));
264: PetscMemcpy(sr->reducetype,reducetype,maxops*sizeof(PetscInt));
265: PetscMemcpy(sr->invecs,invecs,maxops*sizeof(void*));
266: PetscFree(lvalues);
267: PetscFree(gvalues);
268: PetscFree(reducetype);
269: PetscFree(invecs);
270: return(0);
271: }
273: PetscErrorCode PetscSplitReductionDestroy(PetscSplitReduction *sr)
274: {
278: PetscFree(sr->lvalues);
279: PetscFree(sr->gvalues);
280: PetscFree(sr->reducetype);
281: PetscFree(sr->invecs);
282: PetscFree(sr);
283: return(0);
284: }
286: PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;
288: /*
289: Private routine to delete internal storage when a communicator is freed.
290: This is called by MPI, not by users.
292: The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
293: it was MPI_Comm *comm.
294: */
295: PETSC_EXTERN int MPIAPI Petsc_DelReduction(MPI_Comm comm,int keyval,void* attr_val,void* extra_state)
296: {
300: PetscInfo1(0,"Deleting reduction data in an MPI_Comm %ld\n",(long)comm);
301: PetscSplitReductionDestroy((PetscSplitReduction*)attr_val);
302: return(0);
303: }
305: /*
306: PetscSplitReductionGet - Gets the split reduction object from a
307: PETSc vector, creates if it does not exit.
309: */
310: PetscErrorCode PetscSplitReductionGet(MPI_Comm comm,PetscSplitReduction **sr)
311: {
313: PetscMPIInt flag;
316: if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
317: /*
318: The calling sequence of the 2nd argument to this function changed
319: between MPI Standard 1.0 and the revisions 1.1 Here we match the
320: new standard, if you are using an MPI implementation that uses
321: the older version you will get a warning message about the next line;
322: it is only a warning message and should do no harm.
323: */
324: MPI_Keyval_create(MPI_NULL_COPY_FN,Petsc_DelReduction,&Petsc_Reduction_keyval,0);
325: }
326: MPI_Attr_get(comm,Petsc_Reduction_keyval,(void**)sr,&flag);
327: if (!flag) { /* doesn't exist yet so create it and put it in */
328: PetscSplitReductionCreate(comm,sr);
329: MPI_Attr_put(comm,Petsc_Reduction_keyval,*sr);
330: PetscInfo1(0,"Putting reduction data in an MPI_Comm %ld\n",(long)comm);
331: }
332: return(0);
333: }
335: /* ----------------------------------------------------------------------------------------------------*/
337: /*@
338: VecDotBegin - Starts a split phase dot product computation.
340: Input Parameters:
341: + x - the first vector
342: . y - the second vector
343: - result - where the result will go (can be NULL)
345: Level: advanced
347: Notes:
348: Each call to VecDotBegin() should be paired with a call to VecDotEnd().
350: seealso: VecDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
351: VecTDotBegin(), VecTDotEnd(), PetscCommSplitReductionBegin()
352: @*/
353: PetscErrorCode VecDotBegin(Vec x,Vec y,PetscScalar *result)
354: {
355: PetscErrorCode ierr;
356: PetscSplitReduction *sr;
357: MPI_Comm comm;
362: PetscObjectGetComm((PetscObject)x,&comm);
363: PetscSplitReductionGet(comm,&sr);
364: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
365: if (sr->numopsbegin >= sr->maxops) {
366: PetscSplitReductionExtend(sr);
367: }
368: sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
369: sr->invecs[sr->numopsbegin] = (void*)x;
370: if (!x->ops->dot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local dots");
371: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
372: (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
373: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
374: return(0);
375: }
377: /*@
378: VecDotEnd - Ends a split phase dot product computation.
380: Input Parameters:
381: + x - the first vector (can be NULL)
382: . y - the second vector (can be NULL)
383: - result - where the result will go
385: Level: advanced
387: Notes:
388: Each call to VecDotBegin() should be paired with a call to VecDotEnd().
390: .seealso: VecDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
391: VecTDotBegin(),VecTDotEnd(), PetscCommSplitReductionBegin()
393: @*/
394: PetscErrorCode VecDotEnd(Vec x,Vec y,PetscScalar *result)
395: {
396: PetscErrorCode ierr;
397: PetscSplitReduction *sr;
398: MPI_Comm comm;
401: PetscObjectGetComm((PetscObject)x,&comm);
402: PetscSplitReductionGet(comm,&sr);
403: PetscSplitReductionEnd(sr);
405: if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
406: if (x && (void*) x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
407: if (sr->reducetype[sr->numopsend] != REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
408: *result = sr->gvalues[sr->numopsend++];
410: /*
411: We are finished getting all the results so reset to no outstanding requests
412: */
413: if (sr->numopsend == sr->numopsbegin) {
414: sr->state = STATE_BEGIN;
415: sr->numopsend = 0;
416: sr->numopsbegin = 0;
417: }
418: return(0);
419: }
421: /*@
422: VecTDotBegin - Starts a split phase transpose dot product computation.
424: Input Parameters:
425: + x - the first vector
426: . y - the second vector
427: - result - where the result will go (can be NULL)
429: Level: advanced
431: Notes:
432: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
434: .seealso: VecTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
435: VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()
437: @*/
438: PetscErrorCode VecTDotBegin(Vec x,Vec y,PetscScalar *result)
439: {
440: PetscErrorCode ierr;
441: PetscSplitReduction *sr;
442: MPI_Comm comm;
445: PetscObjectGetComm((PetscObject)x,&comm);
446: PetscSplitReductionGet(comm,&sr);
447: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
448: if (sr->numopsbegin >= sr->maxops) {
449: PetscSplitReductionExtend(sr);
450: }
451: sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
452: sr->invecs[sr->numopsbegin] = (void*)x;
453: if (!x->ops->tdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local dots");
454: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
455: (*x->ops->tdot_local)(x,y,sr->lvalues+sr->numopsbegin++);
456: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
457: return(0);
458: }
460: /*@
461: VecTDotEnd - Ends a split phase transpose dot product computation.
463: Input Parameters:
464: + x - the first vector (can be NULL)
465: . y - the second vector (can be NULL)
466: - result - where the result will go
468: Level: advanced
470: Notes:
471: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
473: seealso: VecTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
474: VecDotBegin(), VecDotEnd()
475: @*/
476: PetscErrorCode VecTDotEnd(Vec x,Vec y,PetscScalar *result)
477: {
481: /*
482: TDotEnd() is the same as DotEnd() so reuse the code
483: */
484: VecDotEnd(x,y,result);
485: return(0);
486: }
488: /* -------------------------------------------------------------------------*/
490: /*@
491: VecNormBegin - Starts a split phase norm computation.
493: Input Parameters:
494: + x - the first vector
495: . ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
496: - result - where the result will go (can be NULL)
498: Level: advanced
500: Notes:
501: Each call to VecNormBegin() should be paired with a call to VecNormEnd().
503: .seealso: VecNormEnd(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()
505: @*/
506: PetscErrorCode VecNormBegin(Vec x,NormType ntype,PetscReal *result)
507: {
508: PetscErrorCode ierr;
509: PetscSplitReduction *sr;
510: PetscReal lresult[2];
511: MPI_Comm comm;
515: PetscObjectGetComm((PetscObject)x,&comm);
516: PetscSplitReductionGet(comm,&sr);
517: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
518: if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops-1 && ntype == NORM_1_AND_2)) {
519: PetscSplitReductionExtend(sr);
520: }
522: sr->invecs[sr->numopsbegin] = (void*)x;
523: if (!x->ops->norm_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not support local norms");
524: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
525: (*x->ops->norm_local)(x,ntype,lresult);
526: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
527: if (ntype == NORM_2) lresult[0] = lresult[0]*lresult[0];
528: if (ntype == NORM_1_AND_2) lresult[1] = lresult[1]*lresult[1];
529: if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = REDUCE_MAX;
530: else sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
531: sr->lvalues[sr->numopsbegin++] = lresult[0];
532: if (ntype == NORM_1_AND_2) {
533: sr->reducetype[sr->numopsbegin] = REDUCE_SUM;
534: sr->lvalues[sr->numopsbegin++] = lresult[1];
535: }
536: return(0);
537: }
539: /*@
540: VecNormEnd - Ends a split phase norm computation.
542: Input Parameters:
543: + x - the first vector
544: . ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
545: - result - where the result will go
547: Level: advanced
549: Notes:
550: Each call to VecNormBegin() should be paired with a call to VecNormEnd().
552: The x vector is not allowed to be NULL, otherwise the vector would not have its correctly cached norm value
554: .seealso: VecNormBegin(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()
556: @*/
557: PetscErrorCode VecNormEnd(Vec x,NormType ntype,PetscReal *result)
558: {
559: PetscErrorCode ierr;
560: PetscSplitReduction *sr;
561: MPI_Comm comm;
565: PetscObjectGetComm((PetscObject)x,&comm);
566: PetscSplitReductionGet(comm,&sr);
567: PetscSplitReductionEnd(sr);
569: if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
570: if ((void*)x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
571: if (sr->reducetype[sr->numopsend] != REDUCE_MAX && ntype == NORM_MAX) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecNormEnd(,NORM_MAX,) on a reduction started with VecDotBegin() or NORM_1 or NORM_2");
572: result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);
574: if (ntype == NORM_2) result[0] = PetscSqrtReal(result[0]);
575: else if (ntype == NORM_1_AND_2) {
576: result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
577: result[1] = PetscSqrtReal(result[1]);
578: }
579: if (ntype!=NORM_1_AND_2) {
580: PetscObjectComposedDataSetReal((PetscObject)x,NormIds[ntype],result[0]);
581: }
583: if (sr->numopsend == sr->numopsbegin) {
584: sr->state = STATE_BEGIN;
585: sr->numopsend = 0;
586: sr->numopsbegin = 0;
587: }
588: return(0);
589: }
591: /*
592: Possibly add
594: PetscReductionSumBegin/End()
595: PetscReductionMaxBegin/End()
596: PetscReductionMinBegin/End()
597: or have more like MPI with a single function with flag for Op? Like first better
598: */
600: /*@
601: VecMDotBegin - Starts a split phase multiple dot product computation.
603: Input Parameters:
604: + x - the first vector
605: . nv - number of vectors
606: . y - array of vectors
607: - result - where the result will go (can be NULL)
609: Level: advanced
611: Notes:
612: Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().
614: .seealso: VecMDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
615: VecTDotBegin(), VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()
616: @*/
617: PetscErrorCode VecMDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
618: {
619: PetscErrorCode ierr;
620: PetscSplitReduction *sr;
621: MPI_Comm comm;
622: int i;
625: PetscObjectGetComm((PetscObject)x,&comm);
626: PetscSplitReductionGet(comm,&sr);
627: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
628: for (i=0; i<nv; i++) {
629: if (sr->numopsbegin+i >= sr->maxops) {
630: PetscSplitReductionExtend(sr);
631: }
632: sr->reducetype[sr->numopsbegin+i] = REDUCE_SUM;
633: sr->invecs[sr->numopsbegin+i] = (void*)x;
634: }
635: if (!x->ops->mdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local mdots");
636: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
637: (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
638: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
639: sr->numopsbegin += nv;
640: return(0);
641: }
643: /*@
644: VecMDotEnd - Ends a split phase multiple dot product computation.
646: Input Parameters:
647: + x - the first vector (can be NULL)
648: . nv - number of vectors
649: - y - array of vectors (can be NULL)
651: Output Parameters:
652: . result - where the result will go
654: Level: advanced
656: Notes:
657: Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().
659: .seealso: VecMDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
660: VecTDotBegin(),VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()
662: @*/
663: PetscErrorCode VecMDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
664: {
665: PetscErrorCode ierr;
666: PetscSplitReduction *sr;
667: MPI_Comm comm;
668: int i;
671: PetscObjectGetComm((PetscObject)x,&comm);
672: PetscSplitReductionGet(comm,&sr);
673: PetscSplitReductionEnd(sr);
675: if (sr->numopsend >= sr->numopsbegin) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() more times then VecxxxBegin()");
676: if (x && (void*) x != sr->invecs[sr->numopsend]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecxxxEnd() in a different order or with a different vector than VecxxxBegin()");
677: if (sr->reducetype[sr->numopsend] != REDUCE_SUM) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Called VecDotEnd() on a reduction started with VecNormBegin()");
678: for (i=0;i<nv;i++) result[i] = sr->gvalues[sr->numopsend++];
680: /*
681: We are finished getting all the results so reset to no outstanding requests
682: */
683: if (sr->numopsend == sr->numopsbegin) {
684: sr->state = STATE_BEGIN;
685: sr->numopsend = 0;
686: sr->numopsbegin = 0;
687: }
688: return(0);
689: }
691: /*@
692: VecMTDotBegin - Starts a split phase transpose multiple dot product computation.
694: Input Parameters:
695: + x - the first vector
696: . nv - number of vectors
697: . y - array of vectors
698: - result - where the result will go (can be NULL)
700: Level: advanced
702: Notes:
703: Each call to VecMTDotBegin() should be paired with a call to VecMTDotEnd().
705: .seealso: VecMTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
706: VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()
708: @*/
709: PetscErrorCode VecMTDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
710: {
711: PetscErrorCode ierr;
712: PetscSplitReduction *sr;
713: MPI_Comm comm;
714: int i;
717: PetscObjectGetComm((PetscObject)x,&comm);
718: PetscSplitReductionGet(comm,&sr);
719: if (sr->state != STATE_BEGIN) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ORDER,"Called before all VecxxxEnd() called");
720: for (i=0; i<nv; i++) {
721: if (sr->numopsbegin+i >= sr->maxops) {
722: PetscSplitReductionExtend(sr);
723: }
724: sr->reducetype[sr->numopsbegin+i] = REDUCE_SUM;
725: sr->invecs[sr->numopsbegin+i] = (void*)x;
726: }
727: if (!x->ops->mtdot_local) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Vector does not suppport local mdots");
728: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
729: (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
730: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
731: sr->numopsbegin += nv;
732: return(0);
733: }
735: /*@
736: VecMTDotEnd - Ends a split phase transpose multiple dot product computation.
738: Input Parameters:
739: + x - the first vector (can be NULL)
740: . nv - number of vectors
741: - y - array of vectors (can be NULL)
743: Output Parameters
744: . result - where the result will go
746: Level: advanced
748: Notes:
749: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
751: .seealso: VecMTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
752: VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()
753: @*/
754: PetscErrorCode VecMTDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
755: {
759: /*
760: MTDotEnd() is the same as MDotEnd() so reuse the code
761: */
762: VecMDotEnd(x,nv,y,result);
763: return(0);
764: }