Actual source code: comb.c
2: /*
3: Split phase global vector reductions with support for combining the
4: communication portion of several operations. Using MPI-1.1 support only
6: The idea for this and much of the initial code is contributed by
7: Victor Eijkhout.
9: Usage:
10: VecDotBegin(Vec,Vec,PetscScalar *);
11: VecNormBegin(Vec,NormType,PetscReal *);
12: ....
13: VecDotEnd(Vec,Vec,PetscScalar *);
14: VecNormEnd(Vec,NormType,PetscReal *);
16: Limitations:
17: - The order of the xxxEnd() functions MUST be in the same order
18: as the xxxBegin(). There is extensive error checking to try to
19: insure that the user calls the routines in the correct order
20: */
22: #include <petsc/private/vecimpl.h>
24: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf,void *recvbuf,PetscMPIInt count,MPI_Datatype datatype,MPI_Op op,MPI_Comm comm,MPI_Request *request)
25: {
28: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
29: MPI_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
30: #else
31: MPIU_Allreduce(sendbuf,recvbuf,count,datatype,op,comm);
32: *request = MPI_REQUEST_NULL;
33: #endif
34: return 0;
35: }
37: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction*);
39: /*
40: PetscSplitReductionCreate - Creates a data structure to contain the queued information.
41: */
42: static PetscErrorCode PetscSplitReductionCreate(MPI_Comm comm,PetscSplitReduction **sr)
43: {
44: PetscNew(sr);
45: (*sr)->numopsbegin = 0;
46: (*sr)->numopsend = 0;
47: (*sr)->state = STATE_BEGIN;
48: #define MAXOPS 32
49: (*sr)->maxops = MAXOPS;
50: PetscMalloc6(MAXOPS,&(*sr)->lvalues,MAXOPS,&(*sr)->gvalues,MAXOPS,&(*sr)->invecs,MAXOPS,&(*sr)->reducetype,MAXOPS,&(*sr)->lvalues_mix,MAXOPS,&(*sr)->gvalues_mix);
51: #undef MAXOPS
52: (*sr)->comm = comm;
53: (*sr)->request = MPI_REQUEST_NULL;
54: (*sr)->mix = PETSC_FALSE;
55: (*sr)->async = PETSC_FALSE;
56: #if defined(PETSC_HAVE_MPI_NONBLOCKING_COLLECTIVES)
57: (*sr)->async = PETSC_TRUE; /* Enable by default */
58: #endif
59: /* always check for option; so that tests that run on systems without support don't warn about unhandled options */
60: PetscOptionsGetBool(NULL,NULL,"-splitreduction_async",&(*sr)->async,NULL);
61: return 0;
62: }
64: /*
65: This function is the MPI reduction operation used when there is
66: a combination of sums and max in the reduction. The call below to
67: MPI_Op_create() converts the function PetscSplitReduction_Local() to the
68: MPI operator PetscSplitReduction_Op.
69: */
70: MPI_Op PetscSplitReduction_Op = 0;
72: PETSC_EXTERN void MPIAPI PetscSplitReduction_Local(void *in,void *out,PetscMPIInt *cnt,MPI_Datatype *datatype)
73: {
74: struct PetscScalarInt { PetscScalar v; PetscInt i; };
75: struct PetscScalarInt *xin = (struct PetscScalarInt*)in;
76: struct PetscScalarInt *xout = (struct PetscScalarInt*)out;
77: PetscInt i,count = (PetscInt)*cnt;
79: if (*datatype != MPIU_SCALAR_INT) {
80: (*PetscErrorPrintf)("Can only handle MPIU_SCALAR_INT data types");
81: PETSCABORT(MPI_COMM_SELF,PETSC_ERR_ARG_WRONG);
82: }
83: for (i=0; i<count; i++) {
84: if (xin[i].i == PETSC_SR_REDUCE_SUM) xout[i].v += xin[i].v;
85: else if (xin[i].i == PETSC_SR_REDUCE_MAX) xout[i].v = PetscMax(PetscRealPart(xout[i].v),PetscRealPart(xin[i].v));
86: else if (xin[i].i == PETSC_SR_REDUCE_MIN) xout[i].v = PetscMin(PetscRealPart(xout[i].v),PetscRealPart(xin[i].v));
87: else {
88: (*PetscErrorPrintf)("Reduction type input is not PETSC_SR_REDUCE_SUM, PETSC_SR_REDUCE_MAX, or PETSC_SR_REDUCE_MIN");
89: PETSCABORT(MPI_COMM_SELF,PETSC_ERR_ARG_WRONG);
90: }
91: }
92: return;
93: }
95: /*@
96: PetscCommSplitReductionBegin - Begin an asynchronous split-mode reduction
98: Collective but not synchronizing
100: Input Parameter:
101: comm - communicator on which split reduction has been queued
103: Level: advanced
105: Note:
106: Calling this function is optional when using split-mode reduction. On supporting hardware, calling this after all
107: VecXxxBegin() allows the reduction to make asynchronous progress before the result is needed (in VecXxxEnd()).
109: .seealso: VecNormBegin(), VecNormEnd(), VecDotBegin(), VecDotEnd(), VecTDotBegin(), VecTDotEnd(), VecMDotBegin(), VecMDotEnd(), VecMTDotBegin(), VecMTDotEnd()
110: @*/
111: PetscErrorCode PetscCommSplitReductionBegin(MPI_Comm comm)
112: {
113: PetscSplitReduction *sr;
115: PetscSplitReductionGet(comm,&sr);
117: if (sr->async) { /* Bad reuse, setup code copied from PetscSplitReductionApply(). */
118: PetscInt i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
119: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
120: PetscInt sum_flg = 0,max_flg = 0, min_flg = 0;
121: MPI_Comm comm = sr->comm;
122: PetscMPIInt size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);
124: PetscLogEventBegin(VEC_ReduceBegin,0,0,0,0);
125: MPI_Comm_size(sr->comm,&size);
126: if (size == 1) {
127: PetscArraycpy(gvalues,lvalues,numops);
128: } else {
129: /* determine if all reductions are sum, max, or min */
130: for (i=0; i<numops; i++) {
131: if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
132: else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
133: else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
134: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
135: }
137: if (sum_flg + max_flg + min_flg > 1) {
138: sr->mix = PETSC_TRUE;
139: for (i=0; i<numops; i++) { sr->lvalues_mix[i].v = lvalues[i]; sr->lvalues_mix[i].i = reducetype[i]; }
140: MPIPetsc_Iallreduce(sr->lvalues_mix,sr->gvalues_mix,numops,MPIU_SCALAR_INT,PetscSplitReduction_Op,comm,&sr->request);
141: } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
142: MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm,&sr->request);
143: } else if (min_flg) {
144: MPIPetsc_Iallreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm,&sr->request);
145: } else {
146: MPIPetsc_Iallreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm,&sr->request);
147: }
148: }
149: sr->state = STATE_PENDING;
150: sr->numopsend = 0;
151: PetscLogEventEnd(VEC_ReduceBegin,0,0,0,0);
152: } else {
153: PetscSplitReductionApply(sr);
154: }
155: return 0;
156: }
158: PetscErrorCode PetscSplitReductionEnd(PetscSplitReduction *sr)
159: {
160: switch (sr->state) {
161: case STATE_BEGIN: /* We are doing synchronous communication and this is the first call to VecXxxEnd() so do the communication */
162: PetscSplitReductionApply(sr);
163: break;
164: case STATE_PENDING:
165: /* We are doing asynchronous-mode communication and this is the first VecXxxEnd() so wait for comm to complete */
166: PetscLogEventBegin(VEC_ReduceEnd,0,0,0,0);
167: if (sr->request != MPI_REQUEST_NULL) {
168: MPI_Wait(&sr->request,MPI_STATUS_IGNORE);
169: }
170: sr->state = STATE_END;
171: if (sr->mix) {
172: PetscInt i;
173: for (i=0; i<sr->numopsbegin; i++) { sr->gvalues[i] = sr->gvalues_mix[i].v; }
174: sr->mix = PETSC_FALSE;
175: }
176: PetscLogEventEnd(VEC_ReduceEnd,0,0,0,0);
177: break;
178: default: break; /* everything is already done */
179: }
180: return 0;
181: }
183: /*
184: PetscSplitReductionApply - Actually do the communication required for a split phase reduction
185: */
186: static PetscErrorCode PetscSplitReductionApply(PetscSplitReduction *sr)
187: {
188: PetscInt i,numops = sr->numopsbegin,*reducetype = sr->reducetype;
189: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
190: PetscInt sum_flg = 0,max_flg = 0, min_flg = 0;
191: MPI_Comm comm = sr->comm;
192: PetscMPIInt size,cmul = sizeof(PetscScalar)/sizeof(PetscReal);
195: PetscLogEventBegin(VEC_ReduceCommunication,0,0,0,0);
196: MPI_Comm_size(sr->comm,&size);
197: if (size == 1) {
198: PetscArraycpy(gvalues,lvalues,numops);
199: } else {
200: /* determine if all reductions are sum, max, or min */
201: for (i=0; i<numops; i++) {
202: if (reducetype[i] == PETSC_SR_REDUCE_MAX) max_flg = 1;
203: else if (reducetype[i] == PETSC_SR_REDUCE_SUM) sum_flg = 1;
204: else if (reducetype[i] == PETSC_SR_REDUCE_MIN) min_flg = 1;
205: else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in PetscSplitReduction() data structure, probably memory corruption");
206: }
207: if (sum_flg + max_flg + min_flg > 1) {
209: for (i=0; i<numops; i++) { sr->lvalues_mix[i].v = lvalues[i]; sr->lvalues_mix[i].i = reducetype[i]; }
210: MPIU_Allreduce(sr->lvalues_mix,sr->gvalues_mix,numops,MPIU_SCALAR_INT,PetscSplitReduction_Op,comm);
211: for (i=0; i<numops; i++) { sr->gvalues[i] = sr->gvalues_mix[i].v; }
212: } else if (max_flg) { /* Compute max of real and imag parts separately, presumably only the real part is used */
213: MPIU_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MAX,comm);
214: } else if (min_flg) {
215: MPIU_Allreduce((PetscReal*)lvalues,(PetscReal*)gvalues,cmul*numops,MPIU_REAL,MPIU_MIN,comm);
216: } else {
217: MPIU_Allreduce(lvalues,gvalues,numops,MPIU_SCALAR,MPIU_SUM,comm);
218: }
219: }
220: sr->state = STATE_END;
221: sr->numopsend = 0;
222: PetscLogEventEnd(VEC_ReduceCommunication,0,0,0,0);
223: return 0;
224: }
226: /*
227: PetscSplitReductionExtend - Double the amount of space (slots) allocated for a split reduction object.
228: */
229: PetscErrorCode PetscSplitReductionExtend(PetscSplitReduction *sr)
230: {
231: struct PetscScalarInt { PetscScalar v; PetscInt i; };
232: PetscInt maxops = sr->maxops,*reducetype = sr->reducetype;
233: PetscScalar *lvalues = sr->lvalues,*gvalues = sr->gvalues;
234: struct PetscScalarInt *lvalues_mix = (struct PetscScalarInt*)sr->lvalues_mix;
235: struct PetscScalarInt *gvalues_mix = (struct PetscScalarInt*)sr->gvalues_mix;
236: void **invecs = sr->invecs;
238: sr->maxops = 2*maxops;
239: PetscMalloc6(2*maxops,&sr->lvalues,2*maxops,&sr->gvalues,2*maxops,&sr->reducetype,2*maxops,&sr->invecs,2*maxops,&sr->lvalues_mix,2*maxops,&sr->gvalues_mix);
240: PetscArraycpy(sr->lvalues,lvalues,maxops);
241: PetscArraycpy(sr->gvalues,gvalues,maxops);
242: PetscArraycpy(sr->reducetype,reducetype,maxops);
243: PetscArraycpy(sr->invecs,invecs,maxops);
244: PetscArraycpy(sr->lvalues_mix,lvalues_mix,maxops);
245: PetscArraycpy(sr->gvalues_mix,gvalues_mix,maxops);
246: PetscFree6(lvalues,gvalues,reducetype,invecs,lvalues_mix,gvalues_mix);
247: return 0;
248: }
250: PetscErrorCode PetscSplitReductionDestroy(PetscSplitReduction *sr)
251: {
252: PetscFree6(sr->lvalues,sr->gvalues,sr->reducetype,sr->invecs,sr->lvalues_mix,sr->gvalues_mix);
253: PetscFree(sr);
254: return 0;
255: }
257: PetscMPIInt Petsc_Reduction_keyval = MPI_KEYVAL_INVALID;
259: /*
260: Private routine to delete internal storage when a communicator is freed.
261: This is called by MPI, not by users.
263: The binding for the first argument changed from MPI 1.0 to 1.1; in 1.0
264: it was MPI_Comm *comm.
265: */
266: PETSC_EXTERN int MPIAPI Petsc_DelReduction(MPI_Comm comm,int keyval,void* attr_val,void* extra_state)
267: {
268: PetscInfo(0,"Deleting reduction data in an MPI_Comm %ld\n",(long)comm);
269: PetscSplitReductionDestroy((PetscSplitReduction*)attr_val);
270: return 0;
271: }
273: /*
274: PetscSplitReductionGet - Gets the split reduction object from a
275: PETSc vector, creates if it does not exit.
277: */
278: PetscErrorCode PetscSplitReductionGet(MPI_Comm comm,PetscSplitReduction **sr)
279: {
280: PetscMPIInt flag;
282: if (Petsc_Reduction_keyval == MPI_KEYVAL_INVALID) {
283: /*
284: The calling sequence of the 2nd argument to this function changed
285: between MPI Standard 1.0 and the revisions 1.1 Here we match the
286: new standard, if you are using an MPI implementation that uses
287: the older version you will get a warning message about the next line;
288: it is only a warning message and should do no harm.
289: */
290: MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,Petsc_DelReduction,&Petsc_Reduction_keyval,NULL);
291: }
292: MPI_Comm_get_attr(comm,Petsc_Reduction_keyval,(void**)sr,&flag);
293: if (!flag) { /* doesn't exist yet so create it and put it in */
294: PetscSplitReductionCreate(comm,sr);
295: MPI_Comm_set_attr(comm,Petsc_Reduction_keyval,*sr);
296: PetscInfo(0,"Putting reduction data in an MPI_Comm %ld\n",(long)comm);
297: }
298: return 0;
299: }
301: /* ----------------------------------------------------------------------------------------------------*/
303: /*@
304: VecDotBegin - Starts a split phase dot product computation.
306: Input Parameters:
307: + x - the first vector
308: . y - the second vector
309: - result - where the result will go (can be NULL)
311: Level: advanced
313: Notes:
314: Each call to VecDotBegin() should be paired with a call to VecDotEnd().
316: seealso: VecDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
317: VecTDotBegin(), VecTDotEnd(), PetscCommSplitReductionBegin()
318: @*/
319: PetscErrorCode VecDotBegin(Vec x,Vec y,PetscScalar *result)
320: {
321: PetscSplitReduction *sr;
322: MPI_Comm comm;
326: PetscObjectGetComm((PetscObject)x,&comm);
327: PetscSplitReductionGet(comm,&sr);
329: if (sr->numopsbegin >= sr->maxops) {
330: PetscSplitReductionExtend(sr);
331: }
332: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
333: sr->invecs[sr->numopsbegin] = (void*)x;
335: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
336: (*x->ops->dot_local)(x,y,sr->lvalues+sr->numopsbegin++);
337: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
338: return 0;
339: }
341: /*@
342: VecDotEnd - Ends a split phase dot product computation.
344: Input Parameters:
345: + x - the first vector (can be NULL)
346: . y - the second vector (can be NULL)
347: - result - where the result will go
349: Level: advanced
351: Notes:
352: Each call to VecDotBegin() should be paired with a call to VecDotEnd().
354: .seealso: VecDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
355: VecTDotBegin(),VecTDotEnd(), PetscCommSplitReductionBegin()
357: @*/
358: PetscErrorCode VecDotEnd(Vec x,Vec y,PetscScalar *result)
359: {
360: PetscSplitReduction *sr;
361: MPI_Comm comm;
363: PetscObjectGetComm((PetscObject)x,&comm);
364: PetscSplitReductionGet(comm,&sr);
365: PetscSplitReductionEnd(sr);
370: *result = sr->gvalues[sr->numopsend++];
372: /*
373: We are finished getting all the results so reset to no outstanding requests
374: */
375: if (sr->numopsend == sr->numopsbegin) {
376: sr->state = STATE_BEGIN;
377: sr->numopsend = 0;
378: sr->numopsbegin = 0;
379: sr->mix = PETSC_FALSE;
380: }
381: return 0;
382: }
384: /*@
385: VecTDotBegin - Starts a split phase transpose dot product computation.
387: Input Parameters:
388: + x - the first vector
389: . y - the second vector
390: - result - where the result will go (can be NULL)
392: Level: advanced
394: Notes:
395: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
397: .seealso: VecTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
398: VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()
400: @*/
401: PetscErrorCode VecTDotBegin(Vec x,Vec y,PetscScalar *result)
402: {
403: PetscSplitReduction *sr;
404: MPI_Comm comm;
406: PetscObjectGetComm((PetscObject)x,&comm);
407: PetscSplitReductionGet(comm,&sr);
409: if (sr->numopsbegin >= sr->maxops) {
410: PetscSplitReductionExtend(sr);
411: }
412: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
413: sr->invecs[sr->numopsbegin] = (void*)x;
415: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
416: (*x->ops->tdot_local)(x,y,sr->lvalues+sr->numopsbegin++);
417: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
418: return 0;
419: }
421: /*@
422: VecTDotEnd - Ends a split phase transpose dot product computation.
424: Input Parameters:
425: + x - the first vector (can be NULL)
426: . y - the second vector (can be NULL)
427: - result - where the result will go
429: Level: advanced
431: Notes:
432: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
434: seealso: VecTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
435: VecDotBegin(), VecDotEnd()
436: @*/
437: PetscErrorCode VecTDotEnd(Vec x,Vec y,PetscScalar *result)
438: {
439: /*
440: TDotEnd() is the same as DotEnd() so reuse the code
441: */
442: VecDotEnd(x,y,result);
443: return 0;
444: }
446: /* -------------------------------------------------------------------------*/
448: /*@
449: VecNormBegin - Starts a split phase norm computation.
451: Input Parameters:
452: + x - the first vector
453: . ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
454: - result - where the result will go (can be NULL)
456: Level: advanced
458: Notes:
459: Each call to VecNormBegin() should be paired with a call to VecNormEnd().
461: .seealso: VecNormEnd(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()
463: @*/
464: PetscErrorCode VecNormBegin(Vec x,NormType ntype,PetscReal *result)
465: {
466: PetscSplitReduction *sr;
467: PetscReal lresult[2];
468: MPI_Comm comm;
471: PetscObjectGetComm((PetscObject)x,&comm);
472: PetscSplitReductionGet(comm,&sr);
474: if (sr->numopsbegin >= sr->maxops || (sr->numopsbegin == sr->maxops-1 && ntype == NORM_1_AND_2)) {
475: PetscSplitReductionExtend(sr);
476: }
478: sr->invecs[sr->numopsbegin] = (void*)x;
480: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
481: (*x->ops->norm_local)(x,ntype,lresult);
482: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
483: if (ntype == NORM_2) lresult[0] = lresult[0]*lresult[0];
484: if (ntype == NORM_1_AND_2) lresult[1] = lresult[1]*lresult[1];
485: if (ntype == NORM_MAX) sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_MAX;
486: else sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
487: sr->lvalues[sr->numopsbegin++] = lresult[0];
488: if (ntype == NORM_1_AND_2) {
489: sr->reducetype[sr->numopsbegin] = PETSC_SR_REDUCE_SUM;
490: sr->lvalues[sr->numopsbegin++] = lresult[1];
491: }
492: return 0;
493: }
495: /*@
496: VecNormEnd - Ends a split phase norm computation.
498: Input Parameters:
499: + x - the first vector
500: . ntype - norm type, one of NORM_1, NORM_2, NORM_MAX, NORM_1_AND_2
501: - result - where the result will go
503: Level: advanced
505: Notes:
506: Each call to VecNormBegin() should be paired with a call to VecNormEnd().
508: The x vector is not allowed to be NULL, otherwise the vector would not have its correctly cached norm value
510: .seealso: VecNormBegin(), VecNorm(), VecDot(), VecMDot(), VecDotBegin(), VecDotEnd(), PetscCommSplitReductionBegin()
512: @*/
513: PetscErrorCode VecNormEnd(Vec x,NormType ntype,PetscReal *result)
514: {
515: PetscSplitReduction *sr;
516: MPI_Comm comm;
519: PetscObjectGetComm((PetscObject)x,&comm);
520: PetscSplitReductionGet(comm,&sr);
521: PetscSplitReductionEnd(sr);
526: result[0] = PetscRealPart(sr->gvalues[sr->numopsend++]);
528: if (ntype == NORM_2) result[0] = PetscSqrtReal(result[0]);
529: else if (ntype == NORM_1_AND_2) {
530: result[1] = PetscRealPart(sr->gvalues[sr->numopsend++]);
531: result[1] = PetscSqrtReal(result[1]);
532: }
533: if (ntype!=NORM_1_AND_2) {
534: PetscObjectComposedDataSetReal((PetscObject)x,NormIds[ntype],result[0]);
535: }
537: if (sr->numopsend == sr->numopsbegin) {
538: sr->state = STATE_BEGIN;
539: sr->numopsend = 0;
540: sr->numopsbegin = 0;
541: }
542: return 0;
543: }
545: /*
546: Possibly add
548: PetscReductionSumBegin/End()
549: PetscReductionMaxBegin/End()
550: PetscReductionMinBegin/End()
551: or have more like MPI with a single function with flag for Op? Like first better
552: */
554: /*@
555: VecMDotBegin - Starts a split phase multiple dot product computation.
557: Input Parameters:
558: + x - the first vector
559: . nv - number of vectors
560: . y - array of vectors
561: - result - where the result will go (can be NULL)
563: Level: advanced
565: Notes:
566: Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().
568: .seealso: VecMDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
569: VecTDotBegin(), VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()
570: @*/
571: PetscErrorCode VecMDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
572: {
573: PetscSplitReduction *sr;
574: MPI_Comm comm;
575: int i;
577: PetscObjectGetComm((PetscObject)x,&comm);
578: PetscSplitReductionGet(comm,&sr);
580: for (i=0; i<nv; i++) {
581: if (sr->numopsbegin+i >= sr->maxops) {
582: PetscSplitReductionExtend(sr);
583: }
584: sr->reducetype[sr->numopsbegin+i] = PETSC_SR_REDUCE_SUM;
585: sr->invecs[sr->numopsbegin+i] = (void*)x;
586: }
588: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
589: (*x->ops->mdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
590: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
591: sr->numopsbegin += nv;
592: return 0;
593: }
595: /*@
596: VecMDotEnd - Ends a split phase multiple dot product computation.
598: Input Parameters:
599: + x - the first vector (can be NULL)
600: . nv - number of vectors
601: - y - array of vectors (can be NULL)
603: Output Parameters:
604: . result - where the result will go
606: Level: advanced
608: Notes:
609: Each call to VecMDotBegin() should be paired with a call to VecMDotEnd().
611: .seealso: VecMDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
612: VecTDotBegin(),VecTDotEnd(), VecMTDotBegin(), VecMTDotEnd(), PetscCommSplitReductionBegin()
614: @*/
615: PetscErrorCode VecMDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
616: {
617: PetscSplitReduction *sr;
618: MPI_Comm comm;
619: int i;
621: PetscObjectGetComm((PetscObject)x,&comm);
622: PetscSplitReductionGet(comm,&sr);
623: PetscSplitReductionEnd(sr);
628: for (i=0;i<nv;i++) result[i] = sr->gvalues[sr->numopsend++];
630: /*
631: We are finished getting all the results so reset to no outstanding requests
632: */
633: if (sr->numopsend == sr->numopsbegin) {
634: sr->state = STATE_BEGIN;
635: sr->numopsend = 0;
636: sr->numopsbegin = 0;
637: }
638: return 0;
639: }
641: /*@
642: VecMTDotBegin - Starts a split phase transpose multiple dot product computation.
644: Input Parameters:
645: + x - the first vector
646: . nv - number of vectors
647: . y - array of vectors
648: - result - where the result will go (can be NULL)
650: Level: advanced
652: Notes:
653: Each call to VecMTDotBegin() should be paired with a call to VecMTDotEnd().
655: .seealso: VecMTDotEnd(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
656: VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()
658: @*/
659: PetscErrorCode VecMTDotBegin(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
660: {
661: PetscSplitReduction *sr;
662: MPI_Comm comm;
663: int i;
665: PetscObjectGetComm((PetscObject)x,&comm);
666: PetscSplitReductionGet(comm,&sr);
668: for (i=0; i<nv; i++) {
669: if (sr->numopsbegin+i >= sr->maxops) {
670: PetscSplitReductionExtend(sr);
671: }
672: sr->reducetype[sr->numopsbegin+i] = PETSC_SR_REDUCE_SUM;
673: sr->invecs[sr->numopsbegin+i] = (void*)x;
674: }
676: PetscLogEventBegin(VEC_ReduceArithmetic,0,0,0,0);
677: (*x->ops->mtdot_local)(x,nv,y,sr->lvalues+sr->numopsbegin);
678: PetscLogEventEnd(VEC_ReduceArithmetic,0,0,0,0);
679: sr->numopsbegin += nv;
680: return 0;
681: }
683: /*@
684: VecMTDotEnd - Ends a split phase transpose multiple dot product computation.
686: Input Parameters:
687: + x - the first vector (can be NULL)
688: . nv - number of vectors
689: - y - array of vectors (can be NULL)
691: Output Parameters:
692: . result - where the result will go
694: Level: advanced
696: Notes:
697: Each call to VecTDotBegin() should be paired with a call to VecTDotEnd().
699: .seealso: VecMTDotBegin(), VecNormBegin(), VecNormEnd(), VecNorm(), VecDot(), VecMDot(),
700: VecDotBegin(), VecDotEnd(), VecMDotBegin(), VecMDotEnd(), PetscCommSplitReductionBegin()
701: @*/
702: PetscErrorCode VecMTDotEnd(Vec x,PetscInt nv,const Vec y[],PetscScalar result[])
703: {
704: /*
705: MTDotEnd() is the same as MDotEnd() so reuse the code
706: */
707: VecMDotEnd(x,nv,y,result);
708: return 0;
709: }