Actual source code: sfbasic.c
petsc-3.10.5 2019-03-28
2: #include <petsc/private/sfimpl.h>
4: typedef struct _n_PetscSFBasicPack *PetscSFBasicPack;
5: struct _n_PetscSFBasicPack {
6: void (*Pack)(PetscInt,PetscInt,const PetscInt*,const void*,void*);
7: void (*UnpackInsert)(PetscInt,PetscInt,const PetscInt*,void*,const void*);
8: void (*UnpackAdd)(PetscInt,PetscInt,const PetscInt*,void*,const void*);
9: void (*UnpackMin)(PetscInt,PetscInt,const PetscInt*,void*,const void*);
10: void (*UnpackMax)(PetscInt,PetscInt,const PetscInt*,void*,const void*);
11: void (*UnpackMinloc)(PetscInt,PetscInt,const PetscInt*,void*,const void*);
12: void (*UnpackMaxloc)(PetscInt,PetscInt,const PetscInt*,void*,const void*);
13: void (*UnpackMult)(PetscInt,PetscInt,const PetscInt*,void*,const void *);
14: void (*UnpackLAND)(PetscInt,PetscInt,const PetscInt*,void*,const void *);
15: void (*UnpackBAND)(PetscInt,PetscInt,const PetscInt*,void*,const void *);
16: void (*UnpackLOR)(PetscInt,PetscInt,const PetscInt*,void*,const void *);
17: void (*UnpackBOR)(PetscInt,PetscInt,const PetscInt*,void*,const void *);
18: void (*UnpackLXOR)(PetscInt,PetscInt,const PetscInt*,void*,const void *);
19: void (*UnpackBXOR)(PetscInt,PetscInt,const PetscInt*,void*,const void *);
20: void (*FetchAndInsert)(PetscInt,PetscInt,const PetscInt*,void*,void*);
21: void (*FetchAndAdd)(PetscInt,PetscInt,const PetscInt*,void*,void*);
22: void (*FetchAndMin)(PetscInt,PetscInt,const PetscInt*,void*,void*);
23: void (*FetchAndMax)(PetscInt,PetscInt,const PetscInt*,void*,void*);
24: void (*FetchAndMinloc)(PetscInt,PetscInt,const PetscInt*,void*,void*);
25: void (*FetchAndMaxloc)(PetscInt,PetscInt,const PetscInt*,void*,void*);
26: void (*FetchAndMult)(PetscInt,PetscInt,const PetscInt*,void*,void*);
27: void (*FetchAndLAND)(PetscInt,PetscInt,const PetscInt*,void*,void*);
28: void (*FetchAndBAND)(PetscInt,PetscInt,const PetscInt*,void*,void*);
29: void (*FetchAndLOR)(PetscInt,PetscInt,const PetscInt*,void*,void*);
30: void (*FetchAndBOR)(PetscInt,PetscInt,const PetscInt*,void*,void*);
31: void (*FetchAndLXOR)(PetscInt,PetscInt,const PetscInt*,void*,void*);
32: void (*FetchAndBXOR)(PetscInt,PetscInt,const PetscInt*,void*,void*);
34: MPI_Datatype unit;
35: size_t unitbytes; /* Number of bytes in a unit */
36: PetscInt bs; /* Number of basic units in a unit */
37: const void *key; /* Array used as key for operation */
38: char **root; /* Packed root data, indexed by leaf rank */
39: char **leaf; /* Packed leaf data, indexed by root rank */
40: MPI_Request *requests; /* Array of root requests followed by leaf requests */
41: PetscSFBasicPack next;
42: };
44: typedef struct {
45: PetscMPIInt tag;
46: PetscMPIInt niranks; /* Number of incoming ranks (ranks accessing my roots) */
47: PetscMPIInt ndiranks; /* Number of incoming ranks (ranks accessing my roots) in distinguished set */
48: PetscMPIInt *iranks; /* Array of ranks that reference my roots */
49: PetscInt itotal; /* Total number of graph edges referencing my roots */
50: PetscInt *ioffset; /* Array of length niranks+1 holding offset in irootloc[] for each rank */
51: PetscInt *irootloc; /* Incoming roots referenced by ranks starting at ioffset[rank] */
52: PetscSFBasicPack avail; /* One or more entries per MPI Datatype, lazily constructed */
53: PetscSFBasicPack inuse; /* Buffers being used for transactions that have not yet completed */
54: } PetscSF_Basic;
56: #if !defined(PETSC_HAVE_MPI_TYPE_DUP)
57: PETSC_STATIC_INLINE int MPI_Type_dup(MPI_Datatype datatype,MPI_Datatype *newtype)
58: {
59: int ierr;
60: MPI_Type_contiguous(1,datatype,newtype); if (ierr) return ierr;
61: MPI_Type_commit(newtype); if (ierr) return ierr;
62: return MPI_SUCCESS;
63: }
64: #endif
66: /*
67: * MPI_Reduce_local is not really useful because it can't handle sparse data and it vectorizes "in the wrong direction",
68: * therefore we pack data types manually. This section defines packing routines for the standard data types.
69: */
71: #define CPPJoin2_exp(a,b) a ## b
72: #define CPPJoin2(a,b) CPPJoin2_exp(a,b)
73: #define CPPJoin3_exp_(a,b,c) a ## b ## _ ## c
74: #define CPPJoin3_(a,b,c) CPPJoin3_exp_(a,b,c)
76: /* Basic types without addition */
77: #define DEF_PackNoInit(type,BS) \
78: static void CPPJoin3_(Pack_,type,BS)(PetscInt n,PetscInt bs,const PetscInt *idx,const void *unpacked,void *packed) { \
79: const type *u = (const type*)unpacked; \
80: type *p = (type*)packed; \
81: PetscInt i,j,k; \
82: for (i=0; i<n; i++) \
83: for (j=0; j<bs; j+=BS) \
84: for (k=j; k<j+BS; k++) \
85: p[i*bs+k] = u[idx[i]*bs+k]; \
86: } \
87: static void CPPJoin3_(UnpackInsert_,type,BS)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,const void *packed) { \
88: type *u = (type*)unpacked; \
89: const type *p = (const type*)packed; \
90: PetscInt i,j,k; \
91: for (i=0; i<n; i++) \
92: for (j=0; j<bs; j+=BS) \
93: for (k=j; k<j+BS; k++) \
94: u[idx[i]*bs+k] = p[i*bs+k]; \
95: } \
96: static void CPPJoin3_(FetchAndInsert_,type,BS)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,void *packed) { \
97: type *u = (type*)unpacked; \
98: type *p = (type*)packed; \
99: PetscInt i,j,k; \
100: for (i=0; i<n; i++) { \
101: PetscInt ii = idx[i]; \
102: for (j=0; j<bs; j+=BS) \
103: for (k=j; k<j+BS; k++) { \
104: type t = u[ii*bs+k]; \
105: u[ii*bs+k] = p[i*bs+k]; \
106: p[i*bs+k] = t; \
107: } \
108: } \
109: }
111: /* Basic types defining addition */
112: #define DEF_PackAddNoInit(type,BS) \
113: DEF_PackNoInit(type,BS) \
114: static void CPPJoin3_(UnpackAdd_,type,BS)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,const void *packed) { \
115: type *u = (type*)unpacked; \
116: const type *p = (const type*)packed; \
117: PetscInt i,j,k; \
118: for (i=0; i<n; i++) \
119: for (j=0; j<bs; j+=BS) \
120: for (k=j; k<j+BS; k++) \
121: u[idx[i]*bs+k] += p[i*bs+k]; \
122: } \
123: static void CPPJoin3_(FetchAndAdd_,type,BS)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,void *packed) { \
124: type *u = (type*)unpacked; \
125: type *p = (type*)packed; \
126: PetscInt i,j,k; \
127: for (i=0; i<n; i++) { \
128: PetscInt ii = idx[i]; \
129: for (j=0; j<bs; j+=BS) \
130: for (k=j; k<j+BS; k++) { \
131: type t = u[ii*bs+k]; \
132: u[ii*bs+k] = t + p[i*bs+k]; \
133: p[i*bs+k] = t; \
134: } \
135: } \
136: } \
137: static void CPPJoin3_(UnpackMult_,type,BS)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,const void *packed) { \
138: type *u = (type*)unpacked; \
139: const type *p = (const type*)packed; \
140: PetscInt i,j,k; \
141: for (i=0; i<n; i++) \
142: for (j=0; j<bs; j+=BS) \
143: for (k=j; k<j+BS; k++) \
144: u[idx[i]*bs+k] *= p[i*bs+k]; \
145: } \
146: static void CPPJoin3_(FetchAndMult_,type,BS)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,void *packed) { \
147: type *u = (type*)unpacked; \
148: type *p = (type*)packed; \
149: PetscInt i,j,k; \
150: for (i=0; i<n; i++) { \
151: PetscInt ii = idx[i]; \
152: for (j=0; j<bs; j+=BS) \
153: for (k=j; k<j+BS; k++) { \
154: type t = u[ii*bs+k]; \
155: u[ii*bs+k] = t * p[i*bs+k]; \
156: p[i*bs+k] = t; \
157: } \
158: } \
159: }
160: #define DEF_Pack(type,BS) \
161: DEF_PackAddNoInit(type,BS) \
162: static void CPPJoin3_(PackInit_,type,BS)(PetscSFBasicPack link) { \
163: link->Pack = CPPJoin3_(Pack_,type,BS); \
164: link->UnpackInsert = CPPJoin3_(UnpackInsert_,type,BS); \
165: link->UnpackAdd = CPPJoin3_(UnpackAdd_,type,BS); \
166: link->UnpackMult = CPPJoin3_(UnpackMult_,type,BS); \
167: link->FetchAndInsert = CPPJoin3_(FetchAndInsert_,type,BS); \
168: link->FetchAndAdd = CPPJoin3_(FetchAndAdd_,type,BS); \
169: link->FetchAndMult = CPPJoin3_(FetchAndMult_,type,BS); \
170: link->unitbytes = sizeof(type); \
171: }
172: /* Comparable types */
173: #define DEF_PackCmp(type) \
174: DEF_PackAddNoInit(type,1) \
175: static void CPPJoin2(UnpackMax_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,const void *packed) { \
176: type *u = (type*)unpacked; \
177: const type *p = (const type*)packed; \
178: PetscInt i; \
179: for (i=0; i<n; i++) { \
180: type v = u[idx[i]]; \
181: u[idx[i]] = PetscMax(v,p[i]); \
182: } \
183: } \
184: static void CPPJoin2(UnpackMin_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,const void *packed) { \
185: type *u = (type*)unpacked; \
186: const type *p = (const type*)packed; \
187: PetscInt i; \
188: for (i=0; i<n; i++) { \
189: type v = u[idx[i]]; \
190: u[idx[i]] = PetscMin(v,p[i]); \
191: } \
192: } \
193: static void CPPJoin2(FetchAndMax_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,void *packed) { \
194: type *u = (type*)unpacked; \
195: type *p = (type*)packed; \
196: PetscInt i; \
197: for (i=0; i<n; i++) { \
198: PetscInt j = idx[i]; \
199: type v = u[j]; \
200: u[j] = PetscMax(v,p[i]); \
201: p[i] = v; \
202: } \
203: } \
204: static void CPPJoin2(FetchAndMin_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,void *packed) { \
205: type *u = (type*)unpacked; \
206: type *p = (type*)packed; \
207: PetscInt i; \
208: for (i=0; i<n; i++) { \
209: PetscInt j = idx[i]; \
210: type v = u[j]; \
211: u[j] = PetscMin(v,p[i]); \
212: p[i] = v; \
213: } \
214: } \
215: static void CPPJoin2(PackInit_,type)(PetscSFBasicPack link) { \
216: link->Pack = CPPJoin3_(Pack_,type,1); \
217: link->UnpackInsert = CPPJoin3_(UnpackInsert_,type,1); \
218: link->UnpackAdd = CPPJoin3_(UnpackAdd_,type,1); \
219: link->UnpackMax = CPPJoin2(UnpackMax_,type); \
220: link->UnpackMin = CPPJoin2(UnpackMin_,type); \
221: link->UnpackMult = CPPJoin3_(UnpackMult_,type,1); \
222: link->FetchAndInsert = CPPJoin3_(FetchAndInsert_,type,1); \
223: link->FetchAndAdd = CPPJoin3_(FetchAndAdd_ ,type,1); \
224: link->FetchAndMax = CPPJoin2(FetchAndMax_ ,type); \
225: link->FetchAndMin = CPPJoin2(FetchAndMin_ ,type); \
226: link->FetchAndMult = CPPJoin3_(FetchAndMult_,type,1); \
227: link->unitbytes = sizeof(type); \
228: }
230: /* Logical Types */
231: #define DEF_PackLog(type) \
232: static void CPPJoin2(UnpackLAND_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,const void *packed) { \
233: type *u = (type*)unpacked; \
234: const type *p = (const type*)packed; \
235: PetscInt i; \
236: for (i=0; i<n; i++) { \
237: type v = u[idx[i]]; \
238: u[idx[i]] = v && p[i]; \
239: } \
240: } \
241: static void CPPJoin2(UnpackLOR_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,const void *packed) { \
242: type *u = (type*)unpacked; \
243: const type *p = (const type*)packed; \
244: PetscInt i; \
245: for (i=0; i<n; i++) { \
246: type v = u[idx[i]]; \
247: u[idx[i]] = v || p[i]; \
248: } \
249: } \
250: static void CPPJoin2(UnpackLXOR_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,const void *packed) { \
251: type *u = (type*)unpacked; \
252: const type *p = (const type*)packed; \
253: PetscInt i; \
254: for (i=0; i<n; i++) { \
255: type v = u[idx[i]]; \
256: u[idx[i]] = (!v)!=(!p[i]); \
257: } \
258: } \
259: static void CPPJoin2(FetchAndLAND_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,void *packed) { \
260: type *u = (type*)unpacked; \
261: type *p = (type*)packed; \
262: PetscInt i; \
263: for (i=0; i<n; i++) { \
264: PetscInt j = idx[i]; \
265: type v = u[j]; \
266: u[j] = v && p[i]; \
267: p[i] = v; \
268: } \
269: } \
270: static void CPPJoin2(FetchAndLOR_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,void *packed) { \
271: type *u = (type*)unpacked; \
272: type *p = (type*)packed; \
273: PetscInt i; \
274: for (i=0; i<n; i++) { \
275: PetscInt j = idx[i]; \
276: type v = u[j]; \
277: u[j] = v || p[i]; \
278: p[i] = v; \
279: } \
280: } \
281: static void CPPJoin2(FetchAndLXOR_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,void *packed) { \
282: type *u = (type*)unpacked; \
283: type *p = (type*)packed; \
284: PetscInt i; \
285: for (i=0; i<n; i++) { \
286: PetscInt j = idx[i]; \
287: type v = u[j]; \
288: u[j] = (!v)!=(!p[i]); \
289: p[i] = v; \
290: } \
291: } \
292: static void CPPJoin2(PackInit_Logical_,type)(PetscSFBasicPack link) { \
293: link->UnpackLAND = CPPJoin2(UnpackLAND_,type); \
294: link->UnpackLOR = CPPJoin2(UnpackLOR_,type); \
295: link->UnpackLXOR = CPPJoin2(UnpackLXOR_,type); \
296: link->FetchAndLAND = CPPJoin2(FetchAndLAND_,type); \
297: link->FetchAndLOR = CPPJoin2(FetchAndLOR_,type); \
298: link->FetchAndLXOR = CPPJoin2(FetchAndLXOR_,type); \
299: }
302: /* Bitwise Types */
303: #define DEF_PackBit(type) \
304: static void CPPJoin2(UnpackBAND_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,const void *packed) { \
305: type *u = (type*)unpacked; \
306: const type *p = (const type*)packed; \
307: PetscInt i; \
308: for (i=0; i<n; i++) { \
309: type v = u[idx[i]]; \
310: u[idx[i]] = v & p[i]; \
311: } \
312: } \
313: static void CPPJoin2(UnpackBOR_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,const void *packed) { \
314: type *u = (type*)unpacked; \
315: const type *p = (const type*)packed; \
316: PetscInt i; \
317: for (i=0; i<n; i++) { \
318: type v = u[idx[i]]; \
319: u[idx[i]] = v | p[i]; \
320: } \
321: } \
322: static void CPPJoin2(UnpackBXOR_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,const void *packed) { \
323: type *u = (type*)unpacked; \
324: const type *p = (const type*)packed; \
325: PetscInt i; \
326: for (i=0; i<n; i++) { \
327: type v = u[idx[i]]; \
328: u[idx[i]] = v^p[i]; \
329: } \
330: } \
331: static void CPPJoin2(FetchAndBAND_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,void *packed) { \
332: type *u = (type*)unpacked; \
333: type *p = (type*)packed; \
334: PetscInt i; \
335: for (i=0; i<n; i++) { \
336: PetscInt j = idx[i]; \
337: type v = u[j]; \
338: u[j] = v & p[i]; \
339: p[i] = v; \
340: } \
341: } \
342: static void CPPJoin2(FetchAndBOR_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,void *packed) { \
343: type *u = (type*)unpacked; \
344: type *p = (type*)packed; \
345: PetscInt i; \
346: for (i=0; i<n; i++) { \
347: PetscInt j = idx[i]; \
348: type v = u[j]; \
349: u[j] = v | p[i]; \
350: p[i] = v; \
351: } \
352: } \
353: static void CPPJoin2(FetchAndBXOR_,type)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,void *packed) { \
354: type *u = (type*)unpacked; \
355: type *p = (type*)packed; \
356: PetscInt i; \
357: for (i=0; i<n; i++) { \
358: PetscInt j = idx[i]; \
359: type v = u[j]; \
360: u[j] = v^p[i]; \
361: p[i] = v; \
362: } \
363: } \
364: static void CPPJoin2(PackInit_Bitwise_,type)(PetscSFBasicPack link) { \
365: link->UnpackBAND = CPPJoin2(UnpackBAND_,type); \
366: link->UnpackBOR = CPPJoin2(UnpackBOR_,type); \
367: link->UnpackBXOR = CPPJoin2(UnpackBXOR_,type); \
368: link->FetchAndBAND = CPPJoin2(FetchAndBAND_,type); \
369: link->FetchAndBOR = CPPJoin2(FetchAndBOR_,type); \
370: link->FetchAndBXOR = CPPJoin2(FetchAndBXOR_,type); \
371: }
373: /* Pair types */
374: #define CPPJoinloc_exp(base,op,t1,t2) base ## op ## loc_ ## t1 ## _ ## t2
375: #define CPPJoinloc(base,op,t1,t2) CPPJoinloc_exp(base,op,t1,t2)
376: #define PairType(type1,type2) CPPJoin3_(_pairtype_,type1,type2)
377: #define DEF_UnpackXloc(type1,type2,locname,op) \
378: static void CPPJoinloc(Unpack,locname,type1,type2)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,const void *packed) { \
379: PairType(type1,type2) *u = (PairType(type1,type2)*)unpacked; \
380: const PairType(type1,type2) *p = (const PairType(type1,type2)*)packed; \
381: PetscInt i; \
382: for (i=0; i<n; i++) { \
383: PetscInt j = idx[i]; \
384: if (p[i].a op u[j].a) { \
385: u[j].a = p[i].a; \
386: u[j].b = p[i].b; \
387: } else if (u[j].a == p[i].a) { \
388: u[j].b = PetscMin(u[j].b,p[i].b); \
389: } \
390: } \
391: } \
392: static void CPPJoinloc(FetchAnd,locname,type1,type2)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,void *packed) { \
393: PairType(type1,type2) *u = (PairType(type1,type2)*)unpacked; \
394: PairType(type1,type2) *p = (PairType(type1,type2)*)packed; \
395: PetscInt i; \
396: for (i=0; i<n; i++) { \
397: PetscInt j = idx[i]; \
398: PairType(type1,type2) v; \
399: v.a = u[j].a; \
400: v.b = u[j].b; \
401: if (p[i].a op u[j].a) { \
402: u[j].a = p[i].a; \
403: u[j].b = p[i].b; \
404: } else if (u[j].a == p[i].a) { \
405: u[j].b = PetscMin(u[j].b,p[i].b); \
406: } \
407: p[i].a = v.a; \
408: p[i].b = v.b; \
409: } \
410: }
411: #define DEF_PackPair(type1,type2) \
412: typedef struct {type1 a; type2 b;} PairType(type1,type2); \
413: static void CPPJoin3_(Pack_,type1,type2)(PetscInt n,PetscInt bs,const PetscInt *idx,const void *unpacked,void *packed) { \
414: const PairType(type1,type2) *u = (const PairType(type1,type2)*)unpacked; \
415: PairType(type1,type2) *p = (PairType(type1,type2)*)packed; \
416: PetscInt i; \
417: for (i=0; i<n; i++) { \
418: p[i].a = u[idx[i]].a; \
419: p[i].b = u[idx[i]].b; \
420: } \
421: } \
422: static void CPPJoin3_(UnpackInsert_,type1,type2)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,const void *packed) { \
423: PairType(type1,type2) *u = (PairType(type1,type2)*)unpacked; \
424: const PairType(type1,type2) *p = (const PairType(type1,type2)*)packed; \
425: PetscInt i; \
426: for (i=0; i<n; i++) { \
427: u[idx[i]].a = p[i].a; \
428: u[idx[i]].b = p[i].b; \
429: } \
430: } \
431: static void CPPJoin3_(UnpackAdd_,type1,type2)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,const void *packed) { \
432: PairType(type1,type2) *u = (PairType(type1,type2)*)unpacked; \
433: const PairType(type1,type2) *p = (const PairType(type1,type2)*)packed; \
434: PetscInt i; \
435: for (i=0; i<n; i++) { \
436: u[idx[i]].a += p[i].a; \
437: u[idx[i]].b += p[i].b; \
438: } \
439: } \
440: static void CPPJoin3_(FetchAndInsert_,type1,type2)(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,void *packed) { \
441: PairType(type1,type2) *u = (PairType(type1,type2)*)unpacked; \
442: PairType(type1,type2) *p = (PairType(type1,type2)*)packed; \
443: PetscInt i; \
444: for (i=0; i<n; i++) { \
445: PetscInt j = idx[i]; \
446: PairType(type1,type2) v; \
447: v.a = u[j].a; \
448: v.b = u[j].b; \
449: u[j].a = p[i].a; \
450: u[j].b = p[i].b; \
451: p[i].a = v.a; \
452: p[i].b = v.b; \
453: } \
454: } \
455: static void FetchAndAdd_ ## type1 ## _ ## type2(PetscInt n,PetscInt bs,const PetscInt *idx,void *unpacked,void *packed) { \
456: PairType(type1,type2) *u = (PairType(type1,type2)*)unpacked; \
457: PairType(type1,type2) *p = (PairType(type1,type2)*)packed; \
458: PetscInt i; \
459: for (i=0; i<n; i++) { \
460: PetscInt j = idx[i]; \
461: PairType(type1,type2) v; \
462: v.a = u[j].a; \
463: v.b = u[j].b; \
464: u[j].a = v.a + p[i].a; \
465: u[j].b = v.b + p[i].b; \
466: p[i].a = v.a; \
467: p[i].b = v.b; \
468: } \
469: } \
470: DEF_UnpackXloc(type1,type2,Max,>) \
471: DEF_UnpackXloc(type1,type2,Min,<) \
472: static void CPPJoin3_(PackInit_,type1,type2)(PetscSFBasicPack link) { \
473: link->Pack = CPPJoin3_(Pack_,type1,type2); \
474: link->UnpackInsert = CPPJoin3_(UnpackInsert_,type1,type2); \
475: link->UnpackAdd = CPPJoin3_(UnpackAdd_,type1,type2); \
476: link->UnpackMaxloc = CPPJoin3_(UnpackMaxloc_,type1,type2); \
477: link->UnpackMinloc = CPPJoin3_(UnpackMinloc_,type1,type2); \
478: link->FetchAndInsert = CPPJoin3_(FetchAndInsert_,type1,type2); \
479: link->FetchAndAdd = CPPJoin3_(FetchAndAdd_,type1,type2); \
480: link->FetchAndMaxloc = CPPJoin3_(FetchAndMaxloc_,type1,type2); \
481: link->FetchAndMinloc = CPPJoin3_(FetchAndMinloc_,type1,type2); \
482: link->unitbytes = sizeof(PairType(type1,type2)); \
483: }
485: /* Currently only dumb blocks of data */
486: #define BlockType(unit,count) CPPJoin3_(_blocktype_,unit,count)
487: #define DEF_Block(unit,count) \
488: typedef struct {unit v[count];} BlockType(unit,count); \
489: DEF_PackNoInit(BlockType(unit,count),1) \
490: static void CPPJoin3_(PackInit_block_,unit,count)(PetscSFBasicPack link) { \
491: link->Pack = CPPJoin3_(Pack_,BlockType(unit,count),1); \
492: link->UnpackInsert = CPPJoin3_(UnpackInsert_,BlockType(unit,count),1); \
493: link->FetchAndInsert = CPPJoin3_(FetchAndInsert_,BlockType(unit,count),1); \
494: link->unitbytes = sizeof(BlockType(unit,count)); \
495: }
497: DEF_PackCmp(int)
498: DEF_PackBit(int)
499: DEF_PackLog(int)
500: DEF_PackCmp(PetscInt)
501: DEF_PackBit(PetscInt)
502: DEF_PackLog(PetscInt)
503: DEF_Pack(PetscInt,2)
504: DEF_Pack(PetscInt,3)
505: DEF_Pack(PetscInt,4)
506: DEF_Pack(PetscInt,5)
507: DEF_Pack(PetscInt,7)
508: DEF_PackCmp(PetscReal)
509: DEF_PackLog(PetscReal)
510: DEF_Pack(PetscReal,2)
511: DEF_Pack(PetscReal,3)
512: DEF_Pack(PetscReal,4)
513: DEF_Pack(PetscReal,5)
514: DEF_Pack(PetscReal,7)
515: #if defined(PETSC_HAVE_COMPLEX)
516: DEF_Pack(PetscComplex,1)
517: DEF_Pack(PetscComplex,2)
518: DEF_Pack(PetscComplex,3)
519: DEF_Pack(PetscComplex,4)
520: DEF_Pack(PetscComplex,5)
521: DEF_Pack(PetscComplex,7)
522: #endif
523: DEF_PackPair(int,int)
524: DEF_PackPair(PetscInt,PetscInt)
525: DEF_Block(int,1)
526: DEF_Block(int,2)
527: DEF_Block(int,3)
528: DEF_Block(int,4)
529: DEF_Block(int,5)
530: DEF_Block(int,6)
531: DEF_Block(int,7)
532: DEF_Block(int,8)
534: static PetscErrorCode PetscSFSetUp_Basic(PetscSF sf)
535: {
536: PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
538: PetscInt *rlengths,*ilengths,i;
539: PetscMPIInt rank,niranks,*iranks;
540: MPI_Comm comm;
541: MPI_Group group;
542: MPI_Request *rootreqs,*leafreqs;
545: MPI_Comm_group(PETSC_COMM_SELF,&group);
546: PetscSFSetUpRanks(sf,group);
547: MPI_Group_free(&group);
548: PetscObjectGetComm((PetscObject)sf,&comm);
549: PetscObjectGetNewTag((PetscObject)sf,&bas->tag);
550: MPI_Comm_rank(comm,&rank);
551: /*
552: * Inform roots about how many leaves and from which ranks
553: */
554: PetscMalloc1(sf->nranks,&rlengths);
555: /* Determine number, sending ranks, and length of incoming */
556: for (i=0; i<sf->nranks; i++) {
557: rlengths[i] = sf->roffset[i+1] - sf->roffset[i]; /* Number of roots referenced by my leaves; for rank sf->ranks[i] */
558: }
559: PetscCommBuildTwoSided(comm,1,MPIU_INT,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,rlengths+sf->ndranks,&niranks,&iranks,(void**)&ilengths);
561: /* Partition into distinguished and non-distinguished incoming ranks */
562: bas->ndiranks = sf->ndranks;
563: bas->niranks = bas->ndiranks + niranks;
564: PetscMalloc2(bas->niranks,&bas->iranks,bas->niranks+1,&bas->ioffset);
565: bas->ioffset[0] = 0;
566: for (i=0; i<bas->ndiranks; i++) {
567: bas->iranks[i] = sf->ranks[i];
568: bas->ioffset[i+1] = bas->ioffset[i] + rlengths[i];
569: }
570: if (bas->ndiranks > 1 || (bas->ndiranks == 1 && bas->iranks[0] != rank)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Broken setup for shared ranks");
571: for ( ; i<bas->niranks; i++) {
572: bas->iranks[i] = iranks[i-bas->ndiranks];
573: bas->ioffset[i+1] = bas->ioffset[i] + ilengths[i-bas->ndiranks];
574: }
575: bas->itotal = bas->ioffset[i];
576: PetscFree(rlengths);
577: PetscFree(iranks);
578: PetscFree(ilengths);
580: /* Send leaf identities to roots */
581: PetscMalloc1(bas->itotal,&bas->irootloc);
582: PetscMalloc2(bas->niranks-bas->ndiranks,&rootreqs,sf->nranks-sf->ndranks,&leafreqs);
583: for (i=bas->ndiranks; i<bas->niranks; i++) {
584: MPI_Irecv(bas->irootloc+bas->ioffset[i],bas->ioffset[i+1]-bas->ioffset[i],MPIU_INT,bas->iranks[i],bas->tag,comm,&rootreqs[i-bas->ndiranks]);
585: }
586: for (i=0; i<sf->nranks; i++) {
587: PetscMPIInt npoints;
588: PetscMPIIntCast(sf->roffset[i+1] - sf->roffset[i],&npoints);
589: if (i < sf->ndranks) {
590: if (sf->ranks[i] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished leaf rank");
591: if (bas->iranks[0] != rank) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot interpret distinguished root rank");
592: if (npoints != bas->ioffset[1]-bas->ioffset[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Distinguished rank exchange has mismatched lengths");
593: PetscMemcpy(bas->irootloc+bas->ioffset[0],sf->rremote+sf->roffset[i],npoints*sizeof(bas->irootloc[0]));
594: continue;
595: }
596: MPI_Isend(sf->rremote+sf->roffset[i],npoints,MPIU_INT,sf->ranks[i],bas->tag,comm,&leafreqs[i-sf->ndranks]);
597: }
598: MPI_Waitall(bas->niranks-bas->ndiranks,rootreqs,MPI_STATUSES_IGNORE);
599: MPI_Waitall(sf->nranks-sf->ndranks,leafreqs,MPI_STATUSES_IGNORE);
600: PetscFree2(rootreqs,leafreqs);
601: return(0);
602: }
604: static PetscErrorCode PetscSFBasicPackTypeSetup(PetscSFBasicPack link,MPI_Datatype unit)
605: {
607: PetscBool isInt,isPetscInt,isPetscReal,is2Int,is2PetscInt;
608: PetscInt nPetscIntContig,nPetscRealContig;
609: #if defined(PETSC_HAVE_COMPLEX)
610: PetscBool isPetscComplex;
611: PetscInt nPetscComplexContig;
612: #endif
615: MPIPetsc_Type_compare(unit,MPI_INT,&isInt);
616: MPIPetsc_Type_compare(unit,MPIU_INT,&isPetscInt);
617: MPIPetsc_Type_compare_contig(unit,MPIU_INT,&nPetscIntContig);
618: MPIPetsc_Type_compare(unit,MPIU_REAL,&isPetscReal);
619: MPIPetsc_Type_compare_contig(unit,MPIU_REAL,&nPetscRealContig);
620: #if defined(PETSC_HAVE_COMPLEX)
621: MPIPetsc_Type_compare(unit,MPIU_COMPLEX,&isPetscComplex);
622: MPIPetsc_Type_compare_contig(unit,MPIU_COMPLEX,&nPetscComplexContig);
623: #endif
624: MPIPetsc_Type_compare(unit,MPI_2INT,&is2Int);
625: MPIPetsc_Type_compare(unit,MPIU_2INT,&is2PetscInt);
626: link->bs = 1;
627: if (isInt) {PackInit_int(link); PackInit_Logical_int(link); PackInit_Bitwise_int(link);}
628: else if (isPetscInt) {PackInit_PetscInt(link); PackInit_Logical_PetscInt(link); PackInit_Bitwise_PetscInt(link);}
629: else if (isPetscReal) {PackInit_PetscReal(link); PackInit_Logical_PetscReal(link);}
630: #if defined(PETSC_HAVE_COMPLEX)
631: else if (isPetscComplex) PackInit_PetscComplex_1(link);
632: #endif
633: else if (is2Int) PackInit_int_int(link);
634: else if (is2PetscInt) PackInit_PetscInt_PetscInt(link);
635: else if (nPetscIntContig) {
636: if (nPetscIntContig%7 == 0) PackInit_PetscInt_7(link);
637: else if (nPetscIntContig%5 == 0) PackInit_PetscInt_5(link);
638: else if (nPetscIntContig%4 == 0) PackInit_PetscInt_4(link);
639: else if (nPetscIntContig%3 == 0) PackInit_PetscInt_3(link);
640: else if (nPetscIntContig%2 == 0) PackInit_PetscInt_2(link);
641: else PackInit_PetscInt(link);
642: link->bs = nPetscIntContig;
643: link->unitbytes *= nPetscIntContig;
644: } else if (nPetscRealContig) {
645: if (nPetscRealContig%7 == 0) PackInit_PetscReal_7(link);
646: else if (nPetscRealContig%5 == 0) PackInit_PetscReal_5(link);
647: else if (nPetscRealContig%4 == 0) PackInit_PetscReal_4(link);
648: else if (nPetscRealContig%3 == 0) PackInit_PetscReal_3(link);
649: else if (nPetscRealContig%2 == 0) PackInit_PetscReal_2(link);
650: else PackInit_PetscReal(link);
651: link->bs = nPetscRealContig;
652: link->unitbytes *= nPetscRealContig;
653: #if defined(PETSC_HAVE_COMPLEX)
654: } else if (nPetscComplexContig) {
655: if (nPetscComplexContig%7 == 0) PackInit_PetscComplex_7(link);
656: else if (nPetscComplexContig%5 == 0) PackInit_PetscComplex_5(link);
657: else if (nPetscComplexContig%4 == 0) PackInit_PetscComplex_4(link);
658: else if (nPetscComplexContig%3 == 0) PackInit_PetscComplex_3(link);
659: else if (nPetscComplexContig%2 == 0) PackInit_PetscComplex_2(link);
660: else PackInit_PetscComplex_1(link);
661: link->bs = nPetscComplexContig;
662: link->unitbytes *= nPetscComplexContig;
663: #endif
664: } else {
665: MPI_Aint lb,bytes;
666: MPI_Type_get_extent(unit,&lb,&bytes);
667: if (lb != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Datatype with nonzero lower bound %ld\n",(long)lb);
668: if (bytes % sizeof(int)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for type size not divisible by %D",sizeof(int));
669: switch (bytes / sizeof(int)) {
670: case 1: PackInit_block_int_1(link); break;
671: case 2: PackInit_block_int_2(link); break;
672: case 3: PackInit_block_int_3(link); break;
673: case 4: PackInit_block_int_4(link); break;
674: case 5: PackInit_block_int_5(link); break;
675: case 6: PackInit_block_int_6(link); break;
676: case 7: PackInit_block_int_7(link); break;
677: case 8: PackInit_block_int_8(link); break;
678: default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for arbitrary block sizes");
679: }
680: }
681: MPI_Type_dup(unit,&link->unit);
682: return(0);
683: }
685: static PetscErrorCode PetscSFBasicPackGetUnpackOp(PetscSF sf,PetscSFBasicPack link,MPI_Op op,void (**UnpackOp)(PetscInt,PetscInt,const PetscInt*,void*,const void*))
686: {
688: *UnpackOp = NULL;
689: if (op == MPIU_REPLACE) *UnpackOp = link->UnpackInsert;
690: else if (op == MPI_SUM || op == MPIU_SUM) *UnpackOp = link->UnpackAdd;
691: else if (op == MPI_PROD) *UnpackOp = link->UnpackMult;
692: else if (op == MPI_MAX || op == MPIU_MAX) *UnpackOp = link->UnpackMax;
693: else if (op == MPI_MIN || op == MPIU_MIN) *UnpackOp = link->UnpackMin;
694: else if (op == MPI_LAND) *UnpackOp = link->UnpackLAND;
695: else if (op == MPI_BAND) *UnpackOp = link->UnpackBAND;
696: else if (op == MPI_LOR) *UnpackOp = link->UnpackLOR;
697: else if (op == MPI_BOR) *UnpackOp = link->UnpackBOR;
698: else if (op == MPI_LXOR) *UnpackOp = link->UnpackLXOR;
699: else if (op == MPI_BXOR) *UnpackOp = link->UnpackBXOR;
700: else if (op == MPI_MAXLOC) *UnpackOp = link->UnpackMaxloc;
701: else if (op == MPI_MINLOC) *UnpackOp = link->UnpackMinloc;
702: else *UnpackOp = NULL;
703: return(0);
704: }
705: static PetscErrorCode PetscSFBasicPackGetFetchAndOp(PetscSF sf,PetscSFBasicPack link,MPI_Op op,void (**FetchAndOp)(PetscInt,PetscInt,const PetscInt*,void*,void*))
706: {
708: *FetchAndOp = NULL;
709: if (op == MPIU_REPLACE) *FetchAndOp = link->FetchAndInsert;
710: else if (op == MPI_SUM || op == MPIU_SUM) *FetchAndOp = link->FetchAndAdd;
711: else if (op == MPI_MAX || op == MPIU_MAX) *FetchAndOp = link->FetchAndMax;
712: else if (op == MPI_MIN || op == MPIU_MIN) *FetchAndOp = link->FetchAndMin;
713: else if (op == MPI_MAXLOC) *FetchAndOp = link->FetchAndMaxloc;
714: else if (op == MPI_MINLOC) *FetchAndOp = link->FetchAndMinloc;
715: else if (op == MPI_PROD) *FetchAndOp = link->FetchAndMult;
716: else if (op == MPI_LAND) *FetchAndOp = link->FetchAndLAND;
717: else if (op == MPI_BAND) *FetchAndOp = link->FetchAndBAND;
718: else if (op == MPI_LOR) *FetchAndOp = link->FetchAndLOR;
719: else if (op == MPI_BOR) *FetchAndOp = link->FetchAndBOR;
720: else if (op == MPI_LXOR) *FetchAndOp = link->FetchAndLXOR;
721: else if (op == MPI_BXOR) *FetchAndOp = link->FetchAndBXOR;
722: else SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"No support for MPI_Op");
723: return(0);
724: }
726: static PetscErrorCode PetscSFBasicPackGetReqs(PetscSF sf,PetscSFBasicPack link,MPI_Request **rootreqs,MPI_Request **leafreqs)
727: {
728: PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
731: if (rootreqs) *rootreqs = link->requests;
732: if (leafreqs) *leafreqs = link->requests + (bas->niranks - bas->ndiranks);
733: return(0);
734: }
736: static PetscErrorCode PetscSFBasicPackWaitall(PetscSF sf,PetscSFBasicPack link)
737: {
738: PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
742: MPI_Waitall(bas->niranks+sf->nranks-(bas->ndiranks+sf->ndranks),link->requests,MPI_STATUSES_IGNORE);
743: return(0);
744: }
746: static PetscErrorCode PetscSFBasicGetRootInfo(PetscSF sf,PetscInt *nrootranks,PetscInt *ndrootranks,const PetscMPIInt **rootranks,const PetscInt **rootoffset,const PetscInt **rootloc)
747: {
748: PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
751: if (nrootranks) *nrootranks = bas->niranks;
752: if (ndrootranks) *ndrootranks = bas->ndiranks;
753: if (rootranks) *rootranks = bas->iranks;
754: if (rootoffset) *rootoffset = bas->ioffset;
755: if (rootloc) *rootloc = bas->irootloc;
756: return(0);
757: }
759: static PetscErrorCode PetscSFBasicGetLeafInfo(PetscSF sf,PetscInt *nleafranks,PetscInt *ndleafranks,const PetscMPIInt **leafranks,const PetscInt **leafoffset,const PetscInt **leafloc)
760: {
762: if (nleafranks) *nleafranks = sf->nranks;
763: if (ndleafranks) *ndleafranks = sf->ndranks;
764: if (leafranks) *leafranks = sf->ranks;
765: if (leafoffset) *leafoffset = sf->roffset;
766: if (leafloc) *leafloc = sf->rmine;
767: return(0);
768: }
770: static PetscErrorCode PetscSFBasicGetPack(PetscSF sf,MPI_Datatype unit,const void *key,PetscSFBasicPack *mylink)
771: {
772: PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
773: PetscErrorCode ierr;
774: PetscSFBasicPack link,*p;
775: PetscInt nrootranks,ndrootranks,nleafranks,ndleafranks,i;
776: const PetscInt *rootoffset,*leafoffset;
779: /* Look for types in cache */
780: for (p=&bas->avail; (link=*p); p=&link->next) {
781: PetscBool match;
782: MPIPetsc_Type_compare(unit,link->unit,&match);
783: if (match) {
784: *p = link->next; /* Remove from available list */
785: goto found;
786: }
787: }
789: /* Create new composite types for each send rank */
790: PetscSFBasicGetRootInfo(sf,&nrootranks,&ndrootranks,NULL,&rootoffset,NULL);
791: PetscSFBasicGetLeafInfo(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,NULL);
792: PetscNew(&link);
793: PetscSFBasicPackTypeSetup(link,unit);
794: PetscMalloc2(nrootranks,&link->root,nleafranks,&link->leaf);
795: for (i=0; i<nrootranks; i++) {
796: PetscMalloc((rootoffset[i+1]-rootoffset[i])*link->unitbytes,&link->root[i]);
797: }
798: for (i=0; i<nleafranks; i++) {
799: if (i < ndleafranks) { /* Leaf buffers for distinguished ranks are pointers directly into root buffers */
800: if (ndrootranks != 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot match distinguished ranks");
801: link->leaf[i] = link->root[0];
802: continue;
803: }
804: PetscMalloc((leafoffset[i+1]-leafoffset[i])*link->unitbytes,&link->leaf[i]);
805: }
806: PetscCalloc1(nrootranks+nleafranks,&link->requests);
808: found:
809: link->key = key;
810: link->next = bas->inuse;
811: bas->inuse = link;
813: *mylink = link;
814: return(0);
815: }
817: static PetscErrorCode PetscSFBasicGetPackInUse(PetscSF sf,MPI_Datatype unit,const void *key,PetscCopyMode cmode,PetscSFBasicPack *mylink)
818: {
819: PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
820: PetscErrorCode ierr;
821: PetscSFBasicPack link,*p;
824: /* Look for types in cache */
825: for (p=&bas->inuse; (link=*p); p=&link->next) {
826: PetscBool match;
827: MPIPetsc_Type_compare(unit,link->unit,&match);
828: if (match && (key == link->key)) {
829: switch (cmode) {
830: case PETSC_OWN_POINTER: *p = link->next; break; /* Remove from inuse list */
831: case PETSC_USE_POINTER: break;
832: default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_INCOMP,"invalid cmode");
833: }
834: *mylink = link;
835: return(0);
836: }
837: }
838: SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Could not find pack");
839: return(0);
840: }
842: static PetscErrorCode PetscSFBasicReclaimPack(PetscSF sf,PetscSFBasicPack *link)
843: {
844: PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
847: (*link)->key = NULL;
848: (*link)->next = bas->avail;
849: bas->avail = *link;
850: *link = NULL;
851: return(0);
852: }
854: static PetscErrorCode PetscSFSetFromOptions_Basic(PetscOptionItems *PetscOptionsObject,PetscSF sf)
855: {
859: PetscOptionsHead(PetscOptionsObject,"PetscSF Basic options");
860: PetscOptionsTail();
861: return(0);
862: }
864: static PetscErrorCode PetscSFReset_Basic(PetscSF sf)
865: {
866: PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
867: PetscErrorCode ierr;
868: PetscSFBasicPack link,next;
871: if (bas->inuse) SETERRQ(PetscObjectComm((PetscObject)sf),PETSC_ERR_ARG_WRONGSTATE,"Outstanding operation has not been completed");
872: PetscFree2(bas->iranks,bas->ioffset);
873: PetscFree(bas->irootloc);
874: for (link=bas->avail; link; link=next) {
875: PetscInt i;
876: next = link->next;
877: MPI_Type_free(&link->unit);
878: for (i=0; i<bas->niranks; i++) {PetscFree(link->root[i]);}
879: for (i=sf->ndranks; i<sf->nranks; i++) {PetscFree(link->leaf[i]);} /* Free only non-distinguished leaf buffers */
880: PetscFree2(link->root,link->leaf);
881: PetscFree(link->requests);
882: PetscFree(link);
883: }
884: bas->avail = NULL;
885: return(0);
886: }
888: static PetscErrorCode PetscSFDestroy_Basic(PetscSF sf)
889: {
893: PetscSFReset_Basic(sf);
894: PetscFree(sf->data);
895: return(0);
896: }
898: static PetscErrorCode PetscSFView_Basic(PetscSF sf,PetscViewer viewer)
899: {
900: /* PetscSF_Basic *bas = (PetscSF_Basic*)sf->data; */
902: PetscBool iascii;
905: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
906: if (iascii) {
907: PetscViewerASCIIPrintf(viewer," sort=%s\n",sf->rankorder ? "rank-order" : "unordered");
908: }
909: return(0);
910: }
912: /* Send from roots to leaves */
913: static PetscErrorCode PetscSFBcastBegin_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
914: {
915: PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
916: PetscErrorCode ierr;
917: PetscSFBasicPack link;
918: PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks;
919: const PetscInt *rootoffset,*leafoffset,*rootloc,*leafloc;
920: const PetscMPIInt *rootranks,*leafranks;
921: MPI_Request *rootreqs,*leafreqs;
924: PetscSFBasicGetRootInfo(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);
925: PetscSFBasicGetLeafInfo(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc);
926: PetscSFBasicGetPack(sf,unit,rootdata,&link);
928: PetscSFBasicPackGetReqs(sf,link,&rootreqs,&leafreqs);
929: /* Eagerly post leaf receives, but only from non-distinguished ranks -- distinguished ranks will receive via shared memory */
930: for (i=ndleafranks; i<nleafranks; i++) {
931: PetscMPIInt n = leafoffset[i+1] - leafoffset[i];
932: MPI_Irecv(link->leaf[i],n,unit,leafranks[i],bas->tag,PetscObjectComm((PetscObject)sf),&leafreqs[i-ndleafranks]);
933: }
934: /* Pack and send root data */
935: for (i=0; i<nrootranks; i++) {
936: PetscMPIInt n = rootoffset[i+1] - rootoffset[i];
937: void *packstart = link->root[i];
938: (*link->Pack)(n,link->bs,rootloc+rootoffset[i],rootdata,packstart);
939: if (i < ndrootranks) continue; /* shared memory */
940: MPI_Isend(packstart,n,unit,rootranks[i],bas->tag,PetscObjectComm((PetscObject)sf),&rootreqs[i-ndrootranks]);
941: }
942: return(0);
943: }
945: PetscErrorCode PetscSFBcastEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
946: {
947: PetscErrorCode ierr;
948: PetscSFBasicPack link;
949: PetscInt i,nleafranks,ndleafranks;
950: const PetscInt *leafoffset,*leafloc;
953: PetscSFBasicGetPackInUse(sf,unit,rootdata,PETSC_OWN_POINTER,&link);
954: PetscSFBasicPackWaitall(sf,link);
955: PetscSFBasicGetLeafInfo(sf,&nleafranks,&ndleafranks,NULL,&leafoffset,&leafloc);
956: for (i=0; i<nleafranks; i++) {
957: PetscMPIInt n = leafoffset[i+1] - leafoffset[i];
958: const void *packstart = link->leaf[i];
959: (*link->UnpackInsert)(n,link->bs,leafloc+leafoffset[i],leafdata,packstart);
960: }
961: PetscSFBasicReclaimPack(sf,&link);
962: return(0);
963: }
965: /* leaf -> root with reduction */
966: PetscErrorCode PetscSFReduceBegin_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
967: {
968: PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
969: PetscSFBasicPack link;
970: PetscErrorCode ierr;
971: PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks;
972: const PetscInt *rootoffset,*leafoffset,*rootloc,*leafloc;
973: const PetscMPIInt *rootranks,*leafranks;
974: MPI_Request *rootreqs,*leafreqs;
977: PetscSFBasicGetRootInfo(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);
978: PetscSFBasicGetLeafInfo(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc);
979: PetscSFBasicGetPack(sf,unit,rootdata,&link);
981: PetscSFBasicPackGetReqs(sf,link,&rootreqs,&leafreqs);
982: /* Eagerly post root receives for non-distinguished ranks */
983: for (i=ndrootranks; i<nrootranks; i++) {
984: PetscMPIInt n = rootoffset[i+1] - rootoffset[i];
985: MPI_Irecv(link->root[i],n,unit,rootranks[i],bas->tag,PetscObjectComm((PetscObject)sf),&rootreqs[i-ndrootranks]);
986: }
987: /* Pack and send leaf data */
988: for (i=0; i<nleafranks; i++) {
989: PetscMPIInt n = leafoffset[i+1] - leafoffset[i];
990: void *packstart = link->leaf[i];
991: (*link->Pack)(n,link->bs,leafloc+leafoffset[i],leafdata,packstart);
992: if (i < ndleafranks) continue; /* shared memory */
993: MPI_Isend(packstart,n,unit,leafranks[i],bas->tag,PetscObjectComm((PetscObject)sf),&leafreqs[i-ndleafranks]);
994: }
995: return(0);
996: }
998: static PetscErrorCode PetscSFReduceEnd_Basic(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
999: {
1000: void (*UnpackOp)(PetscInt,PetscInt,const PetscInt*,void*,const void*);
1001: PetscErrorCode ierr;
1002: PetscSFBasicPack link;
1003: PetscInt i,nrootranks;
1004: PetscMPIInt typesize = -1;
1005: const PetscInt *rootoffset,*rootloc;
1008: PetscSFBasicGetPackInUse(sf,unit,rootdata,PETSC_OWN_POINTER,&link);
1009: /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
1010: PetscSFBasicPackWaitall(sf,link);
1011: PetscSFBasicGetRootInfo(sf,&nrootranks,NULL,NULL,&rootoffset,&rootloc);
1012: PetscSFBasicPackGetUnpackOp(sf,link,op,&UnpackOp);
1013: if (UnpackOp) {
1014: typesize = link->unitbytes;
1015: }
1016: else {
1017: MPI_Type_size(unit,&typesize);
1018: }
1019: for (i=0; i<nrootranks; i++) {
1020: PetscMPIInt n = rootoffset[i+1] - rootoffset[i];
1021: char *packstart = (char *) link->root[i];
1023: if (UnpackOp) {
1024: (*UnpackOp)(n,link->bs,rootloc+rootoffset[i],rootdata,(const void *)packstart);
1025: }
1026: #if defined(PETSC_HAVE_MPI_REDUCE_LOCAL)
1027: else if (n) { /* the op should be defined to operate on the whole datatype, so we ignore link->bs */
1028: PetscInt j;
1030: for (j = 0; j < n; j++) {
1031: MPI_Reduce_local(packstart+j*typesize,((char *) rootdata)+(rootloc[rootoffset[i]+j])*typesize,1,unit,op);
1032: }
1033: }
1034: #else
1035: else {
1036: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No unpacking reduction operation for this MPI_Op");
1037: }
1038: #endif
1039: }
1040: PetscSFBasicReclaimPack(sf,&link);
1041: return(0);
1042: }
1044: static PetscErrorCode PetscSFFetchAndOpBegin_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1045: {
1049: PetscSFReduceBegin_Basic(sf,unit,leafdata,rootdata,op);
1050: return(0);
1051: }
1053: static PetscErrorCode PetscSFFetchAndOpEnd_Basic(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1054: {
1055: PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
1056: void (*FetchAndOp)(PetscInt,PetscInt,const PetscInt*,void*,void*);
1057: PetscErrorCode ierr;
1058: PetscSFBasicPack link;
1059: PetscInt i,nrootranks,ndrootranks,nleafranks,ndleafranks;
1060: const PetscInt *rootoffset,*leafoffset,*rootloc,*leafloc;
1061: const PetscMPIInt *rootranks,*leafranks;
1062: MPI_Request *rootreqs,*leafreqs;
1065: PetscSFBasicGetPackInUse(sf,unit,rootdata,PETSC_OWN_POINTER,&link);
1066: /* This implementation could be changed to unpack as receives arrive, at the cost of non-determinism */
1067: PetscSFBasicPackWaitall(sf,link);
1068: PetscSFBasicGetRootInfo(sf,&nrootranks,&ndrootranks,&rootranks,&rootoffset,&rootloc);
1069: PetscSFBasicGetLeafInfo(sf,&nleafranks,&ndleafranks,&leafranks,&leafoffset,&leafloc);
1070: PetscSFBasicPackGetReqs(sf,link,&rootreqs,&leafreqs);
1071: /* Post leaf receives */
1072: for (i=ndleafranks; i<nleafranks; i++) {
1073: PetscMPIInt n = leafoffset[i+1] - leafoffset[i];
1074: MPI_Irecv(link->leaf[i],n,unit,leafranks[i],bas->tag,PetscObjectComm((PetscObject)sf),&leafreqs[i-ndleafranks]);
1075: }
1076: /* Process local fetch-and-op, post root sends */
1077: PetscSFBasicPackGetFetchAndOp(sf,link,op,&FetchAndOp);
1078: for (i=0; i<nrootranks; i++) {
1079: PetscMPIInt n = rootoffset[i+1] - rootoffset[i];
1080: void *packstart = link->root[i];
1082: (*FetchAndOp)(n,link->bs,rootloc+rootoffset[i],rootdata,packstart);
1083: if (i < ndrootranks) continue; /* shared memory */
1084: MPI_Isend(packstart,n,unit,rootranks[i],bas->tag,PetscObjectComm((PetscObject)sf),&rootreqs[i-ndrootranks]);
1085: }
1086: PetscSFBasicPackWaitall(sf,link);
1087: for (i=0; i<nleafranks; i++) {
1088: PetscMPIInt n = leafoffset[i+1] - leafoffset[i];
1089: const void *packstart = link->leaf[i];
1090: (*link->UnpackInsert)(n,link->bs,leafloc+leafoffset[i],leafupdate,packstart);
1091: }
1092: PetscSFBasicReclaimPack(sf,&link);
1093: return(0);
1094: }
1096: PETSC_EXTERN PetscErrorCode PetscSFCreate_Basic(PetscSF sf)
1097: {
1098: PetscSF_Basic *bas = (PetscSF_Basic*)sf->data;
1102: sf->ops->SetUp = PetscSFSetUp_Basic;
1103: sf->ops->SetFromOptions = PetscSFSetFromOptions_Basic;
1104: sf->ops->Reset = PetscSFReset_Basic;
1105: sf->ops->Destroy = PetscSFDestroy_Basic;
1106: sf->ops->View = PetscSFView_Basic;
1107: sf->ops->BcastBegin = PetscSFBcastBegin_Basic;
1108: sf->ops->BcastEnd = PetscSFBcastEnd_Basic;
1109: sf->ops->ReduceBegin = PetscSFReduceBegin_Basic;
1110: sf->ops->ReduceEnd = PetscSFReduceEnd_Basic;
1111: sf->ops->FetchAndOpBegin = PetscSFFetchAndOpBegin_Basic;
1112: sf->ops->FetchAndOpEnd = PetscSFFetchAndOpEnd_Basic;
1114: PetscNewLog(sf,&bas);
1115: sf->data = (void*)bas;
1116: return(0);
1117: }