Actual source code: ex1.c
petsc-3.11.4 2019-09-28
static const char help[] = "Test star forest communication (PetscSF)\n\n";

/*T
    Description: A star is a simple tree with one root and zero or more leaves.
    A star forest is a union of disjoint stars.
    Many common communication patterns can be expressed as updates of rootdata using leafdata and vice versa.
    This example creates a star forest, communicates values using the graph (see options for types of communication), views the graph, then destroys it.
T*/
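
/*
  Typical invocations (assuming an MPI launcher such as mpiexec is available;
  the TEST stanzas at the end of this file list the combinations exercised by
  the test harness):

      mpiexec -n 4 ./ex1 -test_bcast -sf_type basic
      mpiexec -n 4 ./ex1 -test_reduce -test_op max
*/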

/*
  Include petscsf.h so we can use PetscSF objects. Note that this automatically
  includes petscsys.h.
*/
#include <petscsf.h>
#include <petscviewer.h>

/* like PetscSFView() but with an alternative array of local indices */
static PetscErrorCode PetscSFViewCustomLocals_Private(PetscSF sf,const PetscInt locals[],PetscViewer viewer)
{
  const PetscSFNode *iremote;
  PetscInt          i,nroots,nleaves,nranks;
  PetscMPIInt       rank;

  MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
  PetscSFGetGraph(sf,&nroots,&nleaves,NULL,&iremote);
  PetscSFGetRanks(sf,&nranks,NULL,NULL,NULL,NULL);
  PetscViewerASCIIPushTab(viewer);
  PetscViewerASCIIPushSynchronized(viewer);
  PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,nroots,nleaves,nranks);
  for (i=0; i<nleaves; i++) {
    PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,locals[i],iremote[i].rank,iremote[i].index);
  }
  PetscViewerFlush(viewer);
  PetscViewerASCIIPopTab(viewer);
  PetscViewerASCIIPopSynchronized(viewer);
  return(0);
}

int main(int argc,char **argv)
{
  PetscErrorCode ierr;
  PetscInt       i,nroots,nrootsalloc,nleaves,nleavesalloc,*mine,stride;
  PetscSFNode    *remote;
  PetscMPIInt    rank,size;
  PetscSF        sf;
  PetscBool      test_bcast,test_bcastop,test_reduce,test_degree,test_fetchandop,test_gather,test_scatter,test_embed,test_invert,test_sf_distribute;
  MPI_Op         mop=MPI_OP_NULL; /* initialize to prevent compiler warnings with cxx_quad build */
  char           opstring[256];
  PetscBool      strflg;

  ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
  MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
  MPI_Comm_size(PETSC_COMM_WORLD,&size);

  PetscOptionsBegin(PETSC_COMM_WORLD,"","PetscSF Test Options","none");
  test_bcast = PETSC_FALSE;
  PetscOptionsBool("-test_bcast","Test broadcast","",test_bcast,&test_bcast,NULL);
  test_bcastop = PETSC_FALSE;
  PetscOptionsBool("-test_bcastop","Test broadcast and reduce","",test_bcastop,&test_bcastop,NULL);
  test_reduce = PETSC_FALSE;
  PetscOptionsBool("-test_reduce","Test reduction","",test_reduce,&test_reduce,NULL);
  mop = MPI_SUM;
  PetscStrcpy(opstring,"sum");
  PetscOptionsString("-test_op","Designate which MPI_Op to use","",opstring,opstring,256,NULL);
  PetscStrcmp("sum",opstring,&strflg);
  if (strflg) mop = MPIU_SUM;
  PetscStrcmp("prod",opstring,&strflg);
  if (strflg) mop = MPI_PROD;
  PetscStrcmp("max",opstring,&strflg);
  if (strflg) mop = MPI_MAX;
  PetscStrcmp("min",opstring,&strflg);
  if (strflg) mop = MPI_MIN;
  PetscStrcmp("land",opstring,&strflg);
  if (strflg) mop = MPI_LAND;
  PetscStrcmp("band",opstring,&strflg);
  if (strflg) mop = MPI_BAND;
  PetscStrcmp("lor",opstring,&strflg);
  if (strflg) mop = MPI_LOR;
  PetscStrcmp("bor",opstring,&strflg);
  if (strflg) mop = MPI_BOR;
  PetscStrcmp("lxor",opstring,&strflg);
  if (strflg) mop = MPI_LXOR;
  PetscStrcmp("bxor",opstring,&strflg);
  if (strflg) mop = MPI_BXOR;
  test_degree = PETSC_FALSE;
  PetscOptionsBool("-test_degree","Test computation of vertex degree","",test_degree,&test_degree,NULL);
  test_fetchandop = PETSC_FALSE;
  PetscOptionsBool("-test_fetchandop","Test atomic Fetch-And-Op","",test_fetchandop,&test_fetchandop,NULL);
  test_gather = PETSC_FALSE;
  PetscOptionsBool("-test_gather","Test point gather","",test_gather,&test_gather,NULL);
  test_scatter = PETSC_FALSE;
  PetscOptionsBool("-test_scatter","Test point scatter","",test_scatter,&test_scatter,NULL);
  test_embed = PETSC_FALSE;
  PetscOptionsBool("-test_embed","Test point embed","",test_embed,&test_embed,NULL);
  test_invert = PETSC_FALSE;
  PetscOptionsBool("-test_invert","Test point invert","",test_invert,&test_invert,NULL);
  stride = 1;
  PetscOptionsInt("-stride","Stride for leaf and root data","",stride,&stride,NULL);
  test_sf_distribute = PETSC_FALSE;
  PetscOptionsBool("-test_sf_distribute","Create an SF that 'distributes' to each process, like an alltoall","",test_sf_distribute,&test_sf_distribute,NULL);
  PetscOptionsEnd();

  if (test_sf_distribute) {
    nroots       = size;
    nrootsalloc  = size;
    nleaves      = size;
    nleavesalloc = size;
    mine         = NULL;
    PetscMalloc1(nleaves,&remote);
    for (i=0; i<size; i++) {
      remote[i].rank  = i;
      remote[i].index = rank;
    }
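    /* Leaf i on every rank references root index 'rank' on rank i, giving an alltoall-like exchange. */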
  } else {
    nroots       = 2 + (PetscInt)(rank == 0);
    nrootsalloc  = nroots * stride;
    nleaves      = 2 + (PetscInt)(rank > 0);
    nleavesalloc = nleaves * stride;
    mine         = NULL;
    if (stride > 1) {
      PetscInt i;

      PetscMalloc1(nleaves,&mine);
      for (i = 0; i < nleaves; i++) {
        mine[i] = stride * i;
      }
    }
    PetscMalloc1(nleaves,&remote);
    /* Left periodic neighbor */
    remote[0].rank  = (rank+size-1)%size;
    remote[0].index = 1 * stride;
    /* Right periodic neighbor */
    remote[1].rank  = (rank+1)%size;
    remote[1].index = 0 * stride;
    if (rank > 0) { /* All other ranks reference the extra root (index 2) on rank 0 */
      remote[2].rank  = 0;
      remote[2].index = 2 * stride;
    }
  }
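
  /*
     For example, with 3 ranks and the default stride of 1, the code above
     builds the following graph (leaf <- (rank, root index)):
       rank 0:  0 <- (2,1)   1 <- (1,0)
       rank 1:  0 <- (0,1)   1 <- (2,0)   2 <- (0,2)
       rank 2:  0 <- (1,1)   1 <- (0,0)   2 <- (0,2)
  */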

  /* Create a star forest for communication. The leaf space is dense when the
     stride is 1, so mine is NULL in that case; with a larger stride the
     explicit local indices in mine are used. */
  PetscSFCreate(PETSC_COMM_WORLD,&sf);
  PetscSFSetFromOptions(sf);
  PetscSFSetGraph(sf,nrootsalloc,nleaves,mine,PETSC_OWN_POINTER,remote,PETSC_OWN_POINTER);
  PetscSFSetUp(sf);

  /* View graph, mostly useful for debugging purposes. */
  PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);
  PetscSFView(sf,PETSC_VIEWER_STDOUT_WORLD);
  PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);

  if (test_bcast) { /* broadcast rootdata into leafdata */
    PetscInt *rootdata,*leafdata;
    /* Allocate space for send and receive buffers. This example communicates PetscInt, but other types, including
     * user-defined structures, could also be used. */
    PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);
    /* Set rootdata buffer to be broadcast */
    for (i=0; i<nrootsalloc; i++) rootdata[i] = -1;
    for (i=0; i<nroots; i++) rootdata[i*stride] = 100*(rank+1) + i;
    /* Initialize local buffer; these values are never used. */
    for (i=0; i<nleavesalloc; i++) leafdata[i] = -1;
    /* Broadcast entries from rootdata to leafdata. Computation or other communication can be performed between the begin and end calls. */
    PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata);
    PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata);
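    /* Each strided leaf slot now holds the value of the root it references; with
       the default ring graph, leaf 0 receives 100*(left neighbor's rank + 1) + 1. */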
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Bcast Rootdata\n");
    PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD);
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Bcast Leafdata\n");
    PetscIntView(nleavesalloc,leafdata,PETSC_VIEWER_STDOUT_WORLD);
    PetscFree2(rootdata,leafdata);
  }

  if (test_bcastop) { /* broadcast rootdata into leafdata, combining with the existing leaf values via mop */
    PetscInt *rootdata,*leafdata;
    /* Allocate space for send and receive buffers. This example communicates PetscInt, but other types, including
     * user-defined structures, could also be used. */
    PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);
    /* Set rootdata buffer to be broadcast */
    for (i=0; i<nrootsalloc; i++) rootdata[i] = -1;
    for (i=0; i<nroots; i++) rootdata[i*stride] = 100*(rank+1) + i;
    /* Set leaf values to reduce with */
    for (i=0; i<nleavesalloc; i++) leafdata[i] = -10*(rank+1) - i;
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Pre-BcastAndOp Leafdata\n");
    PetscIntView(nleavesalloc,leafdata,PETSC_VIEWER_STDOUT_WORLD);
    /* Broadcast entries from rootdata into leafdata. Computation or other communication can be performed between the begin and end calls. */
    PetscSFBcastAndOpBegin(sf,MPIU_INT,rootdata,leafdata,mop);
    PetscSFBcastAndOpEnd(sf,MPIU_INT,rootdata,leafdata,mop);
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## BcastAndOp Rootdata\n");
    PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD);
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## BcastAndOp Leafdata\n");
    PetscIntView(nleavesalloc,leafdata,PETSC_VIEWER_STDOUT_WORLD);
    PetscFree2(rootdata,leafdata);
  }

  if (test_reduce) { /* reduce leafdata into rootdata */
    PetscInt *rootdata,*leafdata;
    PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);
    /* Initialize rootdata buffer in which the result of the reduction will appear. */
    for (i=0; i<nrootsalloc; i++) rootdata[i] = -1;
    for (i=0; i<nroots; i++) rootdata[i*stride] = 100*(rank+1) + i;
    /* Set leaf values to reduce. */
    for (i=0; i<nleavesalloc; i++) leafdata[i] = -1;
    for (i=0; i<nleaves; i++) leafdata[i*stride] = 1000*(rank+1) + 10*i;
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Pre-Reduce Rootdata\n");
    PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD);
    /* Perform reduction. Computation or other communication can be performed between the begin and end calls.
     * This example sums the values, but other MPI_Ops can be used (e.g. MPI_MAX, MPI_PROD). */
    PetscSFReduceBegin(sf,MPIU_INT,leafdata,rootdata,mop);
    PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,mop);
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Leafdata\n");
    PetscIntView(nleavesalloc,leafdata,PETSC_VIEWER_STDOUT_WORLD);
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Rootdata\n");
    PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD);
    PetscFree2(rootdata,leafdata);
  }

  if (test_degree) {
    const PetscInt *degree;
    /* Compute the degree of each root: the number of leaves (on any process) that reference it. */
    PetscSFComputeDegreeBegin(sf,&degree);
    PetscSFComputeDegreeEnd(sf,&degree);
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Root degrees\n");
    PetscIntView(nrootsalloc,degree,PETSC_VIEWER_STDOUT_WORLD);
  }

  if (test_fetchandop) {
    /* Cannot use text compare here because token ordering is not deterministic */
    PetscInt *leafdata,*leafupdate,*rootdata;
    PetscMalloc3(nleavesalloc,&leafdata,nleavesalloc,&leafupdate,nrootsalloc,&rootdata);
    for (i=0; i<nleavesalloc; i++) leafdata[i] = -1;
    for (i=0; i<nleaves; i++) leafdata[i*stride] = 1;
    for (i=0; i<nrootsalloc; i++) rootdata[i] = -1;
    for (i=0; i<nroots; i++) rootdata[i*stride] = 0;
    PetscSFFetchAndOpBegin(sf,MPIU_INT,rootdata,leafdata,leafupdate,mop);
    PetscSFFetchAndOpEnd(sf,MPIU_INT,rootdata,leafdata,leafupdate,mop);
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Rootdata (sum of 1 from each leaf)\n");
    PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD);
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Leafupdate (value at roots prior to my atomic update)\n");
    PetscIntView(nleavesalloc,leafupdate,PETSC_VIEWER_STDOUT_WORLD);
    PetscFree3(leafdata,leafupdate,rootdata);
  }

  if (test_gather) {
    const PetscInt *degree;
    PetscInt       inedges,*indata,*outdata;
    PetscSFComputeDegreeBegin(sf,&degree);
    PetscSFComputeDegreeEnd(sf,&degree);
    for (i=0,inedges=0; i<nrootsalloc; i++) inedges += degree[i];
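    /* The gather target has one slot per incoming edge, so its size is the sum of the root degrees. */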
    PetscMalloc2(inedges,&indata,nleavesalloc,&outdata);
    for (i=0; i<nleavesalloc; i++) outdata[i] = -1;
    for (i=0; i<nleaves; i++) outdata[i*stride] = 1000*(rank+1) + i;
    PetscSFGatherBegin(sf,MPIU_INT,outdata,indata);
    PetscSFGatherEnd(sf,MPIU_INT,outdata,indata);
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Gathered data at multi-roots from leaves\n");
    PetscIntView(inedges,indata,PETSC_VIEWER_STDOUT_WORLD);
    PetscFree2(indata,outdata);
  }

  if (test_scatter) {
    const PetscInt *degree;
    PetscInt       j,count,inedges,*indata,*outdata;
    PetscSFComputeDegreeBegin(sf,&degree);
    PetscSFComputeDegreeEnd(sf,&degree);
    for (i=0,inedges=0; i<nrootsalloc; i++) inedges += degree[i];
    PetscMalloc2(inedges,&indata,nleavesalloc,&outdata);
    for (i=0; i<nleavesalloc; i++) outdata[i] = -1;
    for (i=0,count=0; i<nrootsalloc; i++) {
      for (j=0; j<degree[i]; j++) indata[count++] = 1000*(rank+1) + 100*i + j;
    }
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Data at multi-roots, to scatter to leaves\n");
    PetscIntView(inedges,indata,PETSC_VIEWER_STDOUT_WORLD);

    PetscSFScatterBegin(sf,MPIU_INT,indata,outdata);
    PetscSFScatterEnd(sf,MPIU_INT,indata,outdata);
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Scattered data at leaves\n");
    PetscIntView(nleavesalloc,outdata,PETSC_VIEWER_STDOUT_WORLD);
    PetscFree2(indata,outdata);
  }

  if (test_embed) {
    const PetscInt nroots = 1 + (PetscInt) !rank;
    PetscInt       selected[2];
    PetscSF        esf;

    selected[0] = stride;
    selected[1] = 2*stride;
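    /* Create a new SF retaining only the edges whose root index appears in selected[]. */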
    PetscSFCreateEmbeddedSF(sf,nroots,selected,&esf);
    PetscSFSetUp(esf);
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Embedded PetscSF\n");
    PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);
    PetscSFView(esf,PETSC_VIEWER_STDOUT_WORLD);
    PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);
    PetscSFDestroy(&esf);
  }

  if (test_invert) {
    const PetscInt *degree;
    PetscInt       *mRootsOrigNumbering;
    PetscInt       inedges;
    PetscSF        msf,imsf;
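
    /* The multi-SF expands each root into one multi-root per incoming edge; the
       inverse SF swaps the roles of its roots and leaves. */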
    PetscSFGetMultiSF(sf,&msf);
    PetscSFCreateInverseSF(msf,&imsf);
    PetscSFSetUp(msf);
    PetscSFSetUp(imsf);
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Multi-SF\n");
    PetscSFView(msf,PETSC_VIEWER_STDOUT_WORLD);
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Multi-SF roots indices in original SF roots numbering\n");
    PetscSFGetGraph(msf,&inedges,NULL,NULL,NULL);
    PetscSFComputeDegreeBegin(sf,&degree);
    PetscSFComputeDegreeEnd(sf,&degree);
    PetscSFComputeMultiRootOriginalNumbering(sf,degree,&mRootsOrigNumbering);
    PetscIntView(inedges,mRootsOrigNumbering,PETSC_VIEWER_STDOUT_WORLD);
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Inverse of Multi-SF\n");
    PetscSFView(imsf,PETSC_VIEWER_STDOUT_WORLD);
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Inverse of Multi-SF, original numbering\n");
    PetscSFViewCustomLocals_Private(imsf,mRootsOrigNumbering,PETSC_VIEWER_STDOUT_WORLD);
    PetscSFDestroy(&imsf);
    PetscFree(mRootsOrigNumbering);
  }

  /* Clean storage for star forest. */
  PetscSFDestroy(&sf);
  ierr = PetscFinalize();
  return ierr;
}

/*TEST

   test:
      nsize: 4
      args: -test_bcast -sf_type window
      requires: define(PETSC_HAVE_MPI_WIN_CREATE) define(PETSC_HAVE_MPICH_NUMVERSION)

   test:
      suffix: 2
      nsize: 4
      args: -test_reduce -sf_type window
      requires: define(PETSC_HAVE_MPI_WIN_CREATE) define(PETSC_HAVE_MPICH_NUMVERSION)

   test:
      suffix: 2_basic
      nsize: 4
      args: -test_reduce -sf_type basic

   test:
      suffix: 3
      nsize: 4
      args: -test_degree -sf_type window
      requires: define(PETSC_HAVE_MPI_WIN_CREATE) define(PETSC_HAVE_MPICH_NUMVERSION)

   test:
      suffix: 3_basic
      nsize: 4
      args: -test_degree -sf_type basic

   test:
      suffix: 4
      nsize: 4
      args: -test_gather -sf_type window
      requires: define(PETSC_HAVE_MPI_WIN_CREATE) define(PETSC_HAVE_MPICH_NUMVERSION)

   test:
      suffix: 4_basic
      nsize: 4
      args: -test_gather -sf_type basic

   test:
      suffix: 4_stride
      nsize: 4
      args: -test_gather -sf_type basic -stride 2

   test:
      suffix: 5
      nsize: 4
      args: -test_scatter -sf_type window
      requires: define(PETSC_HAVE_MPI_WIN_CREATE) define(PETSC_HAVE_MPICH_NUMVERSION)

   test:
      suffix: 5_basic
      nsize: 4
      args: -test_scatter -sf_type basic

   test:
      suffix: 5_stride
      nsize: 4
      args: -test_scatter -sf_type basic -stride 2

   test:
      suffix: 6
      nsize: 4
      args: -test_embed -sf_type window
      requires: define(PETSC_HAVE_MPI_WIN_CREATE) define(PETSC_HAVE_MPICH_NUMVERSION)

   test:
      suffix: 6_basic
      nsize: 4
      args: -test_embed -sf_type basic

   test:
      suffix: 7
      nsize: 4
      args: -test_invert -sf_type window
      requires: define(PETSC_HAVE_MPI_WIN_CREATE) define(PETSC_HAVE_MPICH_NUMVERSION)

   test:
      suffix: 7_basic
      nsize: 4
      args: -test_invert -sf_type basic

   test:
      suffix: basic
      nsize: 4
      args: -test_bcast -sf_type basic
      output_file: output/ex1_1_basic.out

   test:
      suffix: bcastop_basic
      nsize: 4
      args: -test_bcastop -sf_type basic
      output_file: output/ex1_bcastop_basic.out

   test:
      suffix: 8
      nsize: 3
      args: -test_bcast -test_sf_distribute -sf_type window
      requires: define(PETSC_HAVE_MPI_WIN_CREATE)

   test:
      suffix: 8_basic
      nsize: 3
      args: -test_bcast -test_sf_distribute -sf_type basic

TEST*/