Actual source code: ex1.c
static const char help[] = "Test star forest communication (PetscSF)\n\n";

/*
  Description: A star is a simple tree with one root and zero or more leaves.
  A star forest is a union of disjoint stars.
  Many common communication patterns can be expressed as updates of rootdata using leafdata and vice-versa.
  This example creates a star forest, communicates values using the graph (see options for types of communication), views the graph, then destroys it.
*/
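
/*
  A PetscSF is specified by a graph: each local "leaf" point references a "root" point that may
  live on another process, given as a (rank, offset) pair (PetscSFNode). Communication then moves
  data along these edges, either root -> leaf (broadcast) or leaf -> root (reduce). This is only a
  summary of the PetscSF model as exercised below, not an exhaustive description.
*/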
/*
  Include petscsf.h so we can use PetscSF objects. Note that this automatically
  includes petscsys.h.
*/
#include <petscsf.h>
#include <petscviewer.h>
/* Like PetscSFView() but with an alternative array of local indices */
static PetscErrorCode PetscSFViewCustomLocals_Private(PetscSF sf,const PetscInt locals[],PetscViewer viewer)
{
  const PetscSFNode *iremote;
  PetscInt          i,nroots,nleaves,nranks;
  PetscMPIInt       rank;

  MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
  PetscSFGetGraph(sf,&nroots,&nleaves,NULL,&iremote);
  PetscSFGetRootRanks(sf,&nranks,NULL,NULL,NULL,NULL);
  PetscViewerASCIIPushTab(viewer);
  PetscViewerASCIIPushSynchronized(viewer);
  PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n",rank,nroots,nleaves,nranks);
  for (i=0; i<nleaves; i++) {
    PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",rank,locals[i],iremote[i].rank,iremote[i].index);
  }
  PetscViewerFlush(viewer);
  PetscViewerASCIIPopTab(viewer);
  PetscViewerASCIIPopSynchronized(viewer);
  return 0;
}
int main(int argc,char **argv)
{
  PetscInt    i,nroots,nrootsalloc,nleaves,nleavesalloc,*mine,stride;
  PetscSFNode *remote;
  PetscMPIInt rank,size;
  PetscSF     sf;
  PetscBool   test_all,test_bcast,test_bcastop,test_reduce,test_degree,test_fetchandop,test_gather,test_scatter,test_embed,test_invert,test_sf_distribute,test_char;
  MPI_Op      mop=MPI_OP_NULL; /* initialize to prevent compiler warnings with cxx_quad build */
  char        opstring[256];
  PetscBool   strflg;

  PetscInitialize(&argc,&argv,(char*)0,help);
  MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
  MPI_Comm_size(PETSC_COMM_WORLD,&size);
  PetscOptionsBegin(PETSC_COMM_WORLD,"","PetscSF Test Options","none");
  test_all = PETSC_FALSE;
  PetscOptionsBool("-test_all","Test all SF communications","",test_all,&test_all,NULL);
  test_bcast = test_all;
  PetscOptionsBool("-test_bcast","Test broadcast","",test_bcast,&test_bcast,NULL);
  test_bcastop = test_all;
  PetscOptionsBool("-test_bcastop","Test broadcast and reduce","",test_bcastop,&test_bcastop,NULL);
  test_reduce = test_all;
  PetscOptionsBool("-test_reduce","Test reduction","",test_reduce,&test_reduce,NULL);
  test_char = test_all;
  PetscOptionsBool("-test_char","Test signed char, unsigned char, and char","",test_char,&test_char,NULL);
  mop = MPI_SUM;
  PetscStrcpy(opstring,"sum");
  PetscOptionsString("-test_op","Designate which MPI_Op to use","",opstring,opstring,sizeof(opstring),NULL);
  PetscStrcmp("sum",opstring,&strflg);
  if (strflg) mop = MPIU_SUM;
  PetscStrcmp("prod",opstring,&strflg);
  if (strflg) mop = MPI_PROD;
  PetscStrcmp("max",opstring,&strflg);
  if (strflg) mop = MPI_MAX;
  PetscStrcmp("min",opstring,&strflg);
  if (strflg) mop = MPI_MIN;
  PetscStrcmp("land",opstring,&strflg);
  if (strflg) mop = MPI_LAND;
  PetscStrcmp("band",opstring,&strflg);
  if (strflg) mop = MPI_BAND;
  PetscStrcmp("lor",opstring,&strflg);
  if (strflg) mop = MPI_LOR;
  PetscStrcmp("bor",opstring,&strflg);
  if (strflg) mop = MPI_BOR;
  PetscStrcmp("lxor",opstring,&strflg);
  if (strflg) mop = MPI_LXOR;
  PetscStrcmp("bxor",opstring,&strflg);
  if (strflg) mop = MPI_BXOR;
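  /* Note: if -test_op names none of the strings above, mop simply keeps the default (sum). */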
  test_degree = test_all;
  PetscOptionsBool("-test_degree","Test computation of vertex degree","",test_degree,&test_degree,NULL);
  test_fetchandop = test_all;
  PetscOptionsBool("-test_fetchandop","Test atomic Fetch-And-Op","",test_fetchandop,&test_fetchandop,NULL);
  test_gather = test_all;
  PetscOptionsBool("-test_gather","Test point gather","",test_gather,&test_gather,NULL);
  test_scatter = test_all;
  PetscOptionsBool("-test_scatter","Test point scatter","",test_scatter,&test_scatter,NULL);
  test_embed = test_all;
  PetscOptionsBool("-test_embed","Test point embed","",test_embed,&test_embed,NULL);
  test_invert = test_all;
  PetscOptionsBool("-test_invert","Test point invert","",test_invert,&test_invert,NULL);
  stride = 1;
  PetscOptionsInt("-stride","Stride for leaf and root data","",stride,&stride,NULL);
  test_sf_distribute = PETSC_FALSE;
  PetscOptionsBool("-test_sf_distribute","Create an SF that 'distributes' to each process, like an alltoall","",test_sf_distribute,&test_sf_distribute,NULL);
  PetscOptionsString("-test_op","Designate which MPI_Op to use","",opstring,opstring,sizeof(opstring),NULL);
  PetscOptionsEnd();
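  /*
    Illustrative invocations (assuming the executable is named ex1):
      mpiexec -n 4 ./ex1 -test_bcast
      mpiexec -n 4 ./ex1 -test_reduce -test_op max
    The TEST block at the bottom of this file lists the combinations actually run by the test harness.
  */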
  if (test_sf_distribute) {
    nroots       = size;
    nrootsalloc  = size;
    nleaves      = size;
    nleavesalloc = size;
    mine         = NULL;
    PetscMalloc1(nleaves,&remote);
    for (i=0; i<size; i++) {
      remote[i].rank  = i;
      remote[i].index = rank;
    }
  } else {
    nroots       = 2 + (PetscInt)(rank == 0);
    nrootsalloc  = nroots * stride;
    nleaves      = 2 + (PetscInt)(rank > 0);
    nleavesalloc = nleaves * stride;
    mine         = NULL;
    if (stride > 1) {
      PetscInt i;

      PetscMalloc1(nleaves,&mine);
      for (i = 0; i < nleaves; i++) mine[i] = stride * i;
    }
    PetscMalloc1(nleaves,&remote);
    /* Left periodic neighbor */
    remote[0].rank  = (rank+size-1)%size;
    remote[0].index = 1 * stride;
    /* Right periodic neighbor */
    remote[1].rank  = (rank+1)%size;
    remote[1].index = 0 * stride;
    if (rank > 0) { /* All processes other than rank 0 also reference rank 0, index 2*stride */
      remote[2].rank  = 0;
      remote[2].index = 2 * stride;
    }
  }
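  /*
    Illustration (assuming 3 ranks and -stride 1, plugged into the formulas above):
      rank 0: 3 roots; leaves 0,1   -> (rank 2, index 1), (rank 1, index 0)
      rank 1: 2 roots; leaves 0,1,2 -> (rank 0, index 1), (rank 2, index 0), (rank 0, index 2)
      rank 2: 2 roots; leaves 0,1,2 -> (rank 1, index 1), (rank 0, index 0), (rank 0, index 2)
    That is, every rank's leaf 0 points at its left neighbor's root 1, leaf 1 points at its right
    neighbor's root 0, and ranks > 0 have a third leaf pointing at root 2 on rank 0.
  */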
  /* Create a star forest for communication. In this example the leaf space is dense when stride is 1, in which case mine is NULL. */
  PetscSFCreate(PETSC_COMM_WORLD,&sf);
  PetscSFSetFromOptions(sf);
  PetscSFSetGraph(sf,nrootsalloc,nleaves,mine,PETSC_OWN_POINTER,remote,PETSC_OWN_POINTER);
  PetscSFSetUp(sf);
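  /*
    For reference, the PetscSFSetGraph() arguments are: nrootsalloc is the local root-space size,
    nleaves the number of leaves, mine the local leaf indices (NULL meaning the contiguous range
    0..nleaves-1), and remote the (rank,index) root of each leaf. PETSC_OWN_POINTER hands ownership
    of mine and remote to the SF, which is why they are not freed explicitly later in this example.
  */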
  /* View graph, mostly useful for debugging purposes. */
  PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);
  PetscSFView(sf,PETSC_VIEWER_STDOUT_WORLD);
  PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);
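  /*
    Each block below exercises one communication primitive. Broadcast moves data in the
    root -> leaf direction: every leaf receives the value stored at the root it references.
  */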
  if (test_bcast) { /* broadcast rootdata into leafdata */
    PetscInt *rootdata,*leafdata;
    /* Allocate space for send and receive buffers. This example communicates PetscInt, but other types, including
     * user-defined structures, could also be used. */
    PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);
    /* Set rootdata buffer to be broadcast */
    for (i=0; i<nrootsalloc; i++) rootdata[i] = -1;
    for (i=0; i<nroots; i++) rootdata[i*stride] = 100*(rank+1) + i;
    /* Initialize local buffer; these values are never used. */
    for (i=0; i<nleavesalloc; i++) leafdata[i] = -1;
    /* Broadcast entries from rootdata to leafdata. Computation or other communication can be performed between the begin and end calls. */
    PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE);
    PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE);
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Bcast Rootdata\n");
    PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD);
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Bcast Leafdata\n");
    PetscIntView(nleavesalloc,leafdata,PETSC_VIEWER_STDOUT_WORLD);
    PetscFree2(rootdata,leafdata);
  }
  if (test_bcast && test_char) { /* Bcast with char */
    PetscInt len;
    char buf[256];
    char *rootdata,*leafdata;
    PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);
    /* Set rootdata buffer to be broadcast */
    for (i=0; i<nrootsalloc; i++) rootdata[i] = '*';
    for (i=0; i<nroots; i++) rootdata[i*stride] = 'A' + rank*3 + i; /* rank is very small, so it is fine to compute a char */
    /* Initialize local buffer; these values are never used. */
    for (i=0; i<nleavesalloc; i++) leafdata[i] = '?';

    PetscSFBcastBegin(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE);
    PetscSFBcastEnd(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE);

    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Bcast Rootdata in type of char\n");
    len = 0; PetscSNPrintf(buf,256,"%4d:",rank); len += 5;
    for (i=0; i<nrootsalloc; i++) {PetscSNPrintf(buf+len,256-len,"%5c",rootdata[i]); len += 5;}
    PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);
    PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);

    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Bcast Leafdata in type of char\n");
    len = 0; PetscSNPrintf(buf,256,"%4d:",rank); len += 5;
    for (i=0; i<nleavesalloc; i++) {PetscSNPrintf(buf+len,256-len,"%5c",leafdata[i]); len += 5;}
    PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);
    PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);

    PetscFree2(rootdata,leafdata);
  }
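  /*
    PetscSFBcastBegin/End with an MPI_Op other than MPI_REPLACE combines the incoming root value
    into the existing leaf value (leaf = leaf op root); that is what the next block exercises,
    using the op selected by -test_op.
  */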
  if (test_bcastop) { /* broadcast rootdata and combine it into leafdata with mop */
    PetscInt *rootdata,*leafdata;
    /* Allocate space for send and receive buffers. This example communicates PetscInt, but other types, including
     * user-defined structures, could also be used. */
    PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);
    /* Set rootdata buffer to be broadcast */
    for (i=0; i<nrootsalloc; i++) rootdata[i] = -1;
    for (i=0; i<nroots; i++) rootdata[i*stride] = 100*(rank+1) + i;
    /* Set leaf values to reduce with */
    for (i=0; i<nleavesalloc; i++) leafdata[i] = -10*(rank+1) - i;
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Pre-BcastAndOp Leafdata\n");
    PetscIntView(nleavesalloc,leafdata,PETSC_VIEWER_STDOUT_WORLD);
    /* Broadcast entries from rootdata to leafdata. Computation or other communication can be performed between the begin and end calls. */
    PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata,mop);
    PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata,mop);
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## BcastAndOp Rootdata\n");
    PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD);
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## BcastAndOp Leafdata\n");
    PetscIntView(nleavesalloc,leafdata,PETSC_VIEWER_STDOUT_WORLD);
    PetscFree2(rootdata,leafdata);
  }
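  /*
    Reduce moves data in the opposite direction, leaf -> root: the contributions from all leaves
    that reference a given root are combined into that root with the selected MPI_Op.
  */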
  if (test_reduce) { /* Reduce leafdata into rootdata */
    PetscInt *rootdata,*leafdata;
    PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);
    /* Initialize rootdata buffer in which the result of the reduction will appear. */
    for (i=0; i<nrootsalloc; i++) rootdata[i] = -1;
    for (i=0; i<nroots; i++) rootdata[i*stride] = 100*(rank+1) + i;
    /* Set leaf values to reduce. */
    for (i=0; i<nleavesalloc; i++) leafdata[i] = -1;
    for (i=0; i<nleaves; i++) leafdata[i*stride] = 1000*(rank+1) + 10*i;
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Pre-Reduce Rootdata\n");
    PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD);
    /* Perform reduction. Computation or other communication can be performed between the begin and end calls.
     * This example sums the values, but other MPI_Ops can be used (e.g. MPI_MAX, MPI_PROD). */
    PetscSFReduceBegin(sf,MPIU_INT,leafdata,rootdata,mop);
    PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,mop);
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Leafdata\n");
    PetscIntView(nleavesalloc,leafdata,PETSC_VIEWER_STDOUT_WORLD);
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Rootdata\n");
    PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD);
    PetscFree2(rootdata,leafdata);
  }
  if (test_reduce && test_char) { /* Reduce with signed char */
    PetscInt len;
    char buf[256];
    signed char *rootdata,*leafdata;
    PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);
    /* Initialize rootdata buffer in which the result of the reduction will appear. */
    for (i=0; i<nrootsalloc; i++) rootdata[i] = -1;
    for (i=0; i<nroots; i++) rootdata[i*stride] = 10*(rank+1) + i;
    /* Set leaf values to reduce. */
    for (i=0; i<nleavesalloc; i++) leafdata[i] = -1;
    for (i=0; i<nleaves; i++) leafdata[i*stride] = 50*(rank+1) + 10*i;
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Pre-Reduce Rootdata in type of signed char\n");

    len = 0; PetscSNPrintf(buf,256,"%4d:",rank); len += 5;
    for (i=0; i<nrootsalloc; i++) {PetscSNPrintf(buf+len,256-len,"%5d",rootdata[i]); len += 5;}
    PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);
    PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);

    /* We use MPI_SIGNED_CHAR here; using MPI_CHAR would trigger an error, since the MPI standard
       does not support reductions on MPI_CHAR. Testing with -test_op max, one can see that the
       sign does take effect in MPI_MAX.
     */
    PetscSFReduceBegin(sf,MPI_SIGNED_CHAR,leafdata,rootdata,mop);
    PetscSFReduceEnd(sf,MPI_SIGNED_CHAR,leafdata,rootdata,mop);

    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Leafdata in type of signed char\n");
    len = 0; PetscSNPrintf(buf,256,"%4d:",rank); len += 5;
    for (i=0; i<nleavesalloc; i++) {PetscSNPrintf(buf+len,256-len,"%5d",leafdata[i]); len += 5;}
    PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);
    PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);

    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Rootdata in type of signed char\n");
    len = 0; PetscSNPrintf(buf,256,"%4d:",rank); len += 5;
    for (i=0; i<nrootsalloc; i++) {PetscSNPrintf(buf+len,256-len,"%5d",rootdata[i]); len += 5;}
    PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);
    PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);

    PetscFree2(rootdata,leafdata);
  }
  if (test_reduce && test_char) { /* Reduce with unsigned char */
    PetscInt len;
    char buf[256];
    unsigned char *rootdata,*leafdata;
    PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);
    /* Initialize rootdata buffer in which the result of the reduction will appear. */
    for (i=0; i<nrootsalloc; i++) rootdata[i] = 0;
    for (i=0; i<nroots; i++) rootdata[i*stride] = 10*(rank+1) + i;
    /* Set leaf values to reduce. */
    for (i=0; i<nleavesalloc; i++) leafdata[i] = 0;
    for (i=0; i<nleaves; i++) leafdata[i*stride] = 50*(rank+1) + 10*i;
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Pre-Reduce Rootdata in type of unsigned char\n");

    len = 0; PetscSNPrintf(buf,256,"%4d:",rank); len += 5;
    for (i=0; i<nrootsalloc; i++) {PetscSNPrintf(buf+len,256-len,"%5u",rootdata[i]); len += 5;}
    PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);
    PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);

    /* As above, we use MPI_UNSIGNED_CHAR rather than MPI_CHAR, since the MPI standard does not
       support reductions on MPI_CHAR.
     */
    PetscSFReduceBegin(sf,MPI_UNSIGNED_CHAR,leafdata,rootdata,mop);
    PetscSFReduceEnd(sf,MPI_UNSIGNED_CHAR,leafdata,rootdata,mop);

    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Leafdata in type of unsigned char\n");
    len = 0; PetscSNPrintf(buf,256,"%4d:",rank); len += 5;
    for (i=0; i<nleavesalloc; i++) {PetscSNPrintf(buf+len,256-len,"%5u",leafdata[i]); len += 5;}
    PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);
    PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);

    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Rootdata in type of unsigned char\n");
    len = 0; PetscSNPrintf(buf,256,"%4d:",rank); len += 5;
    for (i=0; i<nrootsalloc; i++) {PetscSNPrintf(buf+len,256-len,"%5u",rootdata[i]); len += 5;}
    PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);
    PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);

    PetscFree2(rootdata,leafdata);
  }
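  /*
    PetscSFComputeDegreeBegin/End counts, for each local root, the number of leaves (across all
    processes) that reference it. The returned degree array is owned by the SF and must not be freed.
  */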
  if (test_degree) {
    const PetscInt *degree;
    PetscSFComputeDegreeBegin(sf,&degree);
    PetscSFComputeDegreeEnd(sf,&degree);
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Root degrees\n");
    PetscIntView(nrootsalloc,degree,PETSC_VIEWER_STDOUT_WORLD);
  }
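  /*
    Fetch-and-op atomically combines each leaf value into its root (here with mop) and returns, in
    leafupdate, the value the root held immediately before that leaf's contribution was applied.
  */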
  if (test_fetchandop) {
    /* Cannot use text compare here because token ordering is not deterministic */
    PetscInt *leafdata,*leafupdate,*rootdata;
    PetscMalloc3(nleavesalloc,&leafdata,nleavesalloc,&leafupdate,nrootsalloc,&rootdata);
    for (i=0; i<nleavesalloc; i++) leafdata[i] = -1;
    for (i=0; i<nleaves; i++) leafdata[i*stride] = 1;
    for (i=0; i<nrootsalloc; i++) rootdata[i] = -1;
    for (i=0; i<nroots; i++) rootdata[i*stride] = 0;
    PetscSFFetchAndOpBegin(sf,MPIU_INT,rootdata,leafdata,leafupdate,mop);
    PetscSFFetchAndOpEnd(sf,MPIU_INT,rootdata,leafdata,leafupdate,mop);
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Rootdata (sum of 1 from each leaf)\n");
    PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD);
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Leafupdate (value at roots prior to my atomic update)\n");
    PetscIntView(nleavesalloc,leafupdate,PETSC_VIEWER_STDOUT_WORLD);
    PetscFree3(leafdata,leafupdate,rootdata);
  }
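  /*
    Gather works on the "multi-root" space: each root i owns degree[i] consecutive slots, one per
    incoming leaf, so the gathered array has sum_i degree[i] entries locally (inedges below).
  */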
  if (test_gather) {
    const PetscInt *degree;
    PetscInt inedges,*indata,*outdata;
    PetscSFComputeDegreeBegin(sf,&degree);
    PetscSFComputeDegreeEnd(sf,&degree);
    for (i=0,inedges=0; i<nrootsalloc; i++) inedges += degree[i];
    PetscMalloc2(inedges,&indata,nleavesalloc,&outdata);
    for (i=0; i<nleavesalloc; i++) outdata[i] = -1;
    for (i=0; i<nleaves; i++) outdata[i*stride] = 1000*(rank+1) + i;
    PetscSFGatherBegin(sf,MPIU_INT,outdata,indata);
    PetscSFGatherEnd(sf,MPIU_INT,outdata,indata);
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Gathered data at multi-roots from leaves\n");
    PetscIntView(inedges,indata,PETSC_VIEWER_STDOUT_WORLD);
    PetscFree2(indata,outdata);
  }
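  /*
    Scatter is the reverse of gather: each of root i's degree[i] slots is sent to its corresponding
    leaf, so different leaves of the same root can receive different values (unlike Bcast).
  */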
  if (test_scatter) {
    const PetscInt *degree;
    PetscInt j,count,inedges,*indata,*outdata;
    PetscSFComputeDegreeBegin(sf,&degree);
    PetscSFComputeDegreeEnd(sf,&degree);
    for (i=0,inedges=0; i<nrootsalloc; i++) inedges += degree[i];
    PetscMalloc2(inedges,&indata,nleavesalloc,&outdata);
    for (i=0; i<nleavesalloc; i++) outdata[i] = -1;
    for (i=0,count=0; i<nrootsalloc; i++) {
      for (j=0; j<degree[i]; j++) indata[count++] = 1000*(rank+1) + 100*i + j;
    }
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Data at multi-roots, to scatter to leaves\n");
    PetscIntView(inedges,indata,PETSC_VIEWER_STDOUT_WORLD);

    PetscSFScatterBegin(sf,MPIU_INT,indata,outdata);
    PetscSFScatterEnd(sf,MPIU_INT,indata,outdata);
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Scattered data at leaves\n");
    PetscIntView(nleavesalloc,outdata,PETSC_VIEWER_STDOUT_WORLD);
    PetscFree2(indata,outdata);
  }
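  /*
    PetscSFCreateEmbeddedRootSF() builds a new SF containing only the edges whose roots are in the
    selected list; leaves pointing at unselected roots are dropped. Here the selected roots are the
    ones at offsets stride and 2*stride.
  */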
  if (test_embed) {
    const PetscInt nroots = 1 + (PetscInt)(rank == 0);
    PetscInt selected[2];
    PetscSF esf;

    selected[0] = stride;
    selected[1] = 2*stride;
    PetscSFCreateEmbeddedRootSF(sf,nroots,selected,&esf);
    PetscSFSetUp(esf);
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Embedded PetscSF\n");
    PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);
    PetscSFView(esf,PETSC_VIEWER_STDOUT_WORLD);
    PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);
    PetscSFDestroy(&esf);
  }
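  /*
    PetscSFGetMultiSF() returns a derived SF in which each root is duplicated degree[i] times, so
    every multi-root has exactly one leaf. Such an SF can be inverted with PetscSFCreateInverseSF(),
    which swaps the root and leaf roles; the helper at the top of this file is then used to view the
    inverse with its root indices mapped back to the original numbering.
  */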
  if (test_invert) {
    const PetscInt *degree;
    PetscInt *mRootsOrigNumbering;
    PetscInt inedges;
    PetscSF msf,imsf;

    PetscSFGetMultiSF(sf,&msf);
    PetscSFCreateInverseSF(msf,&imsf);
    PetscSFSetUp(msf);
    PetscSFSetUp(imsf);
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Multi-SF\n");
    PetscSFView(msf,PETSC_VIEWER_STDOUT_WORLD);
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Multi-SF roots indices in original SF roots numbering\n");
    PetscSFComputeDegreeBegin(sf,&degree);
    PetscSFComputeDegreeEnd(sf,&degree);
    PetscSFComputeMultiRootOriginalNumbering(sf,degree,&inedges,&mRootsOrigNumbering);
    PetscIntView(inedges,mRootsOrigNumbering,PETSC_VIEWER_STDOUT_WORLD);
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Inverse of Multi-SF\n");
    PetscSFView(imsf,PETSC_VIEWER_STDOUT_WORLD);
    PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Inverse of Multi-SF, original numbering\n");
    PetscSFViewCustomLocals_Private(imsf,mRootsOrigNumbering,PETSC_VIEWER_STDOUT_WORLD);
    PetscSFDestroy(&imsf);
    PetscFree(mRootsOrigNumbering);
  }
  /* Clean up storage for the star forest. */
  PetscSFDestroy(&sf);
  PetscFinalize();
  return 0;
}
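
/*
  The block below is read by the PETSc test harness; each "test:" stanza describes one configuration
  (number of MPI ranks, command-line arguments, output filters, required build features) under which
  this example is run and its output compared with a reference file.
*/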
/*TEST

   test:
      nsize: 4
      filter: grep -v "type" | grep -v "sort"
      args: -test_bcast -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}}
      requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

   test:
      suffix: 2
      nsize: 4
      filter: grep -v "type" | grep -v "sort"
      args: -test_reduce -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}}
      requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

   test:
      suffix: 2_basic
      nsize: 4
      args: -test_reduce -sf_type basic

   test:
      suffix: 3
      nsize: 4
      filter: grep -v "type" | grep -v "sort"
      args: -test_degree -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}}
      requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

   test:
      suffix: 3_basic
      nsize: 4
      args: -test_degree -sf_type basic

   test:
      suffix: 4
      nsize: 4
      filter: grep -v "type" | grep -v "sort"
      args: -test_gather -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}}
      requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

   test:
      suffix: 4_basic
      nsize: 4
      args: -test_gather -sf_type basic

   test:
      suffix: 4_stride
      nsize: 4
      args: -test_gather -sf_type basic -stride 2

   test:
      suffix: 5
      nsize: 4
      filter: grep -v "type" | grep -v "sort"
      args: -test_scatter -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}}
      requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

   test:
      suffix: 5_basic
      nsize: 4
      args: -test_scatter -sf_type basic

   test:
      suffix: 5_stride
      nsize: 4
      args: -test_scatter -sf_type basic -stride 2

   test:
      suffix: 6
      nsize: 4
      filter: grep -v "type" | grep -v "sort"
      # No -sf_window_flavor dynamic due to bug https://gitlab.com/petsc/petsc/issues/555
      args: -test_embed -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create allocate}}
      requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

   test:
      suffix: 6_basic
      nsize: 4
      args: -test_embed -sf_type basic

   test:
      suffix: 7
      nsize: 4
      filter: grep -v "type" | grep -v "sort"
      args: -test_invert -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}}
      requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

   test:
      suffix: 7_basic
      nsize: 4
      args: -test_invert -sf_type basic

   test:
      suffix: basic
      nsize: 4
      args: -test_bcast -sf_type basic
      output_file: output/ex1_1_basic.out

   test:
      suffix: bcastop_basic
      nsize: 4
      args: -test_bcastop -sf_type basic
      output_file: output/ex1_bcastop_basic.out

   test:
      suffix: 8
      nsize: 3
      filter: grep -v "type" | grep -v "sort"
      args: -test_bcast -test_sf_distribute -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}}
      requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

   test:
      suffix: 8_basic
      nsize: 3
      args: -test_bcast -test_sf_distribute -sf_type basic

   test:
      suffix: 9_char
      nsize: 4
      args: -sf_type basic -test_bcast -test_reduce -test_op max -test_char

   # Here we do not test -sf_window_flavor dynamic since it is designed for repeated SFs with few different rootdata pointers
   test:
      suffix: 10
      filter: grep -v "type" | grep -v "sort"
      nsize: 4
      args: -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create allocate}} -test_all -test_bcastop 0 -test_fetchandop 0
      requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

   # The nightly test suite with MPICH uses ch3:sock, which is broken when winsize == 0 in some of the processes
   test:
      suffix: 10_shared
      output_file: output/ex1_10.out
      filter: grep -v "type" | grep -v "sort"
      nsize: 4
      args: -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor shared -test_all -test_bcastop 0 -test_fetchandop 0
      requires: defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY) !defined(PETSC_HAVE_MPICH_NUMVERSION) defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

   test:
      suffix: 10_basic
      nsize: 4
      args: -sf_type basic -test_all -test_bcastop 0 -test_fetchandop 0

TEST*/