Actual source code: ex1.c

  1: static const char help[] = "Test star forest communication (PetscSF)\n\n";

  3: /*
  4:     Description: A star is a simple tree with one root and zero or more leaves.
  5:     A star forest is a union of disjoint stars.
  6:     Many common communication patterns can be expressed as updates of rootdata using leafdata and vice versa.
  7:     This example creates a star forest, views the graph, communicates values over it (see the options for the available types of communication), and then destroys it.
  8: */
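
     /*
         The communications exercised below, each enabled by its own -test_* option (or all at once
         with -test_all), are: broadcast (root -> leaf), broadcast combined with an MPI_Op, reduce
         (leaf -> root), root degree computation, atomic fetch-and-op, gather and scatter through the
         multi-root space, embedding on a subset of roots, and inverting the multi-SF.
     */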

 10: /*
 11:   Include petscsf.h so we can use PetscSF objects. Note that this automatically
 12:   includes petscsys.h.
 13: */
 14: #include <petscsf.h>
 15: #include <petscviewer.h>

 17: /* Like PetscSFView(), but with an alternative array of local leaf indices */
 18: static PetscErrorCode PetscSFViewCustomLocals_Private(PetscSF sf,const PetscInt locals[],PetscViewer viewer)
 19: {
 20:   const PetscSFNode *iremote;
 21:   PetscInt          i,nroots,nleaves,nranks;
 22:   PetscMPIInt       rank;

 25:   MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
 26:   PetscSFGetGraph(sf,&nroots,&nleaves,NULL,&iremote);
 27:   PetscSFGetRootRanks(sf,&nranks,NULL,NULL,NULL,NULL);
 28:   PetscViewerASCIIPushTab(viewer);
 29:   PetscViewerASCIIPushSynchronized(viewer);
 30:   PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n",rank,nroots,nleaves,nranks);
 31:   for (i=0; i<nleaves; i++) {
 32:     PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",rank,locals[i],iremote[i].rank,iremote[i].index);
 33:   }
 34:   PetscViewerFlush(viewer);
 35:   PetscViewerASCIIPopTab(viewer);
 36:   PetscViewerASCIIPopSynchronized(viewer);
 37:   return 0;
 38: }

 40: int main(int argc,char **argv)
 41: {
 43:   PetscInt       i,nroots,nrootsalloc,nleaves,nleavesalloc,*mine,stride;
 44:   PetscSFNode    *remote;
 45:   PetscMPIInt    rank,size;
 46:   PetscSF        sf;
 47:   PetscBool      test_all,test_bcast,test_bcastop,test_reduce,test_degree,test_fetchandop,test_gather,test_scatter,test_embed,test_invert,test_sf_distribute,test_char;
 48:   MPI_Op         mop=MPI_OP_NULL; /* initialize to prevent compiler warnings with cxx_quad build */
 49:   char           opstring[256];
 50:   PetscBool      strflg;

 52:   PetscInitialize(&argc,&argv,(char*)0,help);
 53:   MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
 54:   MPI_Comm_size(PETSC_COMM_WORLD,&size);

 56:   PetscOptionsBegin(PETSC_COMM_WORLD,"","PetscSF Test Options","none");
 57:   test_all        = PETSC_FALSE;
 58:   PetscOptionsBool("-test_all","Test all SF communications","",test_all,&test_all,NULL);
 59:   test_bcast      = test_all;
 60:   PetscOptionsBool("-test_bcast","Test broadcast","",test_bcast,&test_bcast,NULL);
 61:   test_bcastop    = test_all;
 62:   PetscOptionsBool("-test_bcastop","Test broadcast and reduce","",test_bcastop,&test_bcastop,NULL);
 63:   test_reduce     = test_all;
 64:   PetscOptionsBool("-test_reduce","Test reduction","",test_reduce,&test_reduce,NULL);
 65:   test_char       = test_all;
 66:   PetscOptionsBool("-test_char","Test signed char, unsigned char, and char","",test_char,&test_char,NULL);
 67:   mop             = MPI_SUM;
 68:   PetscStrcpy(opstring,"sum");
 69:   PetscOptionsString("-test_op","Designate which MPI_Op to use","",opstring,opstring,sizeof(opstring),NULL);
 70:   PetscStrcmp("sum",opstring,&strflg);
 71:   if (strflg) {
 72:     mop = MPIU_SUM;
 73:   }
 74:   PetscStrcmp("prod",opstring,&strflg);
 75:   if (strflg) {
 76:     mop = MPI_PROD;
 77:   }
 78:   PetscStrcmp("max",opstring,&strflg);
 79:   if (strflg) {
 80:     mop = MPI_MAX;
 81:   }
 82:   PetscStrcmp("min",opstring,&strflg);
 83:   if (strflg) {
 84:     mop = MPI_MIN;
 85:   }
 86:   PetscStrcmp("land",opstring,&strflg);
 87:   if (strflg) {
 88:     mop = MPI_LAND;
 89:   }
 90:   PetscStrcmp("band",opstring,&strflg);
 91:   if (strflg) {
 92:     mop = MPI_BAND;
 93:   }
 94:   PetscStrcmp("lor",opstring,&strflg);
 95:   if (strflg) {
 96:     mop = MPI_LOR;
 97:   }
 98:   PetscStrcmp("bor",opstring,&strflg);
 99:   if (strflg) {
100:     mop = MPI_BOR;
101:   }
102:   PetscStrcmp("lxor",opstring,&strflg);
103:   if (strflg) {
104:     mop = MPI_LXOR;
105:   }
106:   PetscStrcmp("bxor",opstring,&strflg);
107:   if (strflg) {
108:     mop = MPI_BXOR;
109:   }
110:   test_degree     = test_all;
111:   PetscOptionsBool("-test_degree","Test computation of vertex degree","",test_degree,&test_degree,NULL);
112:   test_fetchandop = test_all;
113:   PetscOptionsBool("-test_fetchandop","Test atomic Fetch-And-Op","",test_fetchandop,&test_fetchandop,NULL);
114:   test_gather     = test_all;
115:   PetscOptionsBool("-test_gather","Test point gather","",test_gather,&test_gather,NULL);
116:   test_scatter    = test_all;
117:   PetscOptionsBool("-test_scatter","Test point scatter","",test_scatter,&test_scatter,NULL);
118:   test_embed      = test_all;
119:   PetscOptionsBool("-test_embed","Test point embed","",test_embed,&test_embed,NULL);
120:   test_invert     = test_all;
121:   PetscOptionsBool("-test_invert","Test point invert","",test_invert,&test_invert,NULL);
122:   stride          = 1;
123:   PetscOptionsInt("-stride","Stride for leaf and root data","",stride,&stride,NULL);
124:   test_sf_distribute = PETSC_FALSE;
125:   PetscOptionsBool("-test_sf_distribute","Create an SF that 'distributes' to each process, like an alltoall","",test_sf_distribute,&test_sf_distribute,NULL);
126:   PetscOptionsString("-test_op","Designate which MPI_Op to use","",opstring,opstring,sizeof(opstring),NULL);
127:   PetscOptionsEnd();

129:   if (test_sf_distribute) {
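         /* Each process owns size roots and size leaves; leaf i references root index rank (this process's rank)
            on process i, so a broadcast moves one entry from every process to every process, like an alltoall */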
130:     nroots = size;
131:     nrootsalloc = size;
132:     nleaves = size;
133:     nleavesalloc = size;
134:     mine = NULL;
135:     PetscMalloc1(nleaves,&remote);
136:     for (i=0; i<size; i++) {
137:       remote[i].rank = i;
138:       remote[i].index = rank;
139:     }
140:   } else {
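         /* Default graph: every process owns two roots, and rank 0 owns a third. Leaf 0 references root 1 on the
            left periodic neighbor, leaf 1 references root 0 on the right periodic neighbor, and on ranks > 0 leaf 2
            references root 2 on rank 0. With -stride > 1 the used entries are spread apart by stride. */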
141:     nroots       = 2 + (PetscInt)(rank == 0);
142:     nrootsalloc  = nroots * stride;
143:     nleaves      = 2 + (PetscInt)(rank > 0);
144:     nleavesalloc = nleaves * stride;
145:     mine         = NULL;
146:     if (stride > 1) {
147:       PetscInt i;

149:       PetscMalloc1(nleaves,&mine);
150:       for (i = 0; i < nleaves; i++) {
151:         mine[i] = stride * i;
152:       }
153:     }
154:     PetscMalloc1(nleaves,&remote);
155:     /* Left periodic neighbor */
156:     remote[0].rank  = (rank+size-1)%size;
157:     remote[0].index = 1 * stride;
158:     /* Right periodic neighbor */
159:     remote[1].rank  = (rank+1)%size;
160:     remote[1].index = 0 * stride;
161:     if (rank > 0) {               /* Ranks > 0 also reference root index 2 on rank 0 */
162:       remote[2].rank  = 0;
163:       remote[2].index = 2 * stride;
164:     }
165:   }

167:   /* Create a star forest for communication. When stride == 1 the leaf space is dense and mine is NULL; otherwise mine[] holds the strided leaf indices. PETSC_OWN_POINTER hands ownership of mine and remote to the SF, which will free them. */
168:   PetscSFCreate(PETSC_COMM_WORLD,&sf);
169:   PetscSFSetFromOptions(sf);
170:   PetscSFSetGraph(sf,nrootsalloc,nleaves,mine,PETSC_OWN_POINTER,remote,PETSC_OWN_POINTER);
171:   PetscSFSetUp(sf);

173:   /* View graph, mostly useful for debugging purposes. */
174:   PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);
175:   PetscSFView(sf,PETSC_VIEWER_STDOUT_WORLD);
176:   PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);

178:   if (test_bcast) {             /* broadcast rootdata into leafdata */
179:     PetscInt *rootdata,*leafdata;
180:     /* Allocate space for send and receive buffers. This example communicates PetscInt, but other types, including
181:      * user-defined structures, could also be used. */
182:     PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);
183:     /* Set rootdata buffer to be broadcast */
184:     for (i=0; i<nrootsalloc; i++) rootdata[i] = -1;
185:     for (i=0; i<nroots; i++) rootdata[i*stride] = 100*(rank+1) + i;
186:     /* Initialize the leaf buffer; these initial values are never used. */
187:     for (i=0; i<nleavesalloc; i++) leafdata[i] = -1;
188:     /* Broadcast entries from rootdata to leafdata. Computation or other communication can be performed between the begin and end calls. */
189:     PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE);
190:     PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE);
191:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Bcast Rootdata\n");
192:     PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD);
193:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Bcast Leafdata\n");
194:     PetscIntView(nleavesalloc,leafdata,PETSC_VIEWER_STDOUT_WORLD);
195:     PetscFree2(rootdata,leafdata);
196:   }

198:   if (test_bcast && test_char) { /* Bcast with char */
199:     PetscInt len;
200:     char buf[256];
201:     char *rootdata,*leafdata;
202:     PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);
203:     /* Set rootdata buffer to be broadcast */
204:     for (i=0; i<nrootsalloc; i++) rootdata[i] = '*';
205:     for (i=0; i<nroots; i++) rootdata[i*stride] = 'A' + rank*3 + i; /* ranks are small here, so the computed value still fits in a char */
206:     /* Initialize the leaf buffer; these initial values are never used. */
207:     for (i=0; i<nleavesalloc; i++) leafdata[i] = '?';

209:     PetscSFBcastBegin(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE);
210:     PetscSFBcastEnd(sf,MPI_CHAR,rootdata,leafdata,MPI_REPLACE);

212:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Bcast Rootdata in type of char\n");
213:     len  = 0; PetscSNPrintf(buf,256,"%4d:",rank); len += 5;
214:     for (i=0; i<nrootsalloc; i++) {PetscSNPrintf(buf+len,256-len,"%5c",rootdata[i]); len += 5;}
215:     PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);
216:     PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);

218:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Bcast Leafdata in type of char\n");
219:     len = 0; PetscSNPrintf(buf,256,"%4d:",rank); len += 5;
220:     for (i=0; i<nleavesalloc; i++) {PetscSNPrintf(buf+len,256-len,"%5c",leafdata[i]); len += 5;}
221:     PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);
222:     PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);

224:     PetscFree2(rootdata,leafdata);
225:   }

227:   if (test_bcastop) {         /* Reduce rootdata into leafdata */
228:     PetscInt *rootdata,*leafdata;
229:     /* Allocate space for send and receive buffers. This example communicates PetscInt, but other types, including
230:      * user-defined structures, could also be used. */
231:     PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);
232:     /* Set rootdata buffer to be broadcast */
233:     for (i=0; i<nrootsalloc; i++) rootdata[i] = -1;
234:     for (i=0; i<nroots; i++) rootdata[i*stride] = 100*(rank+1) + i;
235:     /* Set leaf values to reduce with */
236:     for (i=0; i<nleavesalloc; i++) leafdata[i] = -10*(rank+1) - i;
237:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Pre-BcastAndOp Leafdata\n");
238:     PetscIntView(nleavesalloc,leafdata,PETSC_VIEWER_STDOUT_WORLD);
239:     /* Broadcast entries from rootdata to leafdata. Computation or other communication can be performed between the begin and end calls. */
240:     PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata,mop);
241:     PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata,mop);
242:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## BcastAndOp Rootdata\n");
243:     PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD);
244:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## BcastAndOp Leafdata\n");
245:     PetscIntView(nleavesalloc,leafdata,PETSC_VIEWER_STDOUT_WORLD);
246:     PetscFree2(rootdata,leafdata);
247:   }

249:   if (test_reduce) {            /* Reduce leafdata into rootdata */
250:     PetscInt *rootdata,*leafdata;
251:     PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);
252:     /* Initialize rootdata buffer in which the result of the reduction will appear. */
253:     for (i=0; i<nrootsalloc; i++) rootdata[i] = -1;
254:     for (i=0; i<nroots; i++) rootdata[i*stride] = 100*(rank+1) + i;
255:     /* Set leaf values to reduce. */
256:     for (i=0; i<nleavesalloc; i++) leafdata[i] = -1;
257:     for (i=0; i<nleaves; i++) leafdata[i*stride] = 1000*(rank+1) + 10*i;
258:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Pre-Reduce Rootdata\n");
259:     PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD);
260:     /* Perform reduction. Computation or other communication can be performed between the begin and end calls.
261:      * This example sums the values, but other MPI_Ops can be used (e.g. MPI_MAX, MPI_PROD). */
262:     PetscSFReduceBegin(sf,MPIU_INT,leafdata,rootdata,mop);
263:     PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,mop);
264:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Leafdata\n");
265:     PetscIntView(nleavesalloc,leafdata,PETSC_VIEWER_STDOUT_WORLD);
266:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Rootdata\n");
267:     PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD);
268:     PetscFree2(rootdata,leafdata);
269:   }

271:   if (test_reduce && test_char) { /* Reduce with signed char */
272:     PetscInt len;
273:     char buf[256];
274:     signed char *rootdata,*leafdata;
275:     PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);
276:     /* Initialize rootdata buffer in which the result of the reduction will appear. */
277:     for (i=0; i<nrootsalloc; i++) rootdata[i] = -1;
278:     for (i=0; i<nroots; i++) rootdata[i*stride] = 10*(rank+1) + i;
279:     /* Set leaf values to reduce. */
280:     for (i=0; i<nleavesalloc; i++) leafdata[i] = -1;
281:     for (i=0; i<nleaves; i++) leafdata[i*stride] = 50*(rank+1) + 10*i;
282:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Pre-Reduce Rootdata in type of signed char\n");

284:     len = 0; PetscSNPrintf(buf,256,"%4d:",rank); len += 5;
285:     for (i=0; i<nrootsalloc; i++) {PetscSNPrintf(buf+len,256-len,"%5d",rootdata[i]); len += 5;}
286:     PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);
287:     PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);

289:     /* Using MPI_CHAR here would trigger an error, since the MPI standard does not define reductions on MPI_CHAR; MPI_SIGNED_CHAR is valid.
290:        Testing with -test_op max, one can see that the sign is respected by MPI_MAX.
291:      */
292:     PetscSFReduceBegin(sf,MPI_SIGNED_CHAR,leafdata,rootdata,mop);
293:     PetscSFReduceEnd(sf,MPI_SIGNED_CHAR,leafdata,rootdata,mop);

295:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Leafdata in type of signed char\n");
296:     len  = 0; PetscSNPrintf(buf,256,"%4d:",rank); len += 5;
297:     for (i=0; i<nleavesalloc; i++) {PetscSNPrintf(buf+len,256-len,"%5d",leafdata[i]); len += 5;}
298:     PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);
299:     PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);

301:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Rootdata in type of signed char\n");
302:     len = 0; PetscSNPrintf(buf,256,"%4d:",rank); len += 5;
303:     for (i=0; i<nrootsalloc; i++) {PetscSNPrintf(buf+len,256-len,"%5d",rootdata[i]); len += 5;}
304:     PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);
305:     PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);

307:     PetscFree2(rootdata,leafdata);
308:   }

310:   if (test_reduce && test_char) { /* Reduce with unsigned char */
311:     PetscInt len;
312:     char buf[256];
313:     unsigned char *rootdata,*leafdata;
314:     PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);
315:     /* Initialize rootdata buffer in which the result of the reduction will appear. */
316:     for (i=0; i<nrootsalloc; i++) rootdata[i] = 0;
317:     for (i=0; i<nroots; i++) rootdata[i*stride] = 10*(rank+1) + i;
318:     /* Set leaf values to reduce. */
319:     for (i=0; i<nleavesalloc; i++) leafdata[i] = 0;
320:     for (i=0; i<nleaves; i++) leafdata[i*stride] = 50*(rank+1) + 10*i;
321:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Pre-Reduce Rootdata in type of unsigned char\n");

323:     len = 0; PetscSNPrintf(buf,256,"%4d:",rank); len += 5;
324:     for (i=0; i<nrootsalloc; i++) {PetscSNPrintf(buf+len,256-len,"%5u",rootdata[i]); len += 5;}
325:     PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);
326:     PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);

328:     /* Using MPI_CHAR here would trigger an error, since the MPI standard does not define reductions on MPI_CHAR; MPI_UNSIGNED_CHAR is valid.
329:        Here the values are reduced as unsigned, so with -test_op max the comparison is unsigned.
330:      */
331:     PetscSFReduceBegin(sf,MPI_UNSIGNED_CHAR,leafdata,rootdata,mop);
332:     PetscSFReduceEnd(sf,MPI_UNSIGNED_CHAR,leafdata,rootdata,mop);

334:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Leafdata in type of unsigned char\n");
335:     len  = 0; PetscSNPrintf(buf,256,"%4d:",rank); len += 5;
336:     for (i=0; i<nleavesalloc; i++) {PetscSNPrintf(buf+len,256-len,"%5u",leafdata[i]); len += 5;}
337:     PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);
338:     PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);

340:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Rootdata in type of unsigned char\n");
341:     len = 0; PetscSNPrintf(buf,256,"%4d:",rank); len += 5;
342:     for (i=0; i<nrootsalloc; i++) {PetscSNPrintf(buf+len,256-len,"%5u",rootdata[i]); len += 5;}
343:     PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);
344:     PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);

346:     PetscFree2(rootdata,leafdata);
347:   }

349:   if (test_degree) {
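         /* Compute, for every root, the number of leaves (over all processes) that reference it */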
350:     const PetscInt *degree;
351:     PetscSFComputeDegreeBegin(sf,&degree);
352:     PetscSFComputeDegreeEnd(sf,&degree);
353:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Root degrees\n");
354:     PetscIntView(nrootsalloc,degree,PETSC_VIEWER_STDOUT_WORLD);
355:   }

357:   if (test_fetchandop) {
358:     /* Cannot use text compare here because token ordering is not deterministic */
359:     PetscInt *leafdata,*leafupdate,*rootdata;
360:     PetscMalloc3(nleavesalloc,&leafdata,nleavesalloc,&leafupdate,nrootsalloc,&rootdata);
361:     for (i=0; i<nleavesalloc; i++) leafdata[i] = -1;
362:     for (i=0; i<nleaves; i++) leafdata[i*stride] = 1;
363:     for (i=0; i<nrootsalloc; i++) rootdata[i] = -1;
364:     for (i=0; i<nroots; i++) rootdata[i*stride] = 0;
365:     PetscSFFetchAndOpBegin(sf,MPIU_INT,rootdata,leafdata,leafupdate,mop);
366:     PetscSFFetchAndOpEnd(sf,MPIU_INT,rootdata,leafdata,leafupdate,mop);
367:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Rootdata (sum of 1 from each leaf)\n");
368:     PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD);
369:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Leafupdate (value at roots prior to my atomic update)\n");
370:     PetscIntView(nleavesalloc,leafupdate,PETSC_VIEWER_STDOUT_WORLD);
371:     PetscFree3(leafdata,leafupdate,rootdata);
372:   }

374:   if (test_gather) {
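         /* Gather the value from every leaf into its own slot of the multi-root space; root i has degree[i] slots,
            so the local multi-root size is the sum of the degrees (inedges below) */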
375:     const PetscInt *degree;
376:     PetscInt       inedges,*indata,*outdata;
377:     PetscSFComputeDegreeBegin(sf,&degree);
378:     PetscSFComputeDegreeEnd(sf,&degree);
379:     for (i=0,inedges=0; i<nrootsalloc; i++) inedges += degree[i];
380:     PetscMalloc2(inedges,&indata,nleavesalloc,&outdata);
381:     for (i=0; i<nleavesalloc; i++) outdata[i] = -1;
382:     for (i=0; i<nleaves; i++) outdata[i*stride] = 1000*(rank+1) + i;
383:     PetscSFGatherBegin(sf,MPIU_INT,outdata,indata);
384:     PetscSFGatherEnd(sf,MPIU_INT,outdata,indata);
385:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Gathered data at multi-roots from leaves\n");
386:     PetscIntView(inedges,indata,PETSC_VIEWER_STDOUT_WORLD);
387:     PetscFree2(indata,outdata);
388:   }

390:   if (test_scatter) {
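         /* Scatter a distinct value from each multi-root slot back to the leaf attached to it (the reverse of the gather above) */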
391:     const PetscInt *degree;
392:     PetscInt       j,count,inedges,*indata,*outdata;
393:     PetscSFComputeDegreeBegin(sf,&degree);
394:     PetscSFComputeDegreeEnd(sf,&degree);
395:     for (i=0,inedges=0; i<nrootsalloc; i++) inedges += degree[i];
396:     PetscMalloc2(inedges,&indata,nleavesalloc,&outdata);
397:     for (i=0; i<nleavesalloc; i++) outdata[i] = -1;
398:     for (i=0,count=0; i<nrootsalloc; i++) {
399:       for (j=0; j<degree[i]; j++) indata[count++] = 1000*(rank+1) + 100*i + j;
400:     }
401:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Data at multi-roots, to scatter to leaves\n");
402:     PetscIntView(inedges,indata,PETSC_VIEWER_STDOUT_WORLD);

404:     PetscSFScatterBegin(sf,MPIU_INT,indata,outdata);
405:     PetscSFScatterEnd(sf,MPIU_INT,indata,outdata);
406:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Scattered data at leaves\n");
407:     PetscIntView(nleavesalloc,outdata,PETSC_VIEWER_STDOUT_WORLD);
408:     PetscFree2(indata,outdata);
409:   }

411:   if (test_embed) {
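         /* Restrict the SF to the selected roots (indices stride and 2*stride); only edges ending at those roots are kept in the embedded SF */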
412:     const PetscInt nroots = 1 + (PetscInt) (rank == 0);
413:     PetscInt       selected[2];
414:     PetscSF        esf;

416:     selected[0] = stride;
417:     selected[1] = 2*stride;
418:     PetscSFCreateEmbeddedRootSF(sf,nroots,selected,&esf);
419:     PetscSFSetUp(esf);
420:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Embedded PetscSF\n");
421:     PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);
422:     PetscSFView(esf,PETSC_VIEWER_STDOUT_WORLD);
423:     PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);
424:     PetscSFDestroy(&esf);
425:   }

427:   if (test_invert) {
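         /* The multi-SF expands each root into degree[i] multi-roots, one per incoming leaf, so every multi-root has
            exactly one leaf and the graph can be inverted, exchanging the roles of roots and leaves.
            PetscSFComputeMultiRootOriginalNumbering() recovers which original root each multi-root came from. */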
428:     const PetscInt *degree;
429:     PetscInt *mRootsOrigNumbering;
430:     PetscInt inedges;
431:     PetscSF msf,imsf;

433:     PetscSFGetMultiSF(sf,&msf);
434:     PetscSFCreateInverseSF(msf,&imsf);
435:     PetscSFSetUp(msf);
436:     PetscSFSetUp(imsf);
437:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Multi-SF\n");
438:     PetscSFView(msf,PETSC_VIEWER_STDOUT_WORLD);
439:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Multi-SF roots indices in original SF roots numbering\n");
440:     PetscSFComputeDegreeBegin(sf,&degree);
441:     PetscSFComputeDegreeEnd(sf,&degree);
442:     PetscSFComputeMultiRootOriginalNumbering(sf,degree,&inedges,&mRootsOrigNumbering);
443:     PetscIntView(inedges,mRootsOrigNumbering,PETSC_VIEWER_STDOUT_WORLD);
444:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Inverse of Multi-SF\n");
445:     PetscSFView(imsf,PETSC_VIEWER_STDOUT_WORLD);
446:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Inverse of Multi-SF, original numbering\n");
447:     PetscSFViewCustomLocals_Private(imsf,mRootsOrigNumbering,PETSC_VIEWER_STDOUT_WORLD);
448:     PetscSFDestroy(&imsf);
449:     PetscFree(mRootsOrigNumbering);
450:   }

452:   /* Clean storage for star forest. */
453:   PetscSFDestroy(&sf);
454:   PetscFinalize();
455:   return 0;
456: }

458: /*TEST

460:    test:
461:       nsize: 4
462:       filter: grep -v "type" | grep -v "sort"
463:       args: -test_bcast -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}}
464:       requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

466:    test:
467:       suffix: 2
468:       nsize: 4
469:       filter: grep -v "type" | grep -v "sort"
470:       args: -test_reduce -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}}
471:       requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

473:    test:
474:       suffix: 2_basic
475:       nsize: 4
476:       args: -test_reduce -sf_type basic

478:    test:
479:       suffix: 3
480:       nsize: 4
481:       filter: grep -v "type" | grep -v "sort"
482:       args: -test_degree -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}}
483:       requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

485:    test:
486:       suffix: 3_basic
487:       nsize: 4
488:       args: -test_degree -sf_type basic

490:    test:
491:       suffix: 4
492:       nsize: 4
493:       filter: grep -v "type" | grep -v "sort"
494:       args: -test_gather -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}}
495:       requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

497:    test:
498:       suffix: 4_basic
499:       nsize: 4
500:       args: -test_gather -sf_type basic

502:    test:
503:       suffix: 4_stride
504:       nsize: 4
505:       args: -test_gather -sf_type basic -stride 2

507:    test:
508:       suffix: 5
509:       nsize: 4
510:       filter: grep -v "type" | grep -v "sort"
511:       args: -test_scatter -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}}
512:       requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

514:    test:
515:       suffix: 5_basic
516:       nsize: 4
517:       args: -test_scatter -sf_type basic

519:    test:
520:       suffix: 5_stride
521:       nsize: 4
522:       args: -test_scatter -sf_type basic -stride 2

524:    test:
525:       suffix: 6
526:       nsize: 4
527:       filter: grep -v "type" | grep -v "sort"
528:       # No -sf_window_flavor dynamic due to bug https://gitlab.com/petsc/petsc/issues/555
529:       args: -test_embed -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create allocate}}
530:       requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

532:    test:
533:       suffix: 6_basic
534:       nsize: 4
535:       args: -test_embed -sf_type basic

537:    test:
538:       suffix: 7
539:       nsize: 4
540:       filter: grep -v "type" | grep -v "sort"
541:       args: -test_invert -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}}
542:       requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

544:    test:
545:       suffix: 7_basic
546:       nsize: 4
547:       args: -test_invert -sf_type basic

549:    test:
550:       suffix: basic
551:       nsize: 4
552:       args: -test_bcast -sf_type basic
553:       output_file: output/ex1_1_basic.out

555:    test:
556:       suffix: bcastop_basic
557:       nsize: 4
558:       args: -test_bcastop -sf_type basic
559:       output_file: output/ex1_bcastop_basic.out

561:    test:
562:       suffix: 8
563:       nsize: 3
564:       filter: grep -v "type" | grep -v "sort"
565:       args: -test_bcast -test_sf_distribute -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create dynamic allocate}}
566:       requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

568:    test:
569:       suffix: 8_basic
570:       nsize: 3
571:       args: -test_bcast -test_sf_distribute -sf_type basic

573:    test:
574:       suffix: 9_char
575:       nsize: 4
576:       args: -sf_type basic -test_bcast -test_reduce -test_op max -test_char

578:    # Here we do not test -sf_window_flavor dynamic since it is designed for repeated SFs with few different rootdata pointers
579:    test:
580:       suffix: 10
581:       filter: grep -v "type" | grep -v "sort"
582:       nsize: 4
583:       args: -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor {{create allocate}} -test_all -test_bcastop 0 -test_fetchandop 0
584:       requires: defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

586:    # The nightly test suite with MPICH uses ch3:sock, which is broken when winsize == 0 in some of the processes
587:    test:
588:       suffix: 10_shared
589:       output_file: output/ex1_10.out
590:       filter: grep -v "type" | grep -v "sort"
591:       nsize: 4
592:       args: -sf_type window -sf_window_sync {{fence active lock}} -sf_window_flavor shared -test_all -test_bcastop 0 -test_fetchandop 0
593:       requires: defined(PETSC_HAVE_MPI_PROCESS_SHARED_MEMORY) !defined(PETSC_HAVE_MPICH_NUMVERSION) defined(PETSC_HAVE_MPI_ONE_SIDED) defined(PETSC_HAVE_MPI_FEATURE_DYNAMIC_WINDOW)

595:    test:
596:       suffix: 10_basic
597:       nsize: 4
598:       args: -sf_type basic -test_all -test_bcastop 0 -test_fetchandop 0

600: TEST*/