Actual source code: ex1.c

petsc-3.12.5 2020-03-29
  1: static const char help[] = "Test star forest communication (PetscSF)\n\n";

  3: /*T
  4:     Description: A star is a simple tree with one root and zero or more leaves.
  5:     A star forest is a union of disjoint stars.
  6:     Many common communication patterns can be expressed as updates of rootdata using leafdata and vice-versa.
  7:     This example creates a star forest, communicates values using the graph (see options for types of communication), views the graph, then destroys it.
  8: T*/
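
/* In the tests below, rootdata is indexed by local root number (nrootsalloc entries)
   and leafdata by local leaf number (nleavesalloc entries).  PetscSFBcast* moves root
   values to the leaves that reference them; PetscSFReduce* moves leaf values onto the
   roots they reference, combining them with an MPI_Op. */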

 10: /*
 11:   Include petscsf.h so we can use PetscSF objects. Note that this automatically
 12:   includes petscsys.h.
 13: */
 14:  #include <petscsf.h>
 15:  #include <petscviewer.h>

 17: /* like PetscSFView() but with alternative array of local indices */
 18: static PetscErrorCode PetscSFViewCustomLocals_Private(PetscSF sf,const PetscInt locals[],PetscViewer viewer)
 19: {
 20:   const PetscSFNode *iremote;
 21:   PetscInt          i,nroots,nleaves,nranks;
 22:   PetscMPIInt       rank;
 23:   PetscErrorCode    ierr;

 26:   MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
 27:   PetscSFGetGraph(sf,&nroots,&nleaves,NULL,&iremote);
 28:   PetscSFGetRootRanks(sf,&nranks,NULL,NULL,NULL,NULL);
 29:   PetscViewerASCIIPushTab(viewer);
 30:   PetscViewerASCIIPushSynchronized(viewer);
 31:   PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%D, leaves=%D, remote ranks=%D\n",rank,nroots,nleaves,nranks);
 32:   for (i=0; i<nleaves; i++) {
 33:     PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D <- (%D,%D)\n",rank,locals[i],iremote[i].rank,iremote[i].index);
 34:   }
 35:   PetscViewerFlush(viewer);
 36:   PetscViewerASCIIPopTab(viewer);
 37:   PetscViewerASCIIPopSynchronized(viewer);
 38:   return(0);
 39: }

 41: int main(int argc,char **argv)
 42: {
  43:   PetscErrorCode ierr;
  44:   PetscInt       i,nroots,nrootsalloc,nleaves,nleavesalloc,*mine,stride;
 45:   PetscSFNode    *remote;
 46:   PetscMPIInt    rank,size;
 47:   PetscSF        sf;
 48:   PetscBool      test_bcast,test_bcastop,test_reduce,test_degree,test_fetchandop,test_gather,test_scatter,test_embed,test_invert,test_sf_distribute,test_char;
 49:   MPI_Op         mop=MPI_OP_NULL; /* initialize to prevent compiler warnings with cxx_quad build */
 50:   char           opstring[256];
 51:   PetscBool      strflg;

 53:   ierr = PetscInitialize(&argc,&argv,(char*)0,help);if (ierr) return ierr;
 54:   MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
 55:   MPI_Comm_size(PETSC_COMM_WORLD,&size);

 57:   PetscOptionsBegin(PETSC_COMM_WORLD,"","PetscSF Test Options","none");
 58:   test_bcast      = PETSC_FALSE;
 59:   PetscOptionsBool("-test_bcast","Test broadcast","",test_bcast,&test_bcast,NULL);
 60:   test_bcastop    = PETSC_FALSE;
 61:   PetscOptionsBool("-test_bcastop","Test broadcast and reduce","",test_bcastop,&test_bcastop,NULL);
 62:   test_reduce     = PETSC_FALSE;
 63:   PetscOptionsBool("-test_reduce","Test reduction","",test_reduce,&test_reduce,NULL);
 64:   test_char       = PETSC_FALSE;
 65:   PetscOptionsBool("-test_char","Test signed char, unsigned char, and char","",test_char,&test_char,NULL);
 66:   mop             = MPI_SUM;
 67:   PetscStrcpy(opstring,"sum");
 68:   PetscOptionsString("-test_op","Designate which MPI_Op to use","",opstring,opstring,256,NULL);
 69:   PetscStrcmp("sum",opstring,&strflg);
 70:   if (strflg) {
 71:     mop = MPIU_SUM;
 72:   }
 73:   PetscStrcmp("prod",opstring,&strflg);
 74:   if (strflg) {
 75:     mop = MPI_PROD;
 76:   }
 77:   PetscStrcmp("max",opstring,&strflg);
 78:   if (strflg) {
 79:     mop = MPI_MAX;
 80:   }
 81:   PetscStrcmp("min",opstring,&strflg);
 82:   if (strflg) {
 83:     mop = MPI_MIN;
 84:   }
 85:   PetscStrcmp("land",opstring,&strflg);
 86:   if (strflg) {
 87:     mop = MPI_LAND;
 88:   }
 89:   PetscStrcmp("band",opstring,&strflg);
 90:   if (strflg) {
 91:     mop = MPI_BAND;
 92:   }
 93:   PetscStrcmp("lor",opstring,&strflg);
 94:   if (strflg) {
 95:     mop = MPI_LOR;
 96:   }
 97:   PetscStrcmp("bor",opstring,&strflg);
 98:   if (strflg) {
 99:     mop = MPI_BOR;
100:   }
101:   PetscStrcmp("lxor",opstring,&strflg);
102:   if (strflg) {
103:     mop = MPI_LXOR;
104:   }
105:   PetscStrcmp("bxor",opstring,&strflg);
106:   if (strflg) {
107:     mop = MPI_BXOR;
108:   }
109:   test_degree     = PETSC_FALSE;
110:   PetscOptionsBool("-test_degree","Test computation of vertex degree","",test_degree,&test_degree,NULL);
111:   test_fetchandop = PETSC_FALSE;
112:   PetscOptionsBool("-test_fetchandop","Test atomic Fetch-And-Op","",test_fetchandop,&test_fetchandop,NULL);
113:   test_gather     = PETSC_FALSE;
114:   PetscOptionsBool("-test_gather","Test point gather","",test_gather,&test_gather,NULL);
115:   test_scatter    = PETSC_FALSE;
116:   PetscOptionsBool("-test_scatter","Test point scatter","",test_scatter,&test_scatter,NULL);
117:   test_embed      = PETSC_FALSE;
118:   PetscOptionsBool("-test_embed","Test point embed","",test_embed,&test_embed,NULL);
119:   test_invert     = PETSC_FALSE;
120:   PetscOptionsBool("-test_invert","Test point invert","",test_invert,&test_invert,NULL);
121:   stride          = 1;
122:   PetscOptionsInt("-stride","Stride for leaf and root data","",stride,&stride,NULL);
123:   test_sf_distribute = PETSC_FALSE;
124:   PetscOptionsBool("-test_sf_distribute","Create an SF that 'distributes' to each process, like an alltoall","",test_sf_distribute,&test_sf_distribute,NULL);
125:   PetscOptionsEnd();

127:   if (test_sf_distribute) {
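        /* Each of the 'size' leaves on this process references root number 'rank' on the
           corresponding remote process, so a broadcast over this graph delivers one value
           from every process to every process, like an MPI_Alltoall with one entry per rank. */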
128:     nroots = size;
129:     nrootsalloc = size;
130:     nleaves = size;
131:     nleavesalloc = size;
132:     mine = NULL;
133:     PetscMalloc1(nleaves,&remote);
134:     for (i=0; i<size; i++) {
135:       remote[i].rank = i;
136:       remote[i].index = rank;
137:     }
138:   } else {
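        /* Default graph: every process owns 2 roots (rank 0 owns 3) and has 2 leaves
           (3 on ranks > 0).  Leaf 0 references root 1 of the left periodic neighbor,
           leaf 1 references root 0 of the right periodic neighbor, and on ranks > 0
           leaf 2 references root 2 on rank 0. */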
139:     nroots       = 2 + (PetscInt)(rank == 0);
140:     nrootsalloc  = nroots * stride;
141:     nleaves      = 2 + (PetscInt)(rank > 0);
142:     nleavesalloc = nleaves * stride;
143:     mine         = NULL;
144:     if (stride > 1) {
145:       PetscInt i;

147:       PetscMalloc1(nleaves,&mine);
148:       for (i = 0; i < nleaves; i++) {
149:         mine[i] = stride * i;
150:       }
151:     }
152:     PetscMalloc1(nleaves,&remote);
153:     /* Left periodic neighbor */
154:     remote[0].rank  = (rank+size-1)%size;
155:     remote[0].index = 1 * stride;
156:     /* Right periodic neighbor */
157:     remote[1].rank  = (rank+1)%size;
158:     remote[1].index = 0 * stride;
159:     if (rank > 0) {               /* All processes reference rank 0, index 1 */
160:       remote[2].rank  = 0;
161:       remote[2].index = 2 * stride;
162:     }
163:   }

 165:   /* Create a star forest for communication. When stride is 1 the leaf space is dense, so mine is NULL; otherwise an explicit list of local leaf offsets is passed. */
166:   PetscSFCreate(PETSC_COMM_WORLD,&sf);
167:   PetscSFSetFromOptions(sf);
168:   PetscSFSetGraph(sf,nrootsalloc,nleaves,mine,PETSC_OWN_POINTER,remote,PETSC_OWN_POINTER);
169:   PetscSFSetUp(sf);

171:   /* View graph, mostly useful for debugging purposes. */
172:   PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);
173:   PetscSFView(sf,PETSC_VIEWER_STDOUT_WORLD);
174:   PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);


177:   if (test_bcast) {             /* broadcast rootdata into leafdata */
178:     PetscInt *rootdata,*leafdata;
 179:     /* Allocate space for send and receive buffers. This example communicates PetscInt, but other types, including
180:      * user-defined structures, could also be used. */
181:     PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);
182:     /* Set rootdata buffer to be broadcast */
183:     for (i=0; i<nrootsalloc; i++) rootdata[i] = -1;
184:     for (i=0; i<nroots; i++) rootdata[i*stride] = 100*(rank+1) + i;
 185:     /* Initialize the local buffer; these values are never used. */
186:     for (i=0; i<nleavesalloc; i++) leafdata[i] = -1;
187:     /* Broadcast entries from rootdata to leafdata. Computation or other communication can be performed between the begin and end calls. */
188:     PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata);
189:     PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata);
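        /* Each leaf entry that participates in the SF now holds a copy of the root value
           it references; any strided padding entries keep their initial value of -1. */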
190:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Bcast Rootdata\n");
191:     PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD);
192:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Bcast Leafdata\n");
193:     PetscIntView(nleavesalloc,leafdata,PETSC_VIEWER_STDOUT_WORLD);
194:     PetscFree2(rootdata,leafdata);
195:   }

197:   if (test_bcast && test_char) { /* Bcast with char */
198:     PetscInt len;
199:     char buf[256];
200:     char *rootdata,*leafdata;
201:     PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);
202:     /* Set rootdata buffer to be broadcast */
203:     for (i=0; i<nrootsalloc; i++) rootdata[i] = '*';
 204:     for (i=0; i<nroots; i++) rootdata[i*stride] = 'A' + rank*3 + i; /* ranks are small here, so 'A' + rank*3 + i still fits in a char */
 205:     /* Initialize the local buffer; these values are never used. */
206:     for (i=0; i<nleavesalloc; i++) leafdata[i] = '?';

208:     PetscSFBcastBegin(sf,MPI_CHAR,rootdata,leafdata);
209:     PetscSFBcastEnd(sf,MPI_CHAR,rootdata,leafdata);

211:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Bcast Rootdata in type of char\n");
212:     len  = 0; PetscSNPrintf(buf,256,"%4d:",rank); len += 5;
213:     for (i=0; i<nrootsalloc; i++) {PetscSNPrintf(buf+len,256-len,"%5c",rootdata[i]); len += 5;}
214:     PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);
215:     PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);

217:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Bcast Leafdata in type of char\n");
218:     len = 0; PetscSNPrintf(buf,256,"%4d:",rank); len += 5;
219:     for (i=0; i<nleavesalloc; i++) {PetscSNPrintf(buf+len,256-len,"%5c",leafdata[i]); len += 5;}
220:     PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);
221:     PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);

223:     PetscFree2(rootdata,leafdata);
224:   }

 226:   if (test_bcastop) {         /* Broadcast rootdata into leafdata, combining with the existing leaf values via mop */
227:     PetscInt *rootdata,*leafdata;
 228:     /* Allocate space for send and receive buffers. This example communicates PetscInt, but other types, including
229:      * user-defined structures, could also be used. */
230:     PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);
231:     /* Set rootdata buffer to be broadcast */
232:     for (i=0; i<nrootsalloc; i++) rootdata[i] = -1;
233:     for (i=0; i<nroots; i++) rootdata[i*stride] = 100*(rank+1) + i;
234:     /* Set leaf values to reduce with */
235:     for (i=0; i<nleavesalloc; i++) leafdata[i] = -10*(rank+1) - i;
236:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Pre-BcastAndOp Leafdata\n");
237:     PetscIntView(nleavesalloc,leafdata,PETSC_VIEWER_STDOUT_WORLD);
238:     /* Broadcast entries from rootdata to leafdata. Computation or other communication can be performed between the begin and end calls. */
239:     PetscSFBcastAndOpBegin(sf,MPIU_INT,rootdata,leafdata,mop);
240:     PetscSFBcastAndOpEnd(sf,MPIU_INT,rootdata,leafdata,mop);
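        /* Each participating leaf entry now holds its previous value combined (via mop)
           with the value of the root it references; the roots themselves are unchanged. */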
241:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## BcastAndOp Rootdata\n");
242:     PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD);
243:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## BcastAndOp Leafdata\n");
244:     PetscIntView(nleavesalloc,leafdata,PETSC_VIEWER_STDOUT_WORLD);
245:     PetscFree2(rootdata,leafdata);
246:   }

248:   if (test_reduce) {            /* Reduce leafdata into rootdata */
249:     PetscInt *rootdata,*leafdata;
250:     PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);
251:     /* Initialize rootdata buffer in which the result of the reduction will appear. */
252:     for (i=0; i<nrootsalloc; i++) rootdata[i] = -1;
253:     for (i=0; i<nroots; i++) rootdata[i*stride] = 100*(rank+1) + i;
254:     /* Set leaf values to reduce. */
255:     for (i=0; i<nleavesalloc; i++) leafdata[i] = -1;
256:     for (i=0; i<nleaves; i++) leafdata[i*stride] = 1000*(rank+1) + 10*i;
257:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Pre-Reduce Rootdata\n");
258:     PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD);
259:     /* Perform reduction. Computation or other communication can be performed between the begin and end calls.
260:      * This example sums the values, but other MPI_Ops can be used (e.g MPI_MAX, MPI_PROD). */
261:     PetscSFReduceBegin(sf,MPIU_INT,leafdata,rootdata,mop);
262:     PetscSFReduceEnd(sf,MPIU_INT,leafdata,rootdata,mop);
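        /* Each root now holds its previous value combined (via mop) with the contributions
           of every leaf that references it; root 2 on rank 0 receives a contribution from
           each of the other processes. */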
263:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Leafdata\n");
264:     PetscIntView(nleavesalloc,leafdata,PETSC_VIEWER_STDOUT_WORLD);
265:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Rootdata\n");
266:     PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD);
267:     PetscFree2(rootdata,leafdata);
268:   }

270:   if (test_reduce && test_char) { /* Reduce with signed char */
271:     PetscInt len;
272:     char buf[256];
273:     signed char *rootdata,*leafdata;
274:     PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);
275:     /* Initialize rootdata buffer in which the result of the reduction will appear. */
276:     for (i=0; i<nrootsalloc; i++) rootdata[i] = -1;
277:     for (i=0; i<nroots; i++) rootdata[i*stride] = 10*(rank+1) + i;
278:     /* Set leaf values to reduce. */
279:     for (i=0; i<nleavesalloc; i++) leafdata[i] = -1;
280:     for (i=0; i<nleaves; i++) leafdata[i*stride] = 50*(rank+1) + 10*i;
281:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Pre-Reduce Rootdata in type of signed char\n");

283:     len = 0; PetscSNPrintf(buf,256,"%4d:",rank); len += 5;
284:     for (i=0; i<nrootsalloc; i++) {PetscSNPrintf(buf+len,256-len,"%5d",rootdata[i]); len += 5;}
285:     PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);
286:     PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);

 288:     /* Using MPI_CHAR here would trigger an error, since the MPI standard does not define reductions on MPI_CHAR.
 289:        Testing with -test_op max, one can see that the sign of the values is respected in MPI_MAX.
 290:      */
291:     PetscSFReduceBegin(sf,MPI_SIGNED_CHAR,leafdata,rootdata,mop);
292:     PetscSFReduceEnd(sf,MPI_SIGNED_CHAR,leafdata,rootdata,mop);

294:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Leafdata in type of signed char\n");
295:     len  = 0; PetscSNPrintf(buf,256,"%4d:",rank); len += 5;
296:     for (i=0; i<nleavesalloc; i++) {PetscSNPrintf(buf+len,256-len,"%5d",leafdata[i]); len += 5;}
297:     PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);
298:     PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);

300:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Rootdata in type of signed char\n");
301:     len = 0; PetscSNPrintf(buf,256,"%4d:",rank); len += 5;
302:     for (i=0; i<nrootsalloc; i++) {PetscSNPrintf(buf+len,256-len,"%5d",rootdata[i]); len += 5;}
303:     PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);
304:     PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);

306:     PetscFree2(rootdata,leafdata);
307:   }

309:   if (test_reduce && test_char) { /* Reduce with unsigned char */
310:     PetscInt len;
311:     char buf[256];
312:     unsigned char *rootdata,*leafdata;
313:     PetscMalloc2(nrootsalloc,&rootdata,nleavesalloc,&leafdata);
314:     /* Initialize rootdata buffer in which the result of the reduction will appear. */
315:     for (i=0; i<nrootsalloc; i++) rootdata[i] = -1;
316:     for (i=0; i<nroots; i++) rootdata[i*stride] = 10*(rank+1) + i;
317:     /* Set leaf values to reduce. */
318:     for (i=0; i<nleavesalloc; i++) leafdata[i] = -1;
319:     for (i=0; i<nleaves; i++) leafdata[i*stride] = 50*(rank+1) + 10*i;
320:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Pre-Reduce Rootdata in type of unsigned char\n");

322:     len = 0; PetscSNPrintf(buf,256,"%4d:",rank); len += 5;
323:     for (i=0; i<nrootsalloc; i++) {PetscSNPrintf(buf+len,256-len,"%5u",rootdata[i]); len += 5;}
324:     PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);
325:     PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);

 327:     /* Using MPI_CHAR here would trigger an error, since the MPI standard does not define reductions on MPI_CHAR.
 328:        With MPI_UNSIGNED_CHAR the values are reduced as unsigned chars, so -test_op max compares them as non-negative numbers.
 329:      */
330:     PetscSFReduceBegin(sf,MPI_UNSIGNED_CHAR,leafdata,rootdata,mop);
331:     PetscSFReduceEnd(sf,MPI_UNSIGNED_CHAR,leafdata,rootdata,mop);

333:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Leafdata in type of unsigned char\n");
334:     len  = 0; PetscSNPrintf(buf,256,"%4d:",rank); len += 5;
335:     for (i=0; i<nleavesalloc; i++) {PetscSNPrintf(buf+len,256-len,"%5u",leafdata[i]); len += 5;}
336:     PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);
337:     PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);

339:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Reduce Rootdata in type of unsigned char\n");
340:     len = 0; PetscSNPrintf(buf,256,"%4d:",rank); len += 5;
341:     for (i=0; i<nrootsalloc; i++) {PetscSNPrintf(buf+len,256-len,"%5u",rootdata[i]); len += 5;}
342:     PetscSynchronizedPrintf(PETSC_COMM_WORLD,"%s\n",buf);
343:     PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);

345:     PetscFree2(rootdata,leafdata);
346:   }

348:   if (test_degree) {
349:     const PetscInt *degree;
350:     PetscSFComputeDegreeBegin(sf,&degree);
351:     PetscSFComputeDegreeEnd(sf,&degree);
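        /* degree[i] is the number of leaf entries, across all processes, that reference
           local root i; padding slots introduced by -stride have degree 0. */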
352:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Root degrees\n");
353:     PetscIntView(nrootsalloc,degree,PETSC_VIEWER_STDOUT_WORLD);
354:   }

356:   if (test_fetchandop) {
 357:     /* Cannot compare output against a reference file here because the order in which the atomic updates are applied is not deterministic */
358:     PetscInt *leafdata,*leafupdate,*rootdata;
359:     PetscMalloc3(nleavesalloc,&leafdata,nleavesalloc,&leafupdate,nrootsalloc,&rootdata);
360:     for (i=0; i<nleavesalloc; i++) leafdata[i] = -1;
361:     for (i=0; i<nleaves; i++) leafdata[i*stride] = 1;
362:     for (i=0; i<nrootsalloc; i++) rootdata[i] = -1;
363:     for (i=0; i<nroots; i++) rootdata[i*stride] = 0;
364:     PetscSFFetchAndOpBegin(sf,MPIU_INT,rootdata,leafdata,leafupdate,mop);
365:     PetscSFFetchAndOpEnd(sf,MPIU_INT,rootdata,leafdata,leafupdate,mop);
366:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Rootdata (sum of 1 from each leaf)\n");
367:     PetscIntView(nrootsalloc,rootdata,PETSC_VIEWER_STDOUT_WORLD);
368:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Leafupdate (value at roots prior to my atomic update)\n");
369:     PetscIntView(nleavesalloc,leafupdate,PETSC_VIEWER_STDOUT_WORLD);
370:     PetscFree3(leafdata,leafupdate,rootdata);
371:   }

373:   if (test_gather) {
374:     const PetscInt *degree;
375:     PetscInt       inedges,*indata,*outdata;
376:     PetscSFComputeDegreeBegin(sf,&degree);
377:     PetscSFComputeDegreeEnd(sf,&degree);
378:     for (i=0,inedges=0; i<nrootsalloc; i++) inedges += degree[i];
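        /* inedges = sum of the root degrees = number of incoming edges on this process,
           i.e. the size of the multi-root space into which PetscSFGather places one
           entry per (root, referencing leaf) pair. */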
379:     PetscMalloc2(inedges,&indata,nleavesalloc,&outdata);
380:     for (i=0; i<nleavesalloc; i++) outdata[i] = -1;
381:     for (i=0; i<nleaves; i++) outdata[i*stride] = 1000*(rank+1) + i;
382:     PetscSFGatherBegin(sf,MPIU_INT,outdata,indata);
383:     PetscSFGatherEnd(sf,MPIU_INT,outdata,indata);
384:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Gathered data at multi-roots from leaves\n");
385:     PetscIntView(inedges,indata,PETSC_VIEWER_STDOUT_WORLD);
386:     PetscFree2(indata,outdata);
387:   }

389:   if (test_scatter) {
390:     const PetscInt *degree;
391:     PetscInt       j,count,inedges,*indata,*outdata;
392:     PetscSFComputeDegreeBegin(sf,&degree);
393:     PetscSFComputeDegreeEnd(sf,&degree);
394:     for (i=0,inedges=0; i<nrootsalloc; i++) inedges += degree[i];
395:     PetscMalloc2(inedges,&indata,nleavesalloc,&outdata);
396:     for (i=0; i<nleavesalloc; i++) outdata[i] = -1;
397:     for (i=0,count=0; i<nrootsalloc; i++) {
398:       for (j=0; j<degree[i]; j++) indata[count++] = 1000*(rank+1) + 100*i + j;
399:     }
400:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Data at multi-roots, to scatter to leaves\n");
401:     PetscIntView(inedges,indata,PETSC_VIEWER_STDOUT_WORLD);

403:     PetscSFScatterBegin(sf,MPIU_INT,indata,outdata);
404:     PetscSFScatterEnd(sf,MPIU_INT,indata,outdata);
405:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Scattered data at leaves\n");
406:     PetscIntView(nleavesalloc,outdata,PETSC_VIEWER_STDOUT_WORLD);
407:     PetscFree2(indata,outdata);
408:   }

410:   if (test_embed) {
411:     const PetscInt nroots = 1 + (PetscInt) !rank;
412:     PetscInt       selected[2];
413:     PetscSF        esf;

415:     selected[0] = stride;
416:     selected[1] = 2*stride;
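        /* The embedded SF keeps only the edges whose root is listed in selected[]: every
           process selects its root at offset stride, and rank 0 additionally selects the
           root at offset 2*stride; leaves referencing any other root are dropped. */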
417:     PetscSFCreateEmbeddedSF(sf,nroots,selected,&esf);
418:     PetscSFSetUp(esf);
419:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Embedded PetscSF\n");
420:     PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO_DETAIL);
421:     PetscSFView(esf,PETSC_VIEWER_STDOUT_WORLD);
422:     PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);
423:     PetscSFDestroy(&esf);
424:   }

426:   if (test_invert) {
427:     const PetscInt *degree;
428:     PetscInt *mRootsOrigNumbering;
429:     PetscInt inedges;
430:     PetscSF msf,imsf;

432:     PetscSFGetMultiSF(sf,&msf);
433:     PetscSFCreateInverseSF(msf,&imsf);
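        /* The multi-SF expands each root into degree[i] slots, one per incoming edge, so
           every multi-root has exactly one referencing leaf; that makes it invertible, and
           PetscSFCreateInverseSF swaps the roles of roots and leaves in the graph. */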
434:     PetscSFSetUp(msf);
435:     PetscSFSetUp(imsf);
436:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Multi-SF\n");
437:     PetscSFView(msf,PETSC_VIEWER_STDOUT_WORLD);
438:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Multi-SF roots indices in original SF roots numbering\n");
439:     PetscSFComputeDegreeBegin(sf,&degree);
440:     PetscSFComputeDegreeEnd(sf,&degree);
441:     PetscSFComputeMultiRootOriginalNumbering(sf,degree,&inedges,&mRootsOrigNumbering);
442:     PetscIntView(inedges,mRootsOrigNumbering,PETSC_VIEWER_STDOUT_WORLD);
443:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Inverse of Multi-SF\n");
444:     PetscSFView(imsf,PETSC_VIEWER_STDOUT_WORLD);
445:     PetscViewerASCIIPrintf(PETSC_VIEWER_STDOUT_WORLD,"## Inverse of Multi-SF, original numbering\n");
446:     PetscSFViewCustomLocals_Private(imsf,mRootsOrigNumbering,PETSC_VIEWER_STDOUT_WORLD);
447:     PetscSFDestroy(&imsf);
448:     PetscFree(mRootsOrigNumbering);
449:   }

451:   /* Clean storage for star forest. */
452:   PetscSFDestroy(&sf);
 453:   ierr = PetscFinalize();
454:   return ierr;
455: }

457: /*TEST

459:    test:
460:       nsize: 4
461:       args: -test_bcast -sf_type window
462:       requires: define(PETSC_HAVE_MPI_WIN_CREATE) define(PETSC_HAVE_MPICH_NUMVERSION)


465:    test:
466:       suffix: 2
467:       nsize: 4
468:       args: -test_reduce -sf_type window
469:       requires: define(PETSC_HAVE_MPI_WIN_CREATE) define(PETSC_HAVE_MPICH_NUMVERSION)

471:    test:
472:       suffix: 2_basic
473:       nsize: 4
474:       args: -test_reduce -sf_type basic

476:    test:
477:       suffix: 3
478:       nsize: 4
479:       args: -test_degree -sf_type window
480:       requires: define(PETSC_HAVE_MPI_WIN_CREATE) define(PETSC_HAVE_MPICH_NUMVERSION)

482:    test:
483:       suffix: 3_basic
484:       nsize: 4
485:       args: -test_degree -sf_type basic

487:    test:
488:       suffix: 4
489:       nsize: 4
490:       args: -test_gather -sf_type window
491:       requires: define(PETSC_HAVE_MPI_WIN_CREATE) define(PETSC_HAVE_MPICH_NUMVERSION)

493:    test:
494:       suffix: 4_basic
495:       nsize: 4
496:       args: -test_gather -sf_type basic

498:    test:
499:       suffix: 4_stride
500:       nsize: 4
501:       args: -test_gather -sf_type basic -stride 2

503:    test:
504:       suffix: 5
505:       nsize: 4
506:       args: -test_scatter -sf_type window
507:       requires: define(PETSC_HAVE_MPI_WIN_CREATE) define(PETSC_HAVE_MPICH_NUMVERSION)

509:    test:
510:       suffix: 5_basic
511:       nsize: 4
512:       args: -test_scatter -sf_type basic

514:    test:
515:       suffix: 5_stride
516:       nsize: 4
517:       args: -test_scatter -sf_type basic -stride 2

519:    test:
520:       suffix: 6
521:       nsize: 4
522:       args: -test_embed -sf_type window
523:       requires: define(PETSC_HAVE_MPI_WIN_CREATE) define(PETSC_HAVE_MPICH_NUMVERSION)

525:    test:
526:       suffix: 6_basic
527:       nsize: 4
528:       args: -test_embed -sf_type basic

530:    test:
531:       suffix: 7
532:       nsize: 4
533:       args: -test_invert -sf_type window
534:       requires: define(PETSC_HAVE_MPI_WIN_CREATE) define(PETSC_HAVE_MPICH_NUMVERSION)

536:    test:
537:       suffix: 7_basic
538:       nsize: 4
539:       args: -test_invert -sf_type basic

541:    test:
542:       suffix: basic
543:       nsize: 4
544:       args: -test_bcast -sf_type basic
545:       output_file: output/ex1_1_basic.out

547:    test:
548:       suffix: bcastop_basic
549:       nsize: 4
550:       args: -test_bcastop -sf_type basic
551:       output_file: output/ex1_bcastop_basic.out

553:    test:
554:       suffix: 8
555:       nsize: 3
556:       args: -test_bcast -test_sf_distribute -sf_type window
557:       requires: define(PETSC_HAVE_MPI_WIN_CREATE)

559:    test:
560:       suffix: 8_basic
561:       nsize: 3
562:       args: -test_bcast -test_sf_distribute -sf_type basic

564:    test:
565:       suffix: 9_char
566:       nsize: 4
567:       args: -sf_type basic -test_bcast -test_reduce -test_op max -test_char
568: TEST*/