Actual source code: vscatsf.c
petsc-3.11.4 2019-09-28
1: #include <petsc/private/vecscatterimpl.h>
2: #include <petsc/private/sfimpl.h>
4: typedef struct {
5: PetscSF sf; /* the whole scatter, including local and remote */
6: PetscSF lsf; /* the local part of the scatter, used for SCATTER_LOCAL */
7: PetscInt bs; /* block size */
8: MPI_Datatype unit; /* one unit = bs PetscScalars */
9: } VecScatter_SF;
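/* Note added for exposition: when bs > 1, VecScatterSetUp_SF below builds `unit` via
   MPI_Type_contiguous(bs,MPIU_SCALAR,&data->unit) followed by MPI_Type_commit(&data->unit),
   so one SF "unit" carries a whole block of bs scalars; for bs == 1 it is simply MPIU_SCALAR. */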
11: static PetscErrorCode VecScatterBegin_SF(VecScatter vscat,Vec x,Vec y,InsertMode addv,ScatterMode mode)
12: {
13: VecScatter_SF *data=(VecScatter_SF*)vscat->data;
14: PetscSF sf;
15: MPI_Op mop=MPI_OP_NULL;
19: if (x != y) {VecLockReadPush(x);}
21: {
22: #if defined(PETSC_HAVE_CUDA)
23: PetscBool is_cudatype = PETSC_FALSE;
24: PetscObjectTypeCompareAny((PetscObject)x,&is_cudatype,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
25: if (is_cudatype) {
26: VecCUDAAllocateCheckHost(x);
27: if (x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
28: if (x->spptr && vscat->spptr) {VecCUDACopyFromGPUSome_Public(x,(PetscCUDAIndices)vscat->spptr,mode);}
29: else {VecCUDACopyFromGPU(x);}
30: }
31: vscat->xdata = *((PetscScalar**)x->data);
32: } else
33: #endif
34: {
35: VecGetArrayRead(x,&vscat->xdata);
36: }
37: }
39: if (x != y) {VecGetArray(y,&vscat->ydata);}
40: else vscat->ydata = (PetscScalar *)vscat->xdata;
41: VecLockWriteSet_Private(y,PETSC_TRUE);
43: /* SCATTER_LOCAL indicates ignoring inter-process communication */
44: sf = (mode & SCATTER_LOCAL) ? data->lsf : data->sf;
46: if (addv == INSERT_VALUES) mop = MPI_REPLACE;
47: else if (addv == ADD_VALUES) mop = MPI_SUM;
48: else if (addv == MAX_VALUES) mop = MPI_MAX;
49: else SETERRQ1(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"Unsupported InsertMode %D in VecScatterBegin/End",addv);
51: if (mode & SCATTER_REVERSE) { /* reverse scatter sends root to leaf. Note that x and y are swapped in input */
52: PetscSFBcastAndOpBegin(sf,data->unit,vscat->xdata,vscat->ydata,mop);
53: } else { /* forward scatter sends leaf to root, i.e., x to y */
54: PetscSFReduceBegin(sf,data->unit,vscat->xdata,vscat->ydata,mop);
55: }
56: return(0);
57: }
59: static PetscErrorCode VecScatterEnd_SF(VecScatter vscat,Vec x,Vec y,InsertMode addv,ScatterMode mode)
60: {
61: VecScatter_SF *data=(VecScatter_SF*)vscat->data;
62: PetscSF sf;
63: MPI_Op mop=MPI_OP_NULL;
67: /* SCATTER_LOCAL indicates ignoring inter-process communication */
68: sf = (mode & SCATTER_LOCAL) ? data->lsf : data->sf;
70: if (addv == INSERT_VALUES) mop = MPI_REPLACE;
71: else if (addv == ADD_VALUES) mop = MPI_SUM;
72: else if (addv == MAX_VALUES) mop = MPI_MAX;
73: else SETERRQ1(PetscObjectComm((PetscObject)sf),PETSC_ERR_SUP,"Unsupported InsertMode %D in VecScatterBegin/End",addv);
75: if (mode & SCATTER_REVERSE) {/* reverse scatter sends root to leaf. Note that x and y are swapped in input */
76: PetscSFBcastAndOpEnd(sf,data->unit,vscat->xdata,vscat->ydata,mop);
77: } else { /* forward scatter sends leaf to root, i.e., x to y */
78: PetscSFReduceEnd(sf,data->unit,vscat->xdata,vscat->ydata,mop);
79: }
81: if (x != y) {
82: VecRestoreArrayRead(x,&vscat->xdata);
83: VecLockReadPop(x);
84: }
85: VecRestoreArray(y,&vscat->ydata);
86: VecLockWriteSet_Private(y,PETSC_FALSE);
87: return(0);
88: }
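/* A minimal sketch of how the Begin/End pair above is driven from user code. It is not part of
   vscatsf.c, the helper name is illustrative, and error checking is omitted for brevity. */
static PetscErrorCode ExampleForwardScatter(VecScatter ctx,Vec x,Vec y)
{
  /* Forward scatter: entries of x (the SF leaves) are inserted into y (the SF roots) */
  VecScatterBegin(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
  /* Unrelated local work may be overlapped with the communication started above */
  VecScatterEnd(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
  return 0;
}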
90: static PetscErrorCode VecScatterCopy_SF(VecScatter vscat,VecScatter ctx)
91: {
92: VecScatter_SF *data=(VecScatter_SF*)vscat->data,*out;
96: PetscMemcpy(ctx->ops,vscat->ops,sizeof(vscat->ops));
97: PetscNewLog(ctx,&out);
98: PetscSFDuplicate(data->sf,PETSCSF_DUPLICATE_GRAPH,&out->sf);
99: PetscSFDuplicate(data->lsf,PETSCSF_DUPLICATE_GRAPH,&out->lsf);
100: PetscSFSetUp(out->sf);
101: PetscSFSetUp(out->lsf);
103: out->bs = data->bs;
104: if (out->bs > 1) {
105: MPI_Type_dup(data->unit,&out->unit); /* Since oldtype is committed, so is newtype, according to MPI */
106: } else {
107: out->unit = MPIU_SCALAR;
108: }
109: ctx->data = (void*)out;
110: return(0);
111: }
113: static PetscErrorCode VecScatterDestroy_SF(VecScatter vscat)
114: {
115: VecScatter_SF *data = (VecScatter_SF *)vscat->data;
119: PetscSFDestroy(&data->sf);
120: PetscSFDestroy(&data->lsf);
121: if (data->bs > 1) {MPI_Type_free(&data->unit);}
122: PetscFree(vscat->data);
123: return(0);
124: }
126: static PetscErrorCode VecScatterView_SF(VecScatter vscat,PetscViewer viewer)
127: {
128: VecScatter_SF *data = (VecScatter_SF *)vscat->data;
132: PetscSFView(data->sf,viewer);
133: return(0);
134: }
136: /* VecScatterRemap provides a lightweight way to modify a VecScatter. Suppose the input vscat scatters
137: x[i] to y[j]; tomap describes how to change vscat so that it scatters x[tomap[i]] to y[j].
138: */
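/* For example, if the original vscat sends x[0] to y[5] and tomap[0] = 7, then the remapped
   vscat sends x[7] to y[5]. Remapping the other side (frommap) is not supported; see the
   SETERRQ at the end of this routine. */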
139: static PetscErrorCode VecScatterRemap_SF(VecScatter vscat,const PetscInt *tomap,const PetscInt *frommap)
140: {
141: VecScatter_SF *data = (VecScatter_SF *)vscat->data;
142: PetscSF sfs[2],sf;
143: PetscInt i,j;
144: PetscBool ident;
148: sfs[0] = data->sf;
149: sfs[1] = data->lsf;
151: if (tomap) {
152: /* check if it is an identity map. If it is, do nothing */
153: ident = PETSC_TRUE;
154: for (i=0; i<data->sf->nleaves; i++) {if (i != tomap[i]) {ident = PETSC_FALSE; break; } }
155: if (ident) return(0);
157: for (j=0; j<2; j++) {
158: sf = sfs[j];
159: PetscSFSetUp(sf); /* to build sf->rmine if SetUp has not yet been called */
160: if (!sf->mine) { /* the old SF uses contiguous ilocal; after the remapping that may no longer be true */
161: PetscMalloc1(sf->nleaves,&sf->mine);
162: PetscMemcpy(sf->mine,tomap,sizeof(PetscInt)*sf->nleaves);
163: sf->mine_alloc = sf->mine;
164: } else {
165: for (i=0; i<sf->nleaves; i++) sf->mine[i] = tomap[sf->mine[i]];
166: }
167: for (i=0; i<sf->roffset[sf->nranks]; i++) sf->rmine[i] = tomap[sf->rmine[i]];
168: }
169: }
171: if (frommap) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unable to remap the FROM in scatters yet");
172: return(0);
173: }
175: static PetscErrorCode VecScatterGetRemoteCount_SF(VecScatter vscat,PetscBool send,PetscInt *num_procs,PetscInt *num_entries)
176: {
177: VecScatter_SF *data = (VecScatter_SF *)vscat->data;
178: PetscSF sf = data->sf;
179: PetscInt nranks,remote_start;
180: PetscMPIInt myrank;
181: const PetscInt *offset;
182: const PetscMPIInt *ranks;
183: PetscErrorCode ierr;
186: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&myrank);
188: if (send) { PetscSFGetRanks(sf,&nranks,&ranks,&offset,NULL,NULL); }
189: else { PetscSFGetLeafRanks(sf,&nranks,&ranks,&offset,NULL); }
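/* The ranks[] returned above place the calling rank first when the SF communicates with itself;
   those self entries are local rather than remote, hence the remote_start offset used below. */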
191: if (nranks) {
192: remote_start = (myrank == ranks[0])? 1 : 0;
193: if (num_procs) *num_procs = nranks - remote_start;
194: if (num_entries) *num_entries = offset[nranks] - offset[remote_start];
195: } else {
196: if (num_procs) *num_procs = 0;
197: if (num_entries) *num_entries = 0;
198: }
199: return(0);
200: }
202: static PetscErrorCode VecScatterGetRemote_SF(VecScatter vscat,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
203: {
204: VecScatter_SF *data = (VecScatter_SF *)vscat->data;
205: PetscSF sf = data->sf;
206: PetscInt nranks,remote_start;
207: PetscMPIInt myrank;
208: const PetscInt *offset,*location;
209: const PetscMPIInt *ranks;
210: PetscErrorCode ierr;
213: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&myrank);
215: if (send) { PetscSFGetRanks(sf,&nranks,&ranks,&offset,&location,NULL); }
216: else { PetscSFGetLeafRanks(sf,&nranks,&ranks,&offset,&location); }
218: if (nranks) {
219: remote_start = (myrank == ranks[0])? 1 : 0;
220: if (n) *n = nranks - remote_start;
221: if (starts) *starts = &offset[remote_start];
222: if (indices) *indices = location; /* not &location[offset[remote_start]], since starts[0] may point into the middle of indices[] */
223: if (procs) *procs = &ranks[remote_start];
224: } else {
225: if (n) *n = 0;
226: if (starts) *starts = NULL;
227: if (indices) *indices = NULL;
228: if (procs) *procs = NULL;
229: }
231: if (bs) *bs = 1;
232: return(0);
233: }
235: static PetscErrorCode VecScatterGetRemoteOrdered_SF(VecScatter vscat,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
236: {
240: VecScatterGetRemote_SF(vscat,send,n,starts,indices,procs,bs);
241: return(0);
242: }
244: static PetscErrorCode VecScatterRestoreRemote_SF(VecScatter vscat,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
245: {
247: if (starts) *starts = NULL;
248: if (indices) *indices = NULL;
249: if (procs) *procs = NULL;
250: return(0);
251: }
253: static PetscErrorCode VecScatterRestoreRemoteOrdered_SF(VecScatter vscat,PetscBool send,PetscInt *n,const PetscInt **starts,const PetscInt **indices,const PetscMPIInt **procs,PetscInt *bs)
254: {
257: VecScatterRestoreRemote_SF(vscat,send,n,starts,indices,procs,bs);
258: return(0);
259: }
261: typedef enum {IS_INVALID, IS_GENERAL, IS_BLOCK, IS_STRIDE} ISTypeID;
263: PETSC_STATIC_INLINE PetscErrorCode ISGetTypeID_Private(IS is,ISTypeID *id)
264: {
266: PetscBool same;
269: *id = IS_INVALID;
270: PetscObjectTypeCompare((PetscObject)is,ISGENERAL,&same);
271: if (same) {*id = IS_GENERAL; goto functionend;}
272: PetscObjectTypeCompare((PetscObject)is,ISBLOCK,&same);
273: if (same) {*id = IS_BLOCK; goto functionend;}
274: PetscObjectTypeCompare((PetscObject)is,ISSTRIDE,&same);
275: if (same) {*id = IS_STRIDE; goto functionend;}
276: functionend:
277: return(0);
278: }
280: static PetscErrorCode VecScatterSetUp_SF(VecScatter vscat)
281: {
282: VecScatter_SF *data;
283: MPI_Comm comm,xcomm,ycomm,bigcomm;
284: Vec x=vscat->from_v,y=vscat->to_v,xx,yy;
285: IS ix=vscat->from_is,iy=vscat->to_is,ixx,iyy;
286: PetscMPIInt size,xcommsize,ycommsize,myrank;
287: PetscInt i,j,n,N,nroots,nleaves,inedges=0,*leafdata,*rootdata,*ilocal,*lilocal,xstart,ystart,lnleaves,ixsize,iysize,xlen,ylen;
288: const PetscInt *xindices,*yindices,*degree;
289: PetscSFNode *iremote,*liremote;
290: PetscLayout xlayout,ylayout;
291: PetscSF tmpsf;
292: ISTypeID ixid,iyid;
293: PetscInt bs,bsx,bsy,min=PETSC_MIN_INT,max=PETSC_MAX_INT,ixfirst,ixstep,iyfirst,iystep;
294: PetscBool can_do_block_opt=PETSC_FALSE;
298: PetscNewLog(vscat,&data);
300: /* Let P and S stand for parallel and sequential vectors respectively; there are four combinations of vecscatters: PtoP, PtoS, StoP and StoS.
301: The assumption of VecScatterCreate(Vec x,IS ix,Vec y,IS iy,VecScatter *newctx) is: if x is parallel, then ix contains global
302: indices of x. If x is sequential, ix contains local indices of x. Similarly for y and iy.
304: SF is built around the concepts of local leaves and remote roots, which correspond to an StoP scatter. We transform PtoP and PtoS to StoP, and
305: treat StoS as a trivial StoP.
306: */
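/* For example, scattering a parallel (MPI) x into a sequential y that exists on every rank
   (the pattern set up by VecScatterCreateToAll) is a PtoS scatter; below it is handled by
   viewing the seq y's as one concatenated parallel vector, turning PtoS into PtoP and then StoP. */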
307: PetscObjectGetComm((PetscObject)x,&xcomm);
308: PetscObjectGetComm((PetscObject)y,&ycomm);
309: MPI_Comm_size(xcomm,&xcommsize);
310: MPI_Comm_size(ycomm,&ycommsize);
312: /* NULL ix or iy in VecScatterCreate(x,ix,y,iy,newctx) has special meaning. Recover them for these cases */
313: if (!ix) {
314: if (xcommsize > 1 && ycommsize == 1) { /* PtoS: null ix means the whole x will be scattered to each seq y */
315: VecGetSize(x,&N);
316: ISCreateStride(PETSC_COMM_SELF,N,0,1,&ix);
317: } else { /* PtoP, StoP or StoS: null ix means the whole local part of x will be scattered */
318: VecGetLocalSize(x,&n);
319: VecGetOwnershipRange(x,&xstart,NULL);
320: ISCreateStride(PETSC_COMM_SELF,n,xstart,1,&ix);
321: }
322: }
324: if (!iy) {
325: if (xcommsize == 1 && ycommsize > 1) { /* StoP: null iy means each seq x will be scattered into the whole y */
326: VecGetSize(y,&N);
327: ISCreateStride(PETSC_COMM_SELF,N,0,1,&iy);
328: } else { /* PtoP, StoP or StoS: null iy means the whole local part of y will be scattered to */
329: VecGetLocalSize(y,&n);
330: VecGetOwnershipRange(y,&ystart,NULL);
331: ISCreateStride(PETSC_COMM_SELF,n,ystart,1,&iy);
332: }
333: }
335: /* Do error checking immediately after we have non-NULL ix, iy */
336: ISGetLocalSize(ix,&ixsize);
337: ISGetLocalSize(iy,&iysize);
338: VecGetSize(x,&xlen);
339: VecGetSize(y,&ylen);
340: if (ixsize != iysize) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Scatter sizes of ix and iy don't match locally");
341: ISGetMinMax(ix,&min,&max);
342: if (min < 0 || max >= xlen) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scatter indices in ix are out of range");
343: ISGetMinMax(iy,&min,&max);
344: if (min < 0 || max >= ylen) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scatter indices in iy are out of range");
346: /* Do the block optimization by taking advantage of the high-level info available in ix, iy.
347: The block optimization is valid when all of the following conditions are met:
348: 1) ix, iy are blocked or can be blocked (i.e., strided with step=1);
349: 2) ix, iy have the same block size;
350: 3) all processors agree on one block size;
351: 4) no blocks span more than one process;
352: */
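/* A concrete case: with bs = 2 on every rank, an ix created by ISCreateBlock() with block size 2
   and an iy that is an ISStride with step 1 starting at an even offset satisfy 1) and 2);
   conditions 3) and 4) are then verified collectively by the allreduces below. */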
353: data->bs = 1; /* default, no blocking */
354: data->unit = MPIU_SCALAR;
355: ISGetTypeID_Private(ix,&ixid);
356: ISGetTypeID_Private(iy,&iyid);
357: bigcomm = (ycommsize == 1) ? xcomm : ycomm;
359: if (ixid == IS_BLOCK) {ISGetBlockSize(ix,&bsx);}
360: else if (ixid == IS_STRIDE) {ISStrideGetInfo(ix,&ixfirst,&ixstep);}
362: if ( iyid == IS_BLOCK) {ISGetBlockSize(iy,&bsy);}
363: else if (iyid == IS_STRIDE) {ISStrideGetInfo(iy,&iyfirst,&iystep);}
365: /* Different processes may take different paths through this if-else test */
366: if (ixid == IS_BLOCK && iyid == IS_BLOCK) {
367: min = PetscMin(bsx,bsy);
368: max = PetscMax(bsx,bsy);
369: } else if (ixid == IS_BLOCK && iyid == IS_STRIDE && iystep==1 && iyfirst%bsx==0) {
370: min = max = bsx;
371: } else if (ixid == IS_STRIDE && iyid == IS_BLOCK && ixstep==1 && ixfirst%bsy==0) {
372: min = max = bsy;
373: }
374: MPIU_Allreduce(MPI_IN_PLACE,&min,1,MPIU_INT,MPI_MIN,bigcomm);
375: MPIU_Allreduce(MPI_IN_PLACE,&max,1,MPIU_INT,MPI_MAX,bigcomm);
377: /* Since we used allreduce above, all ranks will have the same min and max. min==max
378: implies all ranks have the same bs. Do a further test to see if the local vectors are divisible
379: by bs on ALL ranks. If they are, we are guaranteed that no blocks span more than one processor.
380: */
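/* On ranks where ix/iy do not fit a common block size (none of the branches above applies,
   leaving min/max as iy's index range from ISGetMinMax), min and max will generally differ,
   so after the allreduces the global min != max and blocking is disabled on all ranks. */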
381: if (min == max && min > 1) {
382: PetscInt m[2];
383: VecGetLocalSize(x,&xlen);
384: VecGetLocalSize(y,&ylen);
385: m[0] = xlen%min;
386: m[1] = ylen%min;
387: MPIU_Allreduce(MPI_IN_PLACE,m,2,MPIU_INT,MPI_LOR,bigcomm);
388: if (!m[0] && !m[1]) can_do_block_opt = PETSC_TRUE;
389: }
391: /* If can_do_block_opt, then shrink x, y, ix and iy by bs to get xx, yy, ixx and iyy, whose indices
392: and layout are actually used in building the SF. For example, a blocked ix with bs=3 and block indices {0,2}
393: represents entries {0,1,2,6,7,8}, so ixx = {0,2}; a strided iy = {3,4,5,6,7,8} gives iyy = {1,2}.
395: yy is a little special. If y is seq, then yy is the concatenation of the seq y's on xcomm. In this way,
396: we can treat PtoP and PtoS uniformly as PtoP.
397: */
398: if (can_do_block_opt) {
399: const PetscInt *indices;
401: data->bs = bs = min;
402: MPI_Type_contiguous(bs,MPIU_SCALAR,&data->unit);
403: MPI_Type_commit(&data->unit);
405: /* Shrink x and ix */
406: VecCreateMPIWithArray(xcomm,1,xlen/bs,PETSC_DECIDE,NULL,&xx); /* we only care about xx's layout */
407: if (ixid == IS_BLOCK) {
408: ISBlockGetIndices(ix,&indices);
409: ISBlockGetLocalSize(ix,&ixsize);
410: ISCreateGeneral(PETSC_COMM_SELF,ixsize,indices,PETSC_COPY_VALUES,&ixx);
411: ISBlockRestoreIndices(ix,&indices);
412: } else if (ixid == IS_STRIDE) {
413: ISGetLocalSize(ix,&ixsize);
414: ISCreateStride(PETSC_COMM_SELF,ixsize/bs,ixfirst/bs,1,&ixx);
415: }
417: /* Shrink y and iy */
418: VecCreateMPIWithArray(bigcomm,1,ylen/bs,PETSC_DECIDE,NULL,&yy);
419: if (iyid == IS_BLOCK) {
420: ISBlockGetIndices(iy,&indices);
421: ISBlockGetLocalSize(iy,&iysize);
422: ISCreateGeneral(PETSC_COMM_SELF,iysize,indices,PETSC_COPY_VALUES,&iyy);
423: ISBlockRestoreIndices(iy,&indices);
424: } else if (iyid == IS_STRIDE) {
425: ISGetLocalSize(iy,&iysize);
426: ISCreateStride(PETSC_COMM_SELF,iysize/bs,iyfirst/bs,1,&iyy);
427: }
428: } else {
429: ixx = ix;
430: iyy = iy;
431: xx = x;
432: if (ycommsize == 1) {VecCreateMPIWithArray(bigcomm,1,ylen,PETSC_DECIDE,NULL,&yy);} else yy = y;
433: }
435: /* Now we are ready to build the SF with the preprocessed (xx, yy) and (ixx, iyy) */
436: ISGetIndices(ixx,&xindices);
437: ISGetIndices(iyy,&yindices);
439: if (xcommsize > 1) {
440: /* PtoP or PtoS */
441: VecGetLayout(xx,&xlayout);
442: VecGetOwnershipRange(xx,&xstart,NULL);
443: VecGetLayout(yy,&ylayout);
444: VecGetOwnershipRange(yy,&ystart,NULL);
446: /* Each process has a set of global index pairs (i, j) to scatter xx[i] to yy[j]. We first ship each (i, j) pair to the owner process of i through a temporary SF */
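/* The temporary SF below has one leaf per local entry of ixx, rooted at the process owning that
   x index; gathering leafdata over it delivers every (i, j) pair to i's owner. */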
447: VecGetLocalSize(xx,&nroots);
448: ISGetLocalSize(ixx,&nleaves);
449: PetscMalloc2(nleaves,&iremote,nleaves*2,&leafdata);
451: for (i=0; i<nleaves; i++) {
452: PetscLayoutFindOwnerIndex(xlayout,xindices[i],&iremote[i].rank,&iremote[i].index);
453: leafdata[2*i] = xindices[i];
454: leafdata[2*i+1] = (ycommsize > 1)? yindices[i] : yindices[i] + ystart;
455: }
457: PetscSFCreate(xcomm,&tmpsf);
458: PetscSFSetGraph(tmpsf,nroots,nleaves,NULL,PETSC_USE_POINTER,iremote,PETSC_USE_POINTER);
460: PetscSFComputeDegreeBegin(tmpsf,&degree);
461: PetscSFComputeDegreeEnd(tmpsf,&degree);
463: for (i=0; i<nroots; i++) inedges += degree[i];
464: PetscMalloc1(inedges*2,&rootdata);
465: PetscSFGatherBegin(tmpsf,MPIU_2INT,leafdata,rootdata);
466: PetscSFGatherEnd(tmpsf,MPIU_2INT,leafdata,rootdata);
468: PetscFree2(iremote,leafdata);
469: PetscSFDestroy(&tmpsf);
471: /* rootdata contains global index pairs (i, j). The i's are owned by the current process, but the j's can point anywhere.
472: We convert each i to a local index, and each j to (rank, index). In the end, we get an StoP scatter suitable for building the SF.
473: */
474: nleaves = inedges;
475: VecGetLocalSize(yy,&nroots);
476: PetscMalloc1(nleaves,&ilocal);
477: PetscMalloc1(nleaves,&iremote);
479: for (i=0; i<inedges; i++) {
480: ilocal[i] = rootdata[2*i] - xstart; /* convert x's global index to a local index */
481: PetscLayoutFindOwnerIndex(ylayout,rootdata[2*i+1],&iremote[i].rank,&iremote[i].index); /* convert y's global index to (rank, index) */
482: }
484: /* We MUST build the SF on yy's comm, which is not necessarily identical to xx's comm.
485: In the SF's view, yy contains the roots (i.e., the remote side) and iremote[].rank are ranks in yy's comm.
486: xx contains the leaves, which are local and can be thought of as part of PETSC_COMM_SELF. */
487: PetscSFCreate(PetscObjectComm((PetscObject)yy),&data->sf);
488: PetscSFSetGraph(data->sf,nroots,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);
489: PetscFree(rootdata);
490: } else {
491: /* StoP or StoS */
492: VecGetLayout(yy,&ylayout);
493: ISGetLocalSize(ixx,&nleaves);
494: VecGetLocalSize(yy,&nroots);
495: PetscMalloc1(nleaves,&ilocal);
496: PetscMalloc1(nleaves,&iremote);
497: PetscMemcpy(ilocal,xindices,nleaves*sizeof(PetscInt));
498: for (i=0; i<nleaves; i++) {PetscLayoutFindOwnerIndex(ylayout,yindices[i],&iremote[i].rank,&iremote[i].index);}
499: PetscSFCreate(PetscObjectComm((PetscObject)yy),&data->sf);
500: PetscSFSetGraph(data->sf,nroots,nleaves,ilocal,PETSC_OWN_POINTER,iremote,PETSC_OWN_POINTER);
501: }
503: /* Free memory no longer needed */
504: ISRestoreIndices(ixx,&xindices);
505: ISRestoreIndices(iyy,&yindices);
506: if (can_do_block_opt) {
507: VecDestroy(&xx);
508: VecDestroy(&yy);
509: ISDestroy(&ixx);
510: ISDestroy(&iyy);
511: } else if (ycommsize == 1) {
512: VecDestroy(&yy);
513: }
514: if (!vscat->from_is) {ISDestroy(&ix);}
515: if (!vscat->to_is ) {ISDestroy(&iy);}
517: /* Create lsf, the local scatter. Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
518: PetscObjectGetComm((PetscObject)data->sf,&comm);
519: MPI_Comm_size(comm,&size);
520: MPI_Comm_rank(comm,&myrank);
522: /* Find out local edges and build a local SF */
523: {
524: const PetscInt *ilocal;
525: const PetscSFNode *iremote;
526: PetscSFGetGraph(data->sf,&nroots,&nleaves,&ilocal,&iremote);
527: for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
528: PetscMalloc1(lnleaves,&lilocal);
529: PetscMalloc1(lnleaves,&liremote);
531: for (i=j=0; i<nleaves; i++) {
532: if (iremote[i].rank == (PetscInt)myrank) {
533: lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
534: liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
535: liremote[j].index = iremote[i].index;
536: j++;
537: }
538: }
539: PetscSFCreate(PETSC_COMM_SELF,&data->lsf);
540: PetscSFSetGraph(data->lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);
541: }
543: /* vecscatter uses eager setup */
544: PetscSFSetUp(data->sf);
545: PetscSFSetUp(data->lsf);
547: vscat->data = (void*)data;
548: vscat->ops->begin = VecScatterBegin_SF;
549: vscat->ops->end = VecScatterEnd_SF;
550: vscat->ops->remap = VecScatterRemap_SF;
551: vscat->ops->copy = VecScatterCopy_SF;
552: vscat->ops->destroy = VecScatterDestroy_SF;
553: vscat->ops->view = VecScatterView_SF;
554: vscat->ops->getremotecount = VecScatterGetRemoteCount_SF;
555: vscat->ops->getremote = VecScatterGetRemote_SF;
556: vscat->ops->getremoteordered = VecScatterGetRemoteOrdered_SF;
557: vscat->ops->restoreremote = VecScatterRestoreRemote_SF;
558: vscat->ops->restoreremoteordered = VecScatterRestoreRemoteOrdered_SF;
559: return(0);
560: }
562: PetscErrorCode VecScatterCreate_SF(VecScatter ctx)
563: {
567: ctx->ops->setup = VecScatterSetUp_SF;
568: PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERSF);
569: PetscInfo(ctx,"Using StarForest for vector scatter\n");
570: return(0);
571: }
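/* A minimal end-to-end sketch, not part of vscatsf.c: scatter a whole parallel vector into a
   sequential vector on every rank (a PtoS scatter with NULL index sets). The helper name is
   illustrative, error checking is omitted, and selecting this implementation at run time is
   assumed to go through the usual options mechanism, e.g. -vecscatter_type sf. */
static PetscErrorCode ExampleScatterToAll(Vec x)
{
  Vec        y;
  VecScatter ctx;
  PetscInt   N;

  VecGetSize(x,&N);
  VecCreateSeq(PETSC_COMM_SELF,N,&y);
  VecScatterCreate(x,NULL,y,NULL,&ctx); /* NULL ix/iy: all of x goes to all of y, see VecScatterSetUp_SF above */
  VecScatterBegin(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
  VecScatterEnd(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
  VecScatterDestroy(&ctx);
  VecDestroy(&y);
  return 0;
}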