Actual source code: vpscat.c
1: #define PETSCVEC_DLL
3: /*
4: Defines parallel vector scatters.
5: */
7: #include "private/isimpl.h"
8: #include "private/vecimpl.h"
9: #include "../src/vec/vec/impls/dvecimpl.h"
10: #include "../src/vec/vec/impls/mpi/pvecimpl.h"
14: PetscErrorCode VecScatterView_MPI(VecScatter ctx,PetscViewer viewer)
15: {
16: VecScatter_MPI_General *to=(VecScatter_MPI_General*)ctx->todata;
17: VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
18: PetscErrorCode ierr;
19: PetscInt i;
20: PetscMPIInt rank;
21: PetscViewerFormat format;
22: PetscTruth iascii;
25: PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
26: if (iascii) {
27: MPI_Comm_rank(((PetscObject)ctx)->comm,&rank);
28: PetscViewerGetFormat(viewer,&format);
29: if (format == PETSC_VIEWER_ASCII_INFO) {
30: PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;
32: MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,((PetscObject)ctx)->comm);
33: MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,((PetscObject)ctx)->comm);
34: itmp = to->starts[to->n];
35: MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,((PetscObject)ctx)->comm);
36: itmp = from->starts[from->n];
37: MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,((PetscObject)ctx)->comm);
38: MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,((PetscObject)ctx)->comm);
40: PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
41: PetscViewerASCIIPrintf(viewer," Maximum number sends %D\n",nsend_max);
42: PetscViewerASCIIPrintf(viewer," Maximum number receives %D\n",nrecv_max);
43: PetscViewerASCIIPrintf(viewer," Maximum data sent %D\n",(int)(lensend_max*to->bs*sizeof(PetscScalar)));
44: PetscViewerASCIIPrintf(viewer," Maximum data received %D\n",(int)(lenrecv_max*to->bs*sizeof(PetscScalar)));
45: PetscViewerASCIIPrintf(viewer," Total data sent %D\n",(int)(alldata*to->bs*sizeof(PetscScalar)));
47: } else {
48: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);
49: if (to->n) {
50: for (i=0; i<to->n; i++){
51: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length = %D to whom %D\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
52: }
53: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote sends (in order by process sent to)\n");
54: for (i=0; i<to->starts[to->n]; i++){
55: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]%D \n",rank,to->indices[i]);
56: }
57: }
59: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);
60: if (from->n) {
61: for (i=0; i<from->n; i++){
62: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %D\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
63: }
65: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote receives (in order by process received from)\n");
66: for (i=0; i<from->starts[from->n]; i++){
67: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]%D \n",rank,from->indices[i]);
68: }
69: }
70: if (to->local.n) {
71: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]Indices for local part of scatter\n",rank);
72: for (i=0; i<to->local.n; i++){
73: PetscViewerASCIISynchronizedPrintf(viewer,"[%d]From %D to %D \n",rank,from->local.vslots[i],to->local.vslots[i]);
74: }
75: }
77: PetscViewerFlush(viewer);
78: }
79: } else {
80: SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for this scatter",((PetscObject)viewer)->type_name);
81: }
82: return(0);
83: }
85: /* -----------------------------------------------------------------------------------*/
86: /*
87:      The next routine determines which part of the local scatter is an
88:   exact copy of values into their current locations. We check this here and
89:   then know that we need not perform that portion of the scatter when the vector
90:   scatters to itself with INSERT_VALUES.
92:      This is currently not used, but it would speed up, for example, DALocalToLocalBegin/End().
94: */
97: PetscErrorCode VecScatterLocalOptimize_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from)
98: {
99: PetscInt n = to->n,n_nonmatching = 0,i,*to_slots = to->vslots,*from_slots = from->vslots;
101: PetscInt *nto_slots,*nfrom_slots,j = 0;
102:
104: for (i=0; i<n; i++) {
105: if (to_slots[i] != from_slots[i]) n_nonmatching++;
106: }
108: if (!n_nonmatching) {
109: to->nonmatching_computed = PETSC_TRUE;
110: to->n_nonmatching = from->n_nonmatching = 0;
111: PetscInfo1(scatter,"Reduced %D to 0\n", n);
112: } else if (n_nonmatching == n) {
113: to->nonmatching_computed = PETSC_FALSE;
114: PetscInfo(scatter,"All values non-matching\n");
115: } else {
116: to->nonmatching_computed= PETSC_TRUE;
117: to->n_nonmatching = from->n_nonmatching = n_nonmatching;
118: PetscMalloc(n_nonmatching*sizeof(PetscInt),&nto_slots);
119: PetscMalloc(n_nonmatching*sizeof(PetscInt),&nfrom_slots);
120: to->slots_nonmatching = nto_slots;
121: from->slots_nonmatching = nfrom_slots;
122: for (i=0; i<n; i++) {
123: if (to_slots[i] != from_slots[i]) {
124: nto_slots[j] = to_slots[i];
125: nfrom_slots[j] = from_slots[i];
126: j++;
127: }
128: }
129: PetscInfo2(scatter,"Reduced %D to %D\n",n,n_nonmatching);
130: }
131: return(0);
132: }
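/*
   A minimal standalone sketch of the filtering performed above, written against
   plain PetscInt arrays; the helper name filter_nonmatching_slots() is hypothetical
   and exists only to illustrate the logic, it is not part of PETSc.

   Example: to = {0,1,5,3} and from = {0,1,2,3} leaves only the pair (5,2), so a
   scatter to self with INSERT_VALUES need only move that single entry.
*/
PETSC_STATIC_INLINE PetscInt filter_nonmatching_slots(PetscInt n,const PetscInt *to_slots,const PetscInt *from_slots,PetscInt *nto_slots,PetscInt *nfrom_slots)
{
  PetscInt i,j = 0;
  for (i=0; i<n; i++) {
    if (to_slots[i] != from_slots[i]) { /* this value actually moves */
      nto_slots[j]   = to_slots[i];
      nfrom_slots[j] = from_slots[i];
      j++;
    }
  }
  return j; /* number of entries that still have to be scattered */
}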
134: /* --------------------------------------------------------------------------------------*/
136: /* -------------------------------------------------------------------------------------*/
139: PetscErrorCode VecScatterDestroy_PtoP(VecScatter ctx)
140: {
141: VecScatter_MPI_General *to = (VecScatter_MPI_General*)ctx->todata;
142: VecScatter_MPI_General *from = (VecScatter_MPI_General*)ctx->fromdata;
143: PetscErrorCode ierr;
144: PetscInt i;
147: if (to->use_readyreceiver) {
148: /*
149: Since we have already posted the receives we must cancel them before freeing
150: the requests
151: */
152: for (i=0; i<from->n; i++) {
153: MPI_Cancel(from->requests+i);
154: }
155: for (i=0; i<to->n; i++) {
156: MPI_Cancel(to->rev_requests+i);
157: }
158: MPI_Waitall(from->n,from->requests,to->rstatus);
159: MPI_Waitall(to->n,to->rev_requests,to->rstatus);
160: }
162: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
163: if (to->use_alltoallw) {
164: PetscFree3(to->wcounts,to->wdispls,to->types);
165: PetscFree3(from->wcounts,from->wdispls,from->types);
166: }
167: #endif
169: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
170: if (to->use_window) {
171: MPI_Win_free(&from->window);
172: MPI_Win_free(&to->window);
173: PetscFree(from->winstarts);
174: PetscFree(to->winstarts);
175: }
176: #endif
178: if (to->use_alltoallv) {
179: PetscFree2(to->counts,to->displs);
180: PetscFree2(from->counts,from->displs);
181: }
183: /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
184: /*
185:    IBM's PE version of MPI has a bug where freeing these requests will corrupt
186:    later message passing.
187: */
188: #if !defined(PETSC_HAVE_BROKEN_REQUEST_FREE)
189: if (!to->use_alltoallv && !to->use_window) { /* currently the to->requests etc are ALWAYS allocated even if not used */
190: if (to->requests) {
191: for (i=0; i<to->n; i++) {
192: MPI_Request_free(to->requests + i);
193: }
194: }
195: if (to->rev_requests) {
196: for (i=0; i<to->n; i++) {
197: MPI_Request_free(to->rev_requests + i);
198: }
199: }
200: }
201: /*
202:      MPICH could not properly cancel requests, so with ready-receiver mode we
203:    cannot free the requests. This may be fixed now; if not, put the following
204:    code inside an if (!to->use_readyreceiver) { } block.
205: */
206: if (!to->use_alltoallv && !to->use_window) { /* currently the from->requests etc are ALWAYS allocated even if not used */
207: if (from->requests) {
208: for (i=0; i<from->n; i++) {
209: MPI_Request_free(from->requests + i);
210: }
211: }
213: if (from->rev_requests) {
214: for (i=0; i<from->n; i++) {
215: MPI_Request_free(from->rev_requests + i);
216: }
217: }
218: }
219: #endif
221: PetscFree(to->local.vslots);
222: PetscFree(from->local.vslots);
223: PetscFree2(to->counts,to->displs);
224: PetscFree2(from->counts,from->displs);
225: PetscFree(to->local.slots_nonmatching);
226: PetscFree(from->local.slots_nonmatching);
227: PetscFree(to->rev_requests);
228: PetscFree(from->rev_requests);
229: PetscFree(to->requests);
230: PetscFree(from->requests);
231: PetscFree4(to->values,to->indices,to->starts,to->procs);
232: PetscFree2(to->sstatus,to->rstatus);
233: PetscFree4(from->values,from->indices,from->starts,from->procs);
234: PetscFree(from);
235: PetscFree(to);
236: PetscHeaderDestroy(ctx);
237: return(0);
238: }
242: /* --------------------------------------------------------------------------------------*/
243: /*
244: Special optimization to detect whether the local part of the scatter is actually
245: a contiguous copy; if so, the scatter routines call PetscMemcpy() instead.
246:
247: */
250: PetscErrorCode VecScatterLocalOptimizeCopy_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from,PetscInt bs)
251: {
252: PetscInt n = to->n,i,*to_slots = to->vslots,*from_slots = from->vslots;
253: PetscInt to_start,from_start;
257: to_start = to_slots[0];
258: from_start = from_slots[0];
260: for (i=1; i<n; i++) {
261: to_start += bs;
262: from_start += bs;
263: if (to_slots[i] != to_start) return(0);
264: if (from_slots[i] != from_start) return(0);
265: }
266: to->is_copy = PETSC_TRUE;
267: to->copy_start = to_slots[0];
268: to->copy_length = bs*sizeof(PetscScalar)*n;
269: from->is_copy = PETSC_TRUE;
270: from->copy_start = from_slots[0];
271: from->copy_length = bs*sizeof(PetscScalar)*n;
272: PetscInfo(scatter,"Local scatter is a copy, optimizing for it\n");
273: return(0);
274: }
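/*
   A minimal sketch of how the is_copy/copy_start/copy_length fields set above are
   presumably consumed; the actual scatter kernels live in vpscat.h (included below)
   and are not shown on this page, and the helper name is hypothetical. Note that
   copy_start is an index in units of PetscScalar entries while copy_length is in
   bytes, matching the assignments above.
*/
PETSC_STATIC_INLINE void LocalScatterCopy_Sketch(VecScatter_Seq_General *to,VecScatter_Seq_General *from,const PetscScalar *xv,PetscScalar *yv)
{
  if (to->is_copy) {
    /* one contiguous memcpy replaces the indexed gather/scatter loop */
    PetscMemcpy(yv + to->copy_start,xv + from->copy_start,to->copy_length);
  }
}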
276: /* --------------------------------------------------------------------------------------*/
280: PetscErrorCode VecScatterCopy_PtoP_X(VecScatter in,VecScatter out)
281: {
282: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
283: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
284: PetscErrorCode ierr;
285: PetscInt ny,bs = in_from->bs;
288: out->begin = in->begin;
289: out->end = in->end;
290: out->copy = in->copy;
291: out->destroy = in->destroy;
292: out->view = in->view;
294: /* allocate entire send scatter context */
295: PetscNewLog(out,VecScatter_MPI_General,&out_to);
296: PetscNewLog(out,VecScatter_MPI_General,&out_from);
298: ny = in_to->starts[in_to->n];
299: out_to->n = in_to->n;
300: out_to->type = in_to->type;
301: out_to->sendfirst = in_to->sendfirst;
303: PetscMalloc(out_to->n*sizeof(MPI_Request),&out_to->requests);
304: PetscMalloc4(bs*ny,PetscScalar,&out_to->values,ny,PetscInt,&out_to->indices,out_to->n+1,PetscInt,&out_to->starts,out_to->n,PetscMPIInt,&out_to->procs);
305: PetscMalloc2(PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->sstatus,PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->rstatus);
306: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
307: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
308: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
309:
310: out->todata = (void*)out_to;
311: out_to->local.n = in_to->local.n;
312: out_to->local.nonmatching_computed = PETSC_FALSE;
313: out_to->local.n_nonmatching = 0;
314: out_to->local.slots_nonmatching = 0;
315: if (in_to->local.n) {
316: PetscMalloc(in_to->local.n*sizeof(PetscInt),&out_to->local.vslots);
317: PetscMalloc(in_from->local.n*sizeof(PetscInt),&out_from->local.vslots);
318: PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
319: PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
320: } else {
321: out_to->local.vslots = 0;
322: out_from->local.vslots = 0;
323: }
325: /* allocate entire receive context */
326: out_from->type = in_from->type;
327: ny = in_from->starts[in_from->n];
328: out_from->n = in_from->n;
329: out_from->sendfirst = in_from->sendfirst;
331: PetscMalloc(out_from->n*sizeof(MPI_Request),&out_from->requests);
332: PetscMalloc4(ny*bs,PetscScalar,&out_from->values,ny,PetscInt,&out_from->indices,out_from->n+1,PetscInt,&out_from->starts,out_from->n,PetscMPIInt,&out_from->procs);
333: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
334: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
335: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
336: out->fromdata = (void*)out_from;
337: out_from->local.n = in_from->local.n;
338: out_from->local.nonmatching_computed = PETSC_FALSE;
339: out_from->local.n_nonmatching = 0;
340: out_from->local.slots_nonmatching = 0;
342: /*
343:     set up the request arrays for use with MPI_Send_init() and MPI_Recv_init()
344: */
345: {
346: PetscMPIInt tag;
347: MPI_Comm comm;
348: PetscInt *sstarts = out_to->starts, *rstarts = out_from->starts;
349: PetscMPIInt *sprocs = out_to->procs, *rprocs = out_from->procs;
350: PetscInt i;
351: PetscTruth flg;
352: MPI_Request *swaits = out_to->requests,*rwaits = out_from->requests;
353: MPI_Request *rev_swaits,*rev_rwaits;
354: PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;
356: PetscMalloc(in_to->n*sizeof(MPI_Request),&out_to->rev_requests);
357: PetscMalloc(in_from->n*sizeof(MPI_Request),&out_from->rev_requests);
359: rev_rwaits = out_to->rev_requests;
360: rev_swaits = out_from->rev_requests;
362: out_from->bs = out_to->bs = bs;
363: tag = ((PetscObject)out)->tag;
364: comm = ((PetscObject)out)->comm;
366: /* Register the receives that you will use later (sends for scatter reverse) */
367: for (i=0; i<out_from->n; i++) {
368: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
369: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
370: }
372: flg = PETSC_FALSE;
373: PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_rsend",&flg,PETSC_NULL);
374: if (flg) {
375: out_to->use_readyreceiver = PETSC_TRUE;
376: out_from->use_readyreceiver = PETSC_TRUE;
377: for (i=0; i<out_to->n; i++) {
378: MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
379: }
380: if (out_from->n) {MPI_Startall_irecv(out_from->starts[out_from->n]*out_from->bs,out_from->n,out_from->requests);}
381: MPI_Barrier(comm);
382: PetscInfo(in,"Using VecScatter ready receiver mode\n");
383: } else {
384: out_to->use_readyreceiver = PETSC_FALSE;
385: out_from->use_readyreceiver = PETSC_FALSE;
386: flg = PETSC_FALSE;
387: PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_ssend",&flg,PETSC_NULL);
388: if (flg) {
389: PetscInfo(in,"Using VecScatter Ssend mode\n");
390: }
391: for (i=0; i<out_to->n; i++) {
392: if (!flg) {
393: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
394: } else {
395: MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
396: }
397: }
398: }
399: /* Register receives for scatter reverse */
400: for (i=0; i<out_to->n; i++) {
401: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
402: }
403: }
405: return(0);
406: }
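/*
   A minimal sketch of how the persistent requests registered above are typically
   driven. The real VecScatterBegin/End kernels live in vpscat.h (included below
   for each block size) and are not shown on this page, so this only illustrates
   the standard MPI persistent-request cycle under that assumption; the helper
   name is hypothetical.
*/
PETSC_STATIC_INLINE void DrivePersistentScatter_Sketch(VecScatter_MPI_General *to,VecScatter_MPI_General *from)
{
  /* (re)activate the preposted receives and sends; they reuse the buffers that
     were given to MPI_Recv_init()/MPI_Send_init() above */
  if (from->n) MPI_Startall(from->n,from->requests);
  /* ... pack to->values here ... */
  if (to->n)   MPI_Startall(to->n,to->requests);
  /* completing the requests returns them to the inactive state, ready to be
     started again by the next scatter */
  if (to->n)   MPI_Waitall(to->n,to->requests,to->sstatus);
  if (from->n) MPI_Waitall(from->n,from->requests,to->rstatus);
  /* ... unpack from->values here ... */
}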
410: PetscErrorCode VecScatterCopy_PtoP_AllToAll(VecScatter in,VecScatter out)
411: {
412: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
413: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
414: PetscErrorCode ierr;
415: PetscInt ny,bs = in_from->bs;
416: PetscMPIInt size;
419: MPI_Comm_size(((PetscObject)in)->comm,&size);
420: out->begin = in->begin;
421: out->end = in->end;
422: out->copy = in->copy;
423: out->destroy = in->destroy;
424: out->view = in->view;
426: /* allocate entire send scatter context */
427: PetscNewLog(out,VecScatter_MPI_General,&out_to);
428: PetscNewLog(out,VecScatter_MPI_General,&out_from);
430: ny = in_to->starts[in_to->n];
431: out_to->n = in_to->n;
432: out_to->type = in_to->type;
433: out_to->sendfirst = in_to->sendfirst;
435: PetscMalloc(out_to->n*sizeof(MPI_Request),&out_to->requests);
436: PetscMalloc4(bs*ny,PetscScalar,&out_to->values,ny,PetscInt,&out_to->indices,out_to->n+1,PetscInt,&out_to->starts,out_to->n,PetscMPIInt,&out_to->procs);
437: PetscMalloc2(PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->sstatus,PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->rstatus);
438: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
439: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
440: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
441:
442: out->todata = (void*)out_to;
443: out_to->local.n = in_to->local.n;
444: out_to->local.nonmatching_computed = PETSC_FALSE;
445: out_to->local.n_nonmatching = 0;
446: out_to->local.slots_nonmatching = 0;
447: if (in_to->local.n) {
448: PetscMalloc(in_to->local.n*sizeof(PetscInt),&out_to->local.vslots);
449: PetscMalloc(in_from->local.n*sizeof(PetscInt),&out_from->local.vslots);
450: PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
451: PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
452: } else {
453: out_to->local.vslots = 0;
454: out_from->local.vslots = 0;
455: }
457: /* allocate entire receive context */
458: out_from->type = in_from->type;
459: ny = in_from->starts[in_from->n];
460: out_from->n = in_from->n;
461: out_from->sendfirst = in_from->sendfirst;
463: PetscMalloc(out_from->n*sizeof(MPI_Request),&out_from->requests);
464: PetscMalloc4(ny*bs,PetscScalar,&out_from->values,ny,PetscInt,&out_from->indices,out_from->n+1,PetscInt,&out_from->starts,out_from->n,PetscMPIInt,&out_from->procs);
465: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
466: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
467: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
468: out->fromdata = (void*)out_from;
469: out_from->local.n = in_from->local.n;
470: out_from->local.nonmatching_computed = PETSC_FALSE;
471: out_from->local.n_nonmatching = 0;
472: out_from->local.slots_nonmatching = 0;
474: out_to->use_alltoallv = out_from->use_alltoallv = PETSC_TRUE;
476: PetscMalloc2(size,PetscMPIInt,&out_to->counts,size,PetscMPIInt,&out_to->displs);
477: PetscMemcpy(out_to->counts,in_to->counts,size*sizeof(PetscMPIInt));
478: PetscMemcpy(out_to->displs,in_to->displs,size*sizeof(PetscMPIInt));
480: PetscMalloc2(size,PetscMPIInt,&out_from->counts,size,PetscMPIInt,&out_from->displs);
481: PetscMemcpy(out_from->counts,in_from->counts,size*sizeof(PetscMPIInt));
482: PetscMemcpy(out_from->displs,in_from->displs,size*sizeof(PetscMPIInt));
483: return(0);
484: }
485: /* --------------------------------------------------------------------------------------------------
486:     Packs the message data into the send buffers and unpacks it from the receive buffers.
488:     These could be generated automatically.
490:     Fortran kernels etc. could be used.
491: */
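/*
   The routines below are hand-unrolled specializations for fixed block sizes
   (1 through 8 and 12). For an arbitrary block size bs the generic pack would
   look roughly like the sketch below; Pack_bs() is a hypothetical name used only
   for illustration and does not appear elsewhere in this file.
*/
PETSC_STATIC_INLINE void Pack_bs(PetscInt bs,PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
{
  PetscInt i,k,idx;
  for (i=0; i<n; i++) {
    idx = *indicesx++;                      /* start of the i-th block in x */
    for (k=0; k<bs; k++) y[k] = x[idx+k];   /* gather one block */
    y += bs;                                /* the send buffer is filled contiguously */
  }
}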
492: PETSC_STATIC_INLINE void Pack_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
493: {
494: PetscInt i;
495: for (i=0; i<n; i++) {
496: y[i] = x[indicesx[i]];
497: }
498: }
499: PETSC_STATIC_INLINE void UnPack_1(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
500: {
501: PetscInt i;
502: switch (addv) {
503: case INSERT_VALUES:
504: for (i=0; i<n; i++) {
505: y[indicesy[i]] = x[i];
506: }
507: break;
508: case ADD_VALUES:
509: for (i=0; i<n; i++) {
510: y[indicesy[i]] += x[i];
511: }
512: break;
513: #if !defined(PETSC_USE_COMPLEX)
514: case MAX_VALUES:
515: for (i=0; i<n; i++) {
516: y[indicesy[i]] = PetscMax(y[indicesy[i]],x[i]);
517: }
518: #else
519: case MAX_VALUES:
520: #endif
521: case NOT_SET_VALUES:
522: break;
523: }
524: }
526: PETSC_STATIC_INLINE void Scatter_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
527: {
528: PetscInt i;
529: switch (addv) {
530: case INSERT_VALUES:
531: for (i=0; i<n; i++) {
532: y[indicesy[i]] = x[indicesx[i]];
533: }
534: break;
535: case ADD_VALUES:
536: for (i=0; i<n; i++) {
537: y[indicesy[i]] += x[indicesx[i]];
538: }
539: break;
540: #if !defined(PETSC_USE_COMPLEX)
541: case MAX_VALUES:
542: for (i=0; i<n; i++) {
543: y[indicesy[i]] = PetscMax(y[indicesy[i]],x[indicesx[i]]);
544: }
545: #else
546: case MAX_VALUES:
547: #endif
548: case NOT_SET_VALUES:
549: break;
550: }
551: }
553: /* ----------------------------------------------------------------------------------------------- */
554: PETSC_STATIC_INLINE void Pack_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
555: {
556: PetscInt i,idx;
558: for (i=0; i<n; i++) {
559: idx = *indicesx++;
560: y[0] = x[idx];
561: y[1] = x[idx+1];
562: y += 2;
563: }
564: }
565: PETSC_STATIC_INLINE void UnPack_2(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
566: {
567: PetscInt i,idy;
569: switch (addv) {
570: case INSERT_VALUES:
571: for (i=0; i<n; i++) {
572: idy = *indicesy++;
573: y[idy] = x[0];
574: y[idy+1] = x[1];
575: x += 2;
576: }
577: break;
578: case ADD_VALUES:
579: for (i=0; i<n; i++) {
580: idy = *indicesy++;
581: y[idy] += x[0];
582: y[idy+1] += x[1];
583: x += 2;
584: }
585: break;
586: #if !defined(PETSC_USE_COMPLEX)
587: case MAX_VALUES:
588: for (i=0; i<n; i++) {
589: idy = *indicesy++;
590: y[idy] = PetscMax(y[idy],x[0]);
591: y[idy+1] = PetscMax(y[idy+1],x[1]);
592: x += 2;
593: }
594: #else
595: case MAX_VALUES:
596: #endif
597: case NOT_SET_VALUES:
598: break;
599: }
600: }
602: PETSC_STATIC_INLINE void Scatter_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
603: {
604: PetscInt i,idx,idy;
606: switch (addv) {
607: case INSERT_VALUES:
608: for (i=0; i<n; i++) {
609: idx = *indicesx++;
610: idy = *indicesy++;
611: y[idy] = x[idx];
612: y[idy+1] = x[idx+1];
613: }
614: break;
615: case ADD_VALUES:
616: for (i=0; i<n; i++) {
617: idx = *indicesx++;
618: idy = *indicesy++;
619: y[idy] += x[idx];
620: y[idy+1] += x[idx+1];
621: }
622: break;
623: #if !defined(PETSC_USE_COMPLEX)
624: case MAX_VALUES:
625: for (i=0; i<n; i++) {
626: idx = *indicesx++;
627: idy = *indicesy++;
628: y[idy] = PetscMax(y[idy],x[idx]);
629: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
630: }
631: #else
632: case MAX_VALUES:
633: #endif
634: case NOT_SET_VALUES:
635: break;
636: }
637: }
638: /* ----------------------------------------------------------------------------------------------- */
639: PETSC_STATIC_INLINE void Pack_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
640: {
641: PetscInt i,idx;
643: for (i=0; i<n; i++) {
644: idx = *indicesx++;
645: y[0] = x[idx];
646: y[1] = x[idx+1];
647: y[2] = x[idx+2];
648: y += 3;
649: }
650: }
651: PETSC_STATIC_INLINE void UnPack_3(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
652: {
653: PetscInt i,idy;
655: switch (addv) {
656: case INSERT_VALUES:
657: for (i=0; i<n; i++) {
658: idy = *indicesy++;
659: y[idy] = x[0];
660: y[idy+1] = x[1];
661: y[idy+2] = x[2];
662: x += 3;
663: }
664: break;
665: case ADD_VALUES:
666: for (i=0; i<n; i++) {
667: idy = *indicesy++;
668: y[idy] += x[0];
669: y[idy+1] += x[1];
670: y[idy+2] += x[2];
671: x += 3;
672: }
673: break;
674: #if !defined(PETSC_USE_COMPLEX)
675: case MAX_VALUES:
676: for (i=0; i<n; i++) {
677: idy = *indicesy++;
678: y[idy] = PetscMax(y[idy],x[0]);
679: y[idy+1] = PetscMax(y[idy+1],x[1]);
680: y[idy+2] = PetscMax(y[idy+2],x[2]);
681: x += 3;
682: }
683: #else
684: case MAX_VALUES:
685: #endif
686: case NOT_SET_VALUES:
687: break;
688: }
689: }
691: PETSC_STATIC_INLINE void Scatter_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
692: {
693: PetscInt i,idx,idy;
695: switch (addv) {
696: case INSERT_VALUES:
697: for (i=0; i<n; i++) {
698: idx = *indicesx++;
699: idy = *indicesy++;
700: y[idy] = x[idx];
701: y[idy+1] = x[idx+1];
702: y[idy+2] = x[idx+2];
703: }
704: break;
705: case ADD_VALUES:
706: for (i=0; i<n; i++) {
707: idx = *indicesx++;
708: idy = *indicesy++;
709: y[idy] += x[idx];
710: y[idy+1] += x[idx+1];
711: y[idy+2] += x[idx+2];
712: }
713: break;
714: #if !defined(PETSC_USE_COMPLEX)
715: case MAX_VALUES:
716: for (i=0; i<n; i++) {
717: idx = *indicesx++;
718: idy = *indicesy++;
719: y[idy] = PetscMax(y[idy],x[idx]);
720: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
721: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
722: }
723: #else
724: case MAX_VALUES:
725: #endif
726: case NOT_SET_VALUES:
727: break;
728: }
729: }
730: /* ----------------------------------------------------------------------------------------------- */
731: PETSC_STATIC_INLINE void Pack_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
732: {
733: PetscInt i,idx;
735: for (i=0; i<n; i++) {
736: idx = *indicesx++;
737: y[0] = x[idx];
738: y[1] = x[idx+1];
739: y[2] = x[idx+2];
740: y[3] = x[idx+3];
741: y += 4;
742: }
743: }
744: PETSC_STATIC_INLINE void UnPack_4(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
745: {
746: PetscInt i,idy;
748: switch (addv) {
749: case INSERT_VALUES:
750: for (i=0; i<n; i++) {
751: idy = *indicesy++;
752: y[idy] = x[0];
753: y[idy+1] = x[1];
754: y[idy+2] = x[2];
755: y[idy+3] = x[3];
756: x += 4;
757: }
758: break;
759: case ADD_VALUES:
760: for (i=0; i<n; i++) {
761: idy = *indicesy++;
762: y[idy] += x[0];
763: y[idy+1] += x[1];
764: y[idy+2] += x[2];
765: y[idy+3] += x[3];
766: x += 4;
767: }
768: break;
769: #if !defined(PETSC_USE_COMPLEX)
770: case MAX_VALUES:
771: for (i=0; i<n; i++) {
772: idy = *indicesy++;
773: y[idy] = PetscMax(y[idy],x[0]);
774: y[idy+1] = PetscMax(y[idy+1],x[1]);
775: y[idy+2] = PetscMax(y[idy+2],x[2]);
776: y[idy+3] = PetscMax(y[idy+3],x[3]);
777: x += 4;
778: }
779: #else
780: case MAX_VALUES:
781: #endif
782: case NOT_SET_VALUES:
783: break;
784: }
785: }
787: PETSC_STATIC_INLINE void Scatter_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
788: {
789: PetscInt i,idx,idy;
791: switch (addv) {
792: case INSERT_VALUES:
793: for (i=0; i<n; i++) {
794: idx = *indicesx++;
795: idy = *indicesy++;
796: y[idy] = x[idx];
797: y[idy+1] = x[idx+1];
798: y[idy+2] = x[idx+2];
799: y[idy+3] = x[idx+3];
800: }
801: break;
802: case ADD_VALUES:
803: for (i=0; i<n; i++) {
804: idx = *indicesx++;
805: idy = *indicesy++;
806: y[idy] += x[idx];
807: y[idy+1] += x[idx+1];
808: y[idy+2] += x[idx+2];
809: y[idy+3] += x[idx+3];
810: }
811: break;
812: #if !defined(PETSC_USE_COMPLEX)
813: case MAX_VALUES:
814: for (i=0; i<n; i++) {
815: idx = *indicesx++;
816: idy = *indicesy++;
817: y[idy] = PetscMax(y[idy],x[idx]);
818: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
819: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
820: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
821: }
822: #else
823: case MAX_VALUES:
824: #endif
825: case NOT_SET_VALUES:
826: break;
827: }
828: }
829: /* ----------------------------------------------------------------------------------------------- */
830: PETSC_STATIC_INLINE void Pack_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
831: {
832: PetscInt i,idx;
834: for (i=0; i<n; i++) {
835: idx = *indicesx++;
836: y[0] = x[idx];
837: y[1] = x[idx+1];
838: y[2] = x[idx+2];
839: y[3] = x[idx+3];
840: y[4] = x[idx+4];
841: y += 5;
842: }
843: }
844: PETSC_STATIC_INLINE void UnPack_5(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
845: {
846: PetscInt i,idy;
848: switch (addv) {
849: case INSERT_VALUES:
850: for (i=0; i<n; i++) {
851: idy = *indicesy++;
852: y[idy] = x[0];
853: y[idy+1] = x[1];
854: y[idy+2] = x[2];
855: y[idy+3] = x[3];
856: y[idy+4] = x[4];
857: x += 5;
858: }
859: break;
860: case ADD_VALUES:
861: for (i=0; i<n; i++) {
862: idy = *indicesy++;
863: y[idy] += x[0];
864: y[idy+1] += x[1];
865: y[idy+2] += x[2];
866: y[idy+3] += x[3];
867: y[idy+4] += x[4];
868: x += 5;
869: }
870: break;
871: #if !defined(PETSC_USE_COMPLEX)
872: case MAX_VALUES:
873: for (i=0; i<n; i++) {
874: idy = *indicesy++;
875: y[idy] = PetscMax(y[idy],x[0]);
876: y[idy+1] = PetscMax(y[idy+1],x[1]);
877: y[idy+2] = PetscMax(y[idy+2],x[2]);
878: y[idy+3] = PetscMax(y[idy+3],x[3]);
879: y[idy+4] = PetscMax(y[idy+4],x[4]);
880: x += 5;
881: }
882: #else
883: case MAX_VALUES:
884: #endif
885: case NOT_SET_VALUES:
886: break;
887: }
888: }
890: PETSC_STATIC_INLINE void Scatter_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
891: {
892: PetscInt i,idx,idy;
894: switch (addv) {
895: case INSERT_VALUES:
896: for (i=0; i<n; i++) {
897: idx = *indicesx++;
898: idy = *indicesy++;
899: y[idy] = x[idx];
900: y[idy+1] = x[idx+1];
901: y[idy+2] = x[idx+2];
902: y[idy+3] = x[idx+3];
903: y[idy+4] = x[idx+4];
904: }
905: break;
906: case ADD_VALUES:
907: for (i=0; i<n; i++) {
908: idx = *indicesx++;
909: idy = *indicesy++;
910: y[idy] += x[idx];
911: y[idy+1] += x[idx+1];
912: y[idy+2] += x[idx+2];
913: y[idy+3] += x[idx+3];
914: y[idy+4] += x[idx+4];
915: }
916: break;
917: #if !defined(PETSC_USE_COMPLEX)
918: case MAX_VALUES:
919: for (i=0; i<n; i++) {
920: idx = *indicesx++;
921: idy = *indicesy++;
922: y[idy] = PetscMax(y[idy],x[idx]);
923: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
924: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
925: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
926: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
927: }
928: #else
929: case MAX_VALUES:
930: #endif
931: case NOT_SET_VALUES:
932: break;
933: }
934: }
935: /* ----------------------------------------------------------------------------------------------- */
936: PETSC_STATIC_INLINE void Pack_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
937: {
938: PetscInt i,idx;
940: for (i=0; i<n; i++) {
941: idx = *indicesx++;
942: y[0] = x[idx];
943: y[1] = x[idx+1];
944: y[2] = x[idx+2];
945: y[3] = x[idx+3];
946: y[4] = x[idx+4];
947: y[5] = x[idx+5];
948: y += 6;
949: }
950: }
951: PETSC_STATIC_INLINE void UnPack_6(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
952: {
953: PetscInt i,idy;
955: switch (addv) {
956: case INSERT_VALUES:
957: for (i=0; i<n; i++) {
958: idy = *indicesy++;
959: y[idy] = x[0];
960: y[idy+1] = x[1];
961: y[idy+2] = x[2];
962: y[idy+3] = x[3];
963: y[idy+4] = x[4];
964: y[idy+5] = x[5];
965: x += 6;
966: }
967: break;
968: case ADD_VALUES:
969: for (i=0; i<n; i++) {
970: idy = *indicesy++;
971: y[idy] += x[0];
972: y[idy+1] += x[1];
973: y[idy+2] += x[2];
974: y[idy+3] += x[3];
975: y[idy+4] += x[4];
976: y[idy+5] += x[5];
977: x += 6;
978: }
979: break;
980: #if !defined(PETSC_USE_COMPLEX)
981: case MAX_VALUES:
982: for (i=0; i<n; i++) {
983: idy = *indicesy++;
984: y[idy] = PetscMax(y[idy],x[0]);
985: y[idy+1] = PetscMax(y[idy+1],x[1]);
986: y[idy+2] = PetscMax(y[idy+2],x[2]);
987: y[idy+3] = PetscMax(y[idy+3],x[3]);
988: y[idy+4] = PetscMax(y[idy+4],x[4]);
989: y[idy+5] = PetscMax(y[idy+5],x[5]);
990: x += 6;
991: }
992: #else
993: case MAX_VALUES:
994: #endif
995: case NOT_SET_VALUES:
996: break;
997: }
998: }
1000: PETSC_STATIC_INLINE void Scatter_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1001: {
1002: PetscInt i,idx,idy;
1004: switch (addv) {
1005: case INSERT_VALUES:
1006: for (i=0; i<n; i++) {
1007: idx = *indicesx++;
1008: idy = *indicesy++;
1009: y[idy] = x[idx];
1010: y[idy+1] = x[idx+1];
1011: y[idy+2] = x[idx+2];
1012: y[idy+3] = x[idx+3];
1013: y[idy+4] = x[idx+4];
1014: y[idy+5] = x[idx+5];
1015: }
1016: break;
1017: case ADD_VALUES:
1018: for (i=0; i<n; i++) {
1019: idx = *indicesx++;
1020: idy = *indicesy++;
1021: y[idy] += x[idx];
1022: y[idy+1] += x[idx+1];
1023: y[idy+2] += x[idx+2];
1024: y[idy+3] += x[idx+3];
1025: y[idy+4] += x[idx+4];
1026: y[idy+5] += x[idx+5];
1027: }
1028: break;
1029: #if !defined(PETSC_USE_COMPLEX)
1030: case MAX_VALUES:
1031: for (i=0; i<n; i++) {
1032: idx = *indicesx++;
1033: idy = *indicesy++;
1034: y[idy] = PetscMax(y[idy],x[idx]);
1035: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1036: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1037: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1038: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1039: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1040: }
1041: #else
1042: case MAX_VALUES:
1043: #endif
1044: case NOT_SET_VALUES:
1045: break;
1046: }
1047: }
1048: /* ----------------------------------------------------------------------------------------------- */
1049: PETSC_STATIC_INLINE void Pack_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
1050: {
1051: PetscInt i,idx;
1053: for (i=0; i<n; i++) {
1054: idx = *indicesx++;
1055: y[0] = x[idx];
1056: y[1] = x[idx+1];
1057: y[2] = x[idx+2];
1058: y[3] = x[idx+3];
1059: y[4] = x[idx+4];
1060: y[5] = x[idx+5];
1061: y[6] = x[idx+6];
1062: y += 7;
1063: }
1064: }
1065: PETSC_STATIC_INLINE void UnPack_7(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1066: {
1067: PetscInt i,idy;
1069: switch (addv) {
1070: case INSERT_VALUES:
1071: for (i=0; i<n; i++) {
1072: idy = *indicesy++;
1073: y[idy] = x[0];
1074: y[idy+1] = x[1];
1075: y[idy+2] = x[2];
1076: y[idy+3] = x[3];
1077: y[idy+4] = x[4];
1078: y[idy+5] = x[5];
1079: y[idy+6] = x[6];
1080: x += 7;
1081: }
1082: break;
1083: case ADD_VALUES:
1084: for (i=0; i<n; i++) {
1085: idy = *indicesy++;
1086: y[idy] += x[0];
1087: y[idy+1] += x[1];
1088: y[idy+2] += x[2];
1089: y[idy+3] += x[3];
1090: y[idy+4] += x[4];
1091: y[idy+5] += x[5];
1092: y[idy+6] += x[6];
1093: x += 7;
1094: }
1095: break;
1096: #if !defined(PETSC_USE_COMPLEX)
1097: case MAX_VALUES:
1098: for (i=0; i<n; i++) {
1099: idy = *indicesy++;
1100: y[idy] = PetscMax(y[idy],x[0]);
1101: y[idy+1] = PetscMax(y[idy+1],x[1]);
1102: y[idy+2] = PetscMax(y[idy+2],x[2]);
1103: y[idy+3] = PetscMax(y[idy+3],x[3]);
1104: y[idy+4] = PetscMax(y[idy+4],x[4]);
1105: y[idy+5] = PetscMax(y[idy+5],x[5]);
1106: y[idy+6] = PetscMax(y[idy+6],x[6]);
1107: x += 7;
1108: }
1109: #else
1110: case MAX_VALUES:
1111: #endif
1112: case NOT_SET_VALUES:
1113: break;
1114: }
1115: }
1117: PETSC_STATIC_INLINE void Scatter_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1118: {
1119: PetscInt i,idx,idy;
1121: switch (addv) {
1122: case INSERT_VALUES:
1123: for (i=0; i<n; i++) {
1124: idx = *indicesx++;
1125: idy = *indicesy++;
1126: y[idy] = x[idx];
1127: y[idy+1] = x[idx+1];
1128: y[idy+2] = x[idx+2];
1129: y[idy+3] = x[idx+3];
1130: y[idy+4] = x[idx+4];
1131: y[idy+5] = x[idx+5];
1132: y[idy+6] = x[idx+6];
1133: }
1134: break;
1135: case ADD_VALUES:
1136: for (i=0; i<n; i++) {
1137: idx = *indicesx++;
1138: idy = *indicesy++;
1139: y[idy] += x[idx];
1140: y[idy+1] += x[idx+1];
1141: y[idy+2] += x[idx+2];
1142: y[idy+3] += x[idx+3];
1143: y[idy+4] += x[idx+4];
1144: y[idy+5] += x[idx+5];
1145: y[idy+6] += x[idx+6];
1146: }
1147: break;
1148: #if !defined(PETSC_USE_COMPLEX)
1149: case MAX_VALUES:
1150: for (i=0; i<n; i++) {
1151: idx = *indicesx++;
1152: idy = *indicesy++;
1153: y[idy] = PetscMax(y[idy],x[idx]);
1154: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1155: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1156: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1157: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1158: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1159: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1160: }
1161: #else
1162: case MAX_VALUES:
1163: #endif
1164: case NOT_SET_VALUES:
1165: break;
1166: }
1167: }
1168: /* ----------------------------------------------------------------------------------------------- */
1169: PETSC_STATIC_INLINE void Pack_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
1170: {
1171: PetscInt i,idx;
1173: for (i=0; i<n; i++) {
1174: idx = *indicesx++;
1175: y[0] = x[idx];
1176: y[1] = x[idx+1];
1177: y[2] = x[idx+2];
1178: y[3] = x[idx+3];
1179: y[4] = x[idx+4];
1180: y[5] = x[idx+5];
1181: y[6] = x[idx+6];
1182: y[7] = x[idx+7];
1183: y += 8;
1184: }
1185: }
1186: PETSC_STATIC_INLINE void UnPack_8(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1187: {
1188: PetscInt i,idy;
1190: switch (addv) {
1191: case INSERT_VALUES:
1192: for (i=0; i<n; i++) {
1193: idy = *indicesy++;
1194: y[idy] = x[0];
1195: y[idy+1] = x[1];
1196: y[idy+2] = x[2];
1197: y[idy+3] = x[3];
1198: y[idy+4] = x[4];
1199: y[idy+5] = x[5];
1200: y[idy+6] = x[6];
1201: y[idy+7] = x[7];
1202: x += 8;
1203: }
1204: break;
1205: case ADD_VALUES:
1206: for (i=0; i<n; i++) {
1207: idy = *indicesy++;
1208: y[idy] += x[0];
1209: y[idy+1] += x[1];
1210: y[idy+2] += x[2];
1211: y[idy+3] += x[3];
1212: y[idy+4] += x[4];
1213: y[idy+5] += x[5];
1214: y[idy+6] += x[6];
1215: y[idy+7] += x[7];
1216: x += 8;
1217: }
1218: break;
1219: #if !defined(PETSC_USE_COMPLEX)
1220: case MAX_VALUES:
1221: for (i=0; i<n; i++) {
1222: idy = *indicesy++;
1223: y[idy] = PetscMax(y[idy],x[0]);
1224: y[idy+1] = PetscMax(y[idy+1],x[1]);
1225: y[idy+2] = PetscMax(y[idy+2],x[2]);
1226: y[idy+3] = PetscMax(y[idy+3],x[3]);
1227: y[idy+4] = PetscMax(y[idy+4],x[4]);
1228: y[idy+5] = PetscMax(y[idy+5],x[5]);
1229: y[idy+6] = PetscMax(y[idy+6],x[6]);
1230: y[idy+7] = PetscMax(y[idy+7],x[7]);
1231: x += 8;
1232: }
1233: #else
1234: case MAX_VALUES:
1235: #endif
1236: case NOT_SET_VALUES:
1237: break;
1238: }
1239: }
1241: PETSC_STATIC_INLINE void Scatter_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1242: {
1243: PetscInt i,idx,idy;
1245: switch (addv) {
1246: case INSERT_VALUES:
1247: for (i=0; i<n; i++) {
1248: idx = *indicesx++;
1249: idy = *indicesy++;
1250: y[idy] = x[idx];
1251: y[idy+1] = x[idx+1];
1252: y[idy+2] = x[idx+2];
1253: y[idy+3] = x[idx+3];
1254: y[idy+4] = x[idx+4];
1255: y[idy+5] = x[idx+5];
1256: y[idy+6] = x[idx+6];
1257: y[idy+7] = x[idx+7];
1258: }
1259: break;
1260: case ADD_VALUES:
1261: for (i=0; i<n; i++) {
1262: idx = *indicesx++;
1263: idy = *indicesy++;
1264: y[idy] += x[idx];
1265: y[idy+1] += x[idx+1];
1266: y[idy+2] += x[idx+2];
1267: y[idy+3] += x[idx+3];
1268: y[idy+4] += x[idx+4];
1269: y[idy+5] += x[idx+5];
1270: y[idy+6] += x[idx+6];
1271: y[idy+7] += x[idx+7];
1272: }
1273: break;
1274: #if !defined(PETSC_USE_COMPLEX)
1275: case MAX_VALUES:
1276: for (i=0; i<n; i++) {
1277: idx = *indicesx++;
1278: idy = *indicesy++;
1279: y[idy] = PetscMax(y[idy],x[idx]);
1280: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1281: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1282: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1283: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1284: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1285: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1286: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1287: }
1288: #else
1289: case MAX_VALUES:
1290: #endif
1291: case NOT_SET_VALUES:
1292: break;
1293: }
1294: }
1296: /* ----------------------------------------------------------------------------------------------- */
1297: PETSC_STATIC_INLINE void Pack_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
1298: {
1299: PetscInt i,idx;
1301: for (i=0; i<n; i++) {
1302: idx = *indicesx++;
1303: y[0] = x[idx];
1304: y[1] = x[idx+1];
1305: y[2] = x[idx+2];
1306: y[3] = x[idx+3];
1307: y[4] = x[idx+4];
1308: y[5] = x[idx+5];
1309: y[6] = x[idx+6];
1310: y[7] = x[idx+7];
1311: y[8] = x[idx+8];
1312: y[9] = x[idx+9];
1313: y[10] = x[idx+10];
1314: y[11] = x[idx+11];
1315: y += 12;
1316: }
1317: }
1318: PETSC_STATIC_INLINE void UnPack_12(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1319: {
1320: PetscInt i,idy;
1322: switch (addv) {
1323: case INSERT_VALUES:
1324: for (i=0; i<n; i++) {
1325: idy = *indicesy++;
1326: y[idy] = x[0];
1327: y[idy+1] = x[1];
1328: y[idy+2] = x[2];
1329: y[idy+3] = x[3];
1330: y[idy+4] = x[4];
1331: y[idy+5] = x[5];
1332: y[idy+6] = x[6];
1333: y[idy+7] = x[7];
1334: y[idy+8] = x[8];
1335: y[idy+9] = x[9];
1336: y[idy+10] = x[10];
1337: y[idy+11] = x[11];
1338: x += 12;
1339: }
1340: break;
1341: case ADD_VALUES:
1342: for (i=0; i<n; i++) {
1343: idy = *indicesy++;
1344: y[idy] += x[0];
1345: y[idy+1] += x[1];
1346: y[idy+2] += x[2];
1347: y[idy+3] += x[3];
1348: y[idy+4] += x[4];
1349: y[idy+5] += x[5];
1350: y[idy+6] += x[6];
1351: y[idy+7] += x[7];
1352: y[idy+8] += x[8];
1353: y[idy+9] += x[9];
1354: y[idy+10] += x[10];
1355: y[idy+11] += x[11];
1356: x += 12;
1357: }
1358: break;
1359: #if !defined(PETSC_USE_COMPLEX)
1360: case MAX_VALUES:
1361: for (i=0; i<n; i++) {
1362: idy = *indicesy++;
1363: y[idy] = PetscMax(y[idy],x[0]);
1364: y[idy+1] = PetscMax(y[idy+1],x[1]);
1365: y[idy+2] = PetscMax(y[idy+2],x[2]);
1366: y[idy+3] = PetscMax(y[idy+3],x[3]);
1367: y[idy+4] = PetscMax(y[idy+4],x[4]);
1368: y[idy+5] = PetscMax(y[idy+5],x[5]);
1369: y[idy+6] = PetscMax(y[idy+6],x[6]);
1370: y[idy+7] = PetscMax(y[idy+7],x[7]);
1371: y[idy+8] = PetscMax(y[idy+8],x[8]);
1372: y[idy+9] = PetscMax(y[idy+9],x[9]);
1373: y[idy+10] = PetscMax(y[idy+10],x[10]);
1374: y[idy+11] = PetscMax(y[idy+11],x[11]);
1375: x += 12;
1376: }
1377: #else
1378: case MAX_VALUES:
1379: #endif
1380: case NOT_SET_VALUES:
1381: break;
1382: }
1383: }
1385: PETSC_STATIC_INLINE void Scatter_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1386: {
1387: PetscInt i,idx,idy;
1389: switch (addv) {
1390: case INSERT_VALUES:
1391: for (i=0; i<n; i++) {
1392: idx = *indicesx++;
1393: idy = *indicesy++;
1394: y[idy] = x[idx];
1395: y[idy+1] = x[idx+1];
1396: y[idy+2] = x[idx+2];
1397: y[idy+3] = x[idx+3];
1398: y[idy+4] = x[idx+4];
1399: y[idy+5] = x[idx+5];
1400: y[idy+6] = x[idx+6];
1401: y[idy+7] = x[idx+7];
1402: y[idy+8] = x[idx+8];
1403: y[idy+9] = x[idx+9];
1404: y[idy+10] = x[idx+10];
1405: y[idy+11] = x[idx+11];
1406: }
1407: break;
1408: case ADD_VALUES:
1409: for (i=0; i<n; i++) {
1410: idx = *indicesx++;
1411: idy = *indicesy++;
1412: y[idy] += x[idx];
1413: y[idy+1] += x[idx+1];
1414: y[idy+2] += x[idx+2];
1415: y[idy+3] += x[idx+3];
1416: y[idy+4] += x[idx+4];
1417: y[idy+5] += x[idx+5];
1418: y[idy+6] += x[idx+6];
1419: y[idy+7] += x[idx+7];
1420: y[idy+8] += x[idx+8];
1421: y[idy+9] += x[idx+9];
1422: y[idy+10] += x[idx+10];
1423: y[idy+11] += x[idx+11];
1424: }
1425: break;
1426: #if !defined(PETSC_USE_COMPLEX)
1427: case MAX_VALUES:
1428: for (i=0; i<n; i++) {
1429: idx = *indicesx++;
1430: idy = *indicesy++;
1431: y[idy] = PetscMax(y[idy],x[idx]);
1432: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1433: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1434: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1435: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1436: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1437: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1438: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1439: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1440: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1441: y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1442: y[idy+11] = PetscMax(y[idy+11],x[idx+11]);
1443: }
1444: #else
1445: case MAX_VALUES:
1446: #endif
1447: case NOT_SET_VALUES:
1448: break;
1449: }
1450: }
1452: /* Create the VecScatterBegin/End_P routines for our chosen block sizes (see the sketch after the includes below) */
1453: #define BS 1
1454: #include ../src/vec/vec/utils/vpscat.h
1455: #define BS 2
1456: #include ../src/vec/vec/utils/vpscat.h
1457: #define BS 3
1458: #include ../src/vec/vec/utils/vpscat.h
1459: #define BS 4
1460: #include ../src/vec/vec/utils/vpscat.h
1461: #define BS 5
1462: #include ../src/vec/vec/utils/vpscat.h
1463: #define BS 6
1464: #include ../src/vec/vec/utils/vpscat.h
1465: #define BS 7
1466: #include ../src/vec/vec/utils/vpscat.h
1467: #define BS 8
1468: #include ../src/vec/vec/utils/vpscat.h
1469: #define BS 12
1470: #include ../src/vec/vec/utils/vpscat.h
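/*
   vpscat.h itself is not shown on this page. Presumably it uses the BS value
   defined before each inclusion to paste the block size into the kernel names,
   roughly along the lines of the sketch below, and #undefs BS at the end so the
   next inclusion can redefine it; the PETSCMAP1 macro name is an assumption made
   only for illustration.

     #define PETSCMAP1_a(a,b)  a ## _ ## b
     #define PETSCMAP1_b(a,b)  PETSCMAP1_a(a,b)
     #define PETSCMAP1(a)      PETSCMAP1_b(a,BS)

     PetscErrorCode PETSCMAP1(VecScatterBegin)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
     {
       ...  for BS == 2 this expands to VecScatterBegin_2() and calls Pack_2() etc.  ...
     }

     #undef BS
*/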
1472: /* ==========================================================================================*/
1474: /* create parallel to sequential scatter context */
1476: PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General *,VecScatter_MPI_General *,VecScatter);
1480: /*@
1481: VecScatterCreateLocal - Creates a VecScatter from a list of messages it must send and receive.
1483: Collective on VecScatter
1485: Input Parameters:
1486: +  ctx - obtained with VecScatterCreateEmpty()
1487: .  nsends - number of sends, i.e. the number of processes this process sends to
1488: .  sendSizes - number of entries in each send
1489: .  sendProcs - ranks of the processes to send to
1490: .  sendIdx - indices where the sent entries are obtained from (in local, on-process numbering), this is one long array of size \sum_{i=0,i<nsends} sendSizes[i]
1491: .  nrecvs - number of receives to expect
1492: .  recvSizes - number of entries in each receive
1493: .  recvProcs - processes who are sending to me
1494: .  recvIdx - indices of where received entries are to be put (in local, on-process numbering), this is one long array of size \sum_{i=0,i<nrecvs} recvSizes[i]
1495: - bs - size of block
1497:    Notes: sendSizes[] and recvSizes[] cannot have any 0 entries. If you want to support 0 entries you need
1498:     to change the code below to "compress out" the sendProcs[] and recvProcs[] entries that have 0 entries.
1500:        Probably does not handle sends to self properly; those should be removed from the counts that are used
1501:       in allocating space inside the from struct.
1503: Level: intermediate
1505: @*/
1506: PetscErrorCode VecScatterCreateLocal(VecScatter ctx,PetscInt nsends,const PetscInt sendSizes[],const PetscInt sendProcs[],const PetscInt sendIdx[],PetscInt nrecvs,const PetscInt recvSizes[],const PetscInt recvProcs[],const PetscInt recvIdx[],PetscInt bs)
1507: {
1508: VecScatter_MPI_General *from, *to;
1509: PetscInt sendSize, recvSize;
1510: PetscInt n, i;
1511: PetscErrorCode ierr;
1513: /* allocate entire send scatter context */
1514: PetscNewLog(ctx,VecScatter_MPI_General,&to);
1515: to->n = nsends;
1516: for(n = 0, sendSize = 0; n < to->n; n++) {sendSize += sendSizes[n];}
1517: PetscMalloc(to->n*sizeof(MPI_Request),&to->requests);
1518: PetscMalloc4(bs*sendSize,PetscScalar,&to->values,sendSize,PetscInt,&to->indices,to->n+1,PetscInt,&to->starts,to->n,PetscMPIInt,&to->procs);
1519: PetscMalloc2(PetscMax(to->n,nrecvs),MPI_Status,&to->sstatus,PetscMax(to->n,nrecvs),MPI_Status,&to->rstatus);
1520: to->starts[0] = 0;
1521: for(n = 0; n < to->n; n++) {
1522: if (sendSizes[n] <=0 ) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"sendSizes[n=%D] = %D cannot be less than 1",n,sendSizes[n]);
1523: to->starts[n+1] = to->starts[n] + sendSizes[n];
1524: to->procs[n] = sendProcs[n];
1525: for(i = to->starts[n]; i < to->starts[n]+sendSizes[n]; i++) {
1526: to->indices[i] = sendIdx[i];
1527: }
1528: }
1529: ctx->todata = (void *) to;
1531: /* allocate entire receive scatter context */
1532: PetscNewLog(ctx,VecScatter_MPI_General,&from);
1533: from->n = nrecvs;
1534: for(n = 0, recvSize = 0; n < from->n; n++) {recvSize += recvSizes[n];}
1535: PetscMalloc(from->n*sizeof(MPI_Request),&from->requests);
1536: PetscMalloc4(bs*recvSize,PetscScalar,&from->values,recvSize,PetscInt,&from->indices,from->n+1,PetscInt,&from->starts,from->n,PetscMPIInt,&from->procs);
1537: from->starts[0] = 0;
1538: for(n = 0; n < from->n; n++) {
1539: if (recvSizes[n] <=0 ) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"recvSizes[n=%D] = %D cannot be less than 1",n,recvSizes[n]);
1540: from->starts[n+1] = from->starts[n] + recvSizes[n];
1541: from->procs[n] = recvProcs[n];
1542: for(i = from->starts[n]; i < from->starts[n]+recvSizes[n]; i++) {
1543: from->indices[i] = recvIdx[i];
1544: }
1545: }
1546: ctx->fromdata = (void *)from;
1548: /* No local scatter optimization */
1549: from->local.n = 0;
1550: from->local.vslots = 0;
1551: to->local.n = 0;
1552: to->local.vslots = 0;
1553: from->local.nonmatching_computed = PETSC_FALSE;
1554: from->local.n_nonmatching = 0;
1555: from->local.slots_nonmatching = 0;
1556: to->local.nonmatching_computed = PETSC_FALSE;
1557: to->local.n_nonmatching = 0;
1558: to->local.slots_nonmatching = 0;
1560: from->type = VEC_SCATTER_MPI_GENERAL;
1561: to->type = VEC_SCATTER_MPI_GENERAL;
1562: from->bs = bs;
1563: to->bs = bs;
1564: VecScatterCreateCommon_PtoS(from, to, ctx);
1565: return(0);
1566: }
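/*
   A minimal usage sketch for VecScatterCreateLocal(), assuming a two-process run
   in which each process sends two entries to the other. The sizes and indices are
   invented purely for illustration; rank, comm, x and y are assumed to be the
   usual rank, communicator and vectors, and the calling sequence shown for
   VecScatterCreateEmpty() is inferred from its mention in the docstring above.

     PetscInt   sendSizes[1] = {2}, sendProcs[1] = {1-rank}, sendIdx[2] = {0,1};
     PetscInt   recvSizes[1] = {2}, recvProcs[1] = {1-rank}, recvIdx[2] = {2,3};
     VecScatter ctx;

     VecScatterCreateEmpty(comm,&ctx);
     VecScatterCreateLocal(ctx,1,sendSizes,sendProcs,sendIdx,1,recvSizes,recvProcs,recvIdx,1);
     VecScatterBegin(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
     VecScatterEnd(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
     VecScatterDestroy(ctx);
*/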
1568: /*
1569:    bs indicates how many elements there are in each block. Normally this would be 1.
1571:    Contains a check that PetscMPIInt can handle the sizes needed.
1572: */
1575: PetscErrorCode VecScatterCreate_PtoS(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
1576: {
1577: VecScatter_MPI_General *from,*to;
1578: PetscMPIInt size,rank,imdex,tag,n;
1579: PetscInt *source = PETSC_NULL,*owners = PETSC_NULL;
1580: PetscInt *lowner = PETSC_NULL,*start = PETSC_NULL,lengthy,lengthx;
1581: PetscMPIInt *nprocs = PETSC_NULL,nrecvs;
1582: PetscInt i,j,idx,nsends;
1583: PetscInt *owner = PETSC_NULL,*starts = PETSC_NULL,count,slen;
1584: PetscInt *rvalues,*svalues,base,*values,nprocslocal,recvtotal,*rsvalues;
1585: PetscMPIInt *onodes1,*olengths1;
1586: MPI_Comm comm;
1587: MPI_Request *send_waits = PETSC_NULL,*recv_waits = PETSC_NULL;
1588: MPI_Status recv_status,*send_status;
1589: PetscErrorCode ierr;
1592: PetscObjectGetNewTag((PetscObject)ctx,&tag);
1593: PetscObjectGetComm((PetscObject)xin,&comm);
1594: MPI_Comm_rank(comm,&rank);
1595: MPI_Comm_size(comm,&size);
1596: owners = xin->map->range;
1597: VecGetSize(yin,&lengthy);
1598: VecGetSize(xin,&lengthx);
1600: /* first count number of contributors to each processor */
1601: PetscMalloc2(size,PetscMPIInt,&nprocs,nx,PetscInt,&owner);
1602: PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
1603: j = 0;
1604: nsends = 0;
1605: for (i=0; i<nx; i++) {
1606: idx = inidx[i];
1607: if (idx < owners[j]) j = 0;
1608: for (; j<size; j++) {
1609: if (idx < owners[j+1]) {
1610: if (!nprocs[j]++) nsends++;
1611: owner[i] = j;
1612: break;
1613: }
1614: }
1615: }
1616: nprocslocal = nprocs[rank];
1617: nprocs[rank] = 0;
1618: if (nprocslocal) nsends--;
1619: /* inform other processors of number of messages and max length*/
1620: PetscGatherNumberOfMessages(comm,PETSC_NULL,nprocs,&nrecvs);
1621: PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
1622: PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
1623: recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
1625: /* post receives: */
1626: PetscMalloc3(recvtotal,PetscInt,&rvalues,nrecvs,PetscInt,&source,nrecvs,MPI_Request,&recv_waits);
1627: count = 0;
1628: for (i=0; i<nrecvs; i++) {
1629: MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
1630: count += olengths1[i];
1631: }
1633: /* do sends:
1634: 1) starts[i] gives the starting index in svalues of the entries going to
1635: the ith processor
1636: */
1637: PetscMalloc3(nx,PetscInt,&svalues,nsends,MPI_Request,&send_waits,size+1,PetscInt,&starts);
1638: starts[0] = 0;
1639: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1640: for (i=0; i<nx; i++) {
1641: if (owner[i] != rank) {
1642: svalues[starts[owner[i]]++] = inidx[i];
1643: }
1644: }
1646: starts[0] = 0;
1647: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1648: count = 0;
1649: for (i=0; i<size; i++) {
1650: if (nprocs[i]) {
1651: MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);
1652: }
1653: }
1655: /* wait on receives */
1656: count = nrecvs;
1657: slen = 0;
1658: while (count) {
1659: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1660: /* unpack receives into our local space */
1661: MPI_Get_count(&recv_status,MPIU_INT,&n);
1662: slen += n;
1663: count--;
1664: }
1666: if (slen != recvtotal) SETERRQ2(PETSC_ERR_PLIB,"Total message lengths %D not expected %D",slen,recvtotal);
1667:
1668: /* allocate entire send scatter context */
1669: PetscNewLog(ctx,VecScatter_MPI_General,&to);
1670: to->n = nrecvs;
1671: PetscMalloc(nrecvs*sizeof(MPI_Request),&to->requests);
1672: PetscMalloc4(bs*slen,PetscScalar,&to->values,slen,PetscInt,&to->indices,nrecvs+1,PetscInt,&to->starts,nrecvs,PetscMPIInt,&to->procs);
1673: PetscMalloc2(PetscMax(to->n,nsends),MPI_Status,&to->sstatus,PetscMax(to->n,nsends),MPI_Status,&to->rstatus);
1674: ctx->todata = (void*)to;
1675: to->starts[0] = 0;
1677: if (nrecvs) {
1679: /* move the data into the send scatter */
1680: base = owners[rank];
1681: rsvalues = rvalues;
1682: for (i=0; i<nrecvs; i++) {
1683: to->starts[i+1] = to->starts[i] + olengths1[i];
1684: to->procs[i] = onodes1[i];
1685: values = rsvalues;
1686: rsvalues += olengths1[i];
1687: for (j=0; j<olengths1[i]; j++) {
1688: to->indices[to->starts[i] + j] = values[j] - base;
1689: }
1690: }
1691: }
1692: PetscFree(olengths1);
1693: PetscFree(onodes1);
1694: PetscFree3(rvalues,source,recv_waits);
1696: /* allocate entire receive scatter context */
1697: PetscNewLog(ctx,VecScatter_MPI_General,&from);
1698: from->n = nsends;
1700: PetscMalloc(nsends*sizeof(MPI_Request),&from->requests);
1701: PetscMalloc4((ny-nprocslocal)*bs,PetscScalar,&from->values,ny-nprocslocal,PetscInt,&from->indices,nsends+1,PetscInt,&from->starts,from->n,PetscMPIInt,&from->procs);
1702: ctx->fromdata = (void*)from;
1704: /* move data into receive scatter */
1705: PetscMalloc2(size,PetscInt,&lowner,nsends+1,PetscInt,&start);
1706: count = 0; from->starts[0] = start[0] = 0;
1707: for (i=0; i<size; i++) {
1708: if (nprocs[i]) {
1709: lowner[i] = count;
1710: from->procs[count++] = i;
1711: from->starts[count] = start[count] = start[count-1] + nprocs[i];
1712: }
1713: }
1715: for (i=0; i<nx; i++) {
1716: if (owner[i] != rank) {
1717: from->indices[start[lowner[owner[i]]]++] = inidy[i];
1718: if (inidy[i] >= lengthy) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
1719: }
1720: }
1721: PetscFree2(lowner,start);
1722: PetscFree2(nprocs,owner);
1723:
1724: /* wait on sends */
1725: if (nsends) {
1726: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1727: MPI_Waitall(nsends,send_waits,send_status);
1728: PetscFree(send_status);
1729: }
1730: PetscFree3(svalues,send_waits,starts);
1732: if (nprocslocal) {
1733: PetscInt nt = from->local.n = to->local.n = nprocslocal;
1734: /* we have a scatter to ourselves */
1735: PetscMalloc(nt*sizeof(PetscInt),&to->local.vslots);
1736: PetscMalloc(nt*sizeof(PetscInt),&from->local.vslots);
1737: nt = 0;
1738: for (i=0; i<nx; i++) {
1739: idx = inidx[i];
1740: if (idx >= owners[rank] && idx < owners[rank+1]) {
1741: to->local.vslots[nt] = idx - owners[rank];
1742: from->local.vslots[nt++] = inidy[i];
1743: if (inidy[i] >= lengthy) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
1744: }
1745: }
1746: } else {
1747: from->local.n = 0;
1748: from->local.vslots = 0;
1749: to->local.n = 0;
1750: to->local.vslots = 0;
1751: }
1753: from->local.nonmatching_computed = PETSC_FALSE;
1754: from->local.n_nonmatching = 0;
1755: from->local.slots_nonmatching = 0;
1756: to->local.nonmatching_computed = PETSC_FALSE;
1757: to->local.n_nonmatching = 0;
1758: to->local.slots_nonmatching = 0;
1760: from->type = VEC_SCATTER_MPI_GENERAL;
1761: to->type = VEC_SCATTER_MPI_GENERAL;
1762: from->bs = bs;
1763: to->bs = bs;
1764: VecScatterCreateCommon_PtoS(from,to,ctx);
1765: return(0);
1766: }
1768: /*
1769: bs indicates how many elements there are in each block. Normally this would be 1.
1770: */
1773: PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General *from,VecScatter_MPI_General *to,VecScatter ctx)
1774: {
1775: MPI_Comm comm = ((PetscObject)ctx)->comm;
1776: PetscMPIInt tag = ((PetscObject)ctx)->tag, tagr;
1777: PetscInt bs = to->bs;
1778: PetscMPIInt size;
1779: PetscInt i, n;
1781:
1783: PetscObjectGetNewTag((PetscObject)ctx,&tagr);
1784: ctx->destroy = VecScatterDestroy_PtoP;
1786: ctx->reproduce = PETSC_FALSE;
1787: to->sendfirst = PETSC_FALSE;
1788: PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_reproduce",&ctx->reproduce,PETSC_NULL);
1789: PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_sendfirst",&to->sendfirst,PETSC_NULL);
1790: from->sendfirst = to->sendfirst;
1792: MPI_Comm_size(comm,&size);
1793: /* check if the receives are ALL going into contiguous locations; if so we can skip indexing */
1794: to->contiq = PETSC_FALSE;
1795: n = from->starts[from->n];
1796: from->contiq = PETSC_TRUE;
1797: for (i=1; i<n; i++) {
1798: if (from->indices[i] != from->indices[i-1] + bs) {
1799: from->contiq = PETSC_FALSE;
1800: break;
1801: }
1802: }
1804: to->use_alltoallv = PETSC_FALSE;
1805: PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_alltoall",&to->use_alltoallv,PETSC_NULL);
1806: from->use_alltoallv = to->use_alltoallv;
1807: if (from->use_alltoallv) PetscInfo(ctx,"Using MPI_Alltoallv() for scatter\n");
1808: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
1809: if (to->use_alltoallv) {
1810: to->use_alltoallw = PETSC_FALSE;
1811: PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_nopack",&to->use_alltoallw,PETSC_NULL);
1812: }
1813: from->use_alltoallw = to->use_alltoallw;
1814: if (from->use_alltoallw) PetscInfo(ctx,"Using MPI_Alltoallw() for scatter\n");
1815: #endif
1817: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
1818: to->use_window = PETSC_FALSE;
1819: PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_window",&to->use_window,PETSC_NULL);
1820: from->use_window = to->use_window;
1821: #endif
1823: if (to->use_alltoallv) {
1825: PetscMalloc2(size,PetscMPIInt,&to->counts,size,PetscMPIInt,&to->displs);
1826: PetscMemzero(to->counts,size*sizeof(PetscMPIInt));
1827: for (i=0; i<to->n; i++) {
1828: to->counts[to->procs[i]] = bs*(to->starts[i+1] - to->starts[i]);
1829: }
1830: to->displs[0] = 0;
1831: for (i=1; i<size; i++) {
1832: to->displs[i] = to->displs[i-1] + to->counts[i-1];
1833: }
1835: PetscMalloc2(size,PetscMPIInt,&from->counts,size,PetscMPIInt,&from->displs);
1836: PetscMemzero(from->counts,size*sizeof(PetscMPIInt));
1837: for (i=0; i<from->n; i++) {
1838: from->counts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
1839: }
1840: from->displs[0] = 0;
1841: for (i=1; i<size; i++) {
1842: from->displs[i] = from->displs[i-1] + from->counts[i-1];
1843: }
1844: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
1845: if (to->use_alltoallw) {
1846: PetscMPIInt mpibs = PetscMPIIntCast(bs), mpilen;
1847: ctx->packtogether = PETSC_FALSE;
1848: PetscMalloc3(size,PetscMPIInt,&to->wcounts,size,PetscMPIInt,&to->wdispls,size,MPI_Datatype,&to->types);
1849: PetscMemzero(to->wcounts,size*sizeof(PetscMPIInt));
1850: PetscMemzero(to->wdispls,size*sizeof(PetscMPIInt));
1851: for (i=0; i<size; i++) {
1852: to->types[i] = MPIU_SCALAR;
1853: }
1855: for (i=0; i<to->n; i++) {
1856: to->wcounts[to->procs[i]] = 1;
1857: mpilen = PetscMPIIntCast(to->starts[i+1]-to->starts[i]);
1858: MPI_Type_create_indexed_block(mpilen,mpibs,to->indices+to->starts[i],MPIU_SCALAR,to->types+to->procs[i]);
1859: MPI_Type_commit(to->types+to->procs[i]);
1860: }
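   /* each to->types[p] is an indexed-block datatype that picks the bs-sized blocks destined
      for process p directly out of the local vector array, so MPI_Alltoallw() can send
      without packing into an intermediate buffer (this is what -vecscatter_nopack selects) */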
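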
1861: PetscMalloc3(size,PetscMPIInt,&from->wcounts,size,PetscMPIInt,&from->wdispls,size,MPI_Datatype,&from->types);
1862: PetscMemzero(from->wcounts,size*sizeof(PetscMPIInt));
1863: PetscMemzero(from->wdispls,size*sizeof(PetscMPIInt));
1864: for (i=0; i<size; i++) {
1865: from->types[i] = MPIU_SCALAR;
1866: }
1867: if (from->contiq) {
1868: PetscInfo(ctx,"Scattered vector entries are stored contiguously, taking advantage of this with -vecscatter_alltoall\n");
1869: for (i=0; i<from->n; i++) {
1870: from->wcounts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
1871: }
1872: if (from->n) from->wdispls[from->procs[0]] = sizeof(PetscScalar)*from->indices[0];
1873: for (i=1; i<from->n; i++) {
1874: from->wdispls[from->procs[i]] = from->wdispls[from->procs[i-1]] + sizeof(PetscScalar)*from->wcounts[from->procs[i-1]];
1875: }
1876: } else {
1877: for (i=0; i<from->n; i++) {
1878: from->wcounts[from->procs[i]] = 1;
1879: mpilen = PetscMPIIntCast(from->starts[i+1]-from->starts[i]);
1880: MPI_Type_create_indexed_block(mpilen,mpibs,from->indices+from->starts[i],MPIU_SCALAR,from->types+from->procs[i]);
1881: MPI_Type_commit(from->types+from->procs[i]);
1882: }
1883: }
1884: } else {
1885: ctx->copy = VecScatterCopy_PtoP_AllToAll;
1886: }
1887: #else
1888: to->use_alltoallw = PETSC_FALSE;
1889: from->use_alltoallw = PETSC_FALSE;
1890: ctx->copy = VecScatterCopy_PtoP_AllToAll;
1891: #endif
1892: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
1893: } else if (to->use_window) {
1894: PetscMPIInt temptag,winsize;
1895: MPI_Request *request;
1896: MPI_Status *status;
1897:
1898: PetscObjectGetNewTag((PetscObject)ctx,&temptag);
1899: winsize = (to->n ? to->starts[to->n] : 0)*bs*sizeof(PetscScalar);
1900: MPI_Win_create(to->values ? to->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&to->window);
1901: PetscMalloc(to->n*sizeof(PetscInt),&to->winstarts);
1902: PetscMalloc2(to->n,MPI_Request,&request,to->n,MPI_Status,&status);
1903: for (i=0; i<to->n; i++) {
1904: MPI_Irecv(to->winstarts+i,1,MPIU_INT,to->procs[i],temptag,comm,request+i);
1905: }
1906: for (i=0; i<from->n; i++) {
1907: MPI_Send(from->starts+i,1,MPIU_INT,from->procs[i],temptag,comm);
1908: }
1909: MPI_Waitall(to->n,request,status);
1910: PetscFree2(request,status);
1912: winsize = (from->n ? from->starts[from->n] : 0)*bs*sizeof(PetscScalar);
1913: MPI_Win_create(from->values ? from->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&from->window);
1914: PetscMalloc(from->n*sizeof(PetscInt),&from->winstarts);
1915: PetscMalloc2(from->n,MPI_Request,&request,from->n,MPI_Status,&status);
1916: for (i=0; i<from->n; i++) {
1917: MPI_Irecv(from->winstarts+i,1,MPIU_INT,from->procs[i],temptag,comm,request+i);
1918: }
1919: for (i=0; i<to->n; i++) {
1920: MPI_Send(to->starts+i,1,MPIU_INT,to->procs[i],temptag,comm);
1921: }
1922: MPI_Waitall(from->n,request,status);
1923: PetscFree2(request,status);
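   /* after the two exchanges above, to->winstarts[i] holds the offset (in units of bs-sized
      blocks) within the receive window of process to->procs[i] at which this process's
      contribution begins, and from->winstarts[i] holds, symmetrically, where the data headed
      for this process begins in the window of from->procs[i] */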
1924: #endif
1925: } else {
1926: PetscTruth use_rsend = PETSC_FALSE, use_ssend = PETSC_FALSE;
1927: PetscInt *sstarts = to->starts, *rstarts = from->starts;
1928: PetscMPIInt *sprocs = to->procs, *rprocs = from->procs;
1929: MPI_Request *swaits = to->requests,*rwaits = from->requests;
1930: MPI_Request *rev_swaits,*rev_rwaits;
1931: PetscScalar *Ssvalues = to->values, *Srvalues = from->values;
1933: /* allocate additional wait variables for the "reverse" scatter */
1934: PetscMalloc(to->n*sizeof(MPI_Request),&rev_rwaits);
1935: PetscMalloc(from->n*sizeof(MPI_Request),&rev_swaits);
1936: to->rev_requests = rev_rwaits;
1937: from->rev_requests = rev_swaits;
1939: /* Register the receives that you will use later (sends for scatter reverse) */
1940: PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_rsend",&use_rsend,PETSC_NULL);
1941: PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_ssend",&use_ssend,PETSC_NULL);
1942: if (use_rsend) {
1943: PetscInfo(ctx,"Using VecScatter ready receiver mode\n");
1944: to->use_readyreceiver = PETSC_TRUE;
1945: from->use_readyreceiver = PETSC_TRUE;
1946: } else {
1947: to->use_readyreceiver = PETSC_FALSE;
1948: from->use_readyreceiver = PETSC_FALSE;
1949: }
1950: if (use_ssend) {
1951: PetscInfo(ctx,"Using VecScatter Ssend mode\n");
1952: }
1954: for (i=0; i<from->n; i++) {
1955: if (use_rsend) {
1956: MPI_Rsend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
1957: } else if (use_ssend) {
1958: MPI_Ssend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
1959: } else {
1960: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
1961: }
1962: }
1964: for (i=0; i<to->n; i++) {
1965: if (use_rsend) {
1966: MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
1967: } else if (use_ssend) {
1968: MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
1969: } else {
1970: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
1971: }
1972: }
1973: /* Register receives for scatter and reverse */
1974: for (i=0; i<from->n; i++) {
1975: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
1976: }
1977: for (i=0; i<to->n; i++) {
1978: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tagr,comm,rev_rwaits+i);
1979: }
1980: if (use_rsend) {
1981: if (to->n) {MPI_Startall_irecv(to->starts[to->n]*to->bs,to->n,to->rev_requests);}
1982: if (from->n) {MPI_Startall_irecv(from->starts[from->n]*from->bs,from->n,from->requests);}
1983: MPI_Barrier(comm);
1984: }
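   /* the receives were started above (followed by a barrier) because ready sends created
      with MPI_Rsend_init() require that the matching receive already be posted on the
      destination before the send is started */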
1986: ctx->copy = VecScatterCopy_PtoP_X;
1987: }
1988: PetscInfo1(ctx,"Using blocksize %D scatter\n",bs);
1989: switch (bs) {
1990: case 12:
1991: ctx->begin = VecScatterBegin_12;
1992: ctx->end = VecScatterEnd_12;
1993: break;
1994: case 8:
1995: ctx->begin = VecScatterBegin_8;
1996: ctx->end = VecScatterEnd_8;
1997: break;
1998: case 7:
1999: ctx->begin = VecScatterBegin_7;
2000: ctx->end = VecScatterEnd_7;
2001: break;
2002: case 6:
2003: ctx->begin = VecScatterBegin_6;
2004: ctx->end = VecScatterEnd_6;
2005: break;
2006: case 5:
2007: ctx->begin = VecScatterBegin_5;
2008: ctx->end = VecScatterEnd_5;
2009: break;
2010: case 4:
2011: ctx->begin = VecScatterBegin_4;
2012: ctx->end = VecScatterEnd_4;
2013: break;
2014: case 3:
2015: ctx->begin = VecScatterBegin_3;
2016: ctx->end = VecScatterEnd_3;
2017: break;
2018: case 2:
2019: ctx->begin = VecScatterBegin_2;
2020: ctx->end = VecScatterEnd_2;
2021: break;
2022: case 1:
2023: ctx->begin = VecScatterBegin_1;
2024: ctx->end = VecScatterEnd_1;
2025: break;
2026: default:
2027: SETERRQ(PETSC_ERR_SUP,"Blocksize not supported");
2028: }
2029: ctx->view = VecScatterView_MPI;
2030: /* Check if the local scatter is actually a copy; important special case */
2031: if (to->local.n) {
2032: VecScatterLocalOptimizeCopy_Private(ctx,&to->local,&from->local,bs);
2033: }
2034: return(0);
2035: }
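/*
   For reference, a minimal standalone sketch (not part of this file) of the persistent-
   request pattern used by the default branch above: the requests are created once with
   MPI_Send_init()/MPI_Recv_init(), and every subsequent "scatter" is just MPI_Startall()
   followed by MPI_Waitall().  The buffer size and the ring neighbour are made up for the
   example.

     #include <mpi.h>

     int main(int argc,char **argv)
     {
       int         rank,size,i;
       double      sendbuf[4],recvbuf[4];
       MPI_Request req[2];

       MPI_Init(&argc,&argv);
       MPI_Comm_rank(MPI_COMM_WORLD,&rank);
       MPI_Comm_size(MPI_COMM_WORLD,&size);

       // create the persistent requests once (analogue of the *_init() calls above)
       MPI_Send_init(sendbuf,4,MPI_DOUBLE,(rank+1)%size,0,MPI_COMM_WORLD,&req[0]);
       MPI_Recv_init(recvbuf,4,MPI_DOUBLE,(rank+size-1)%size,0,MPI_COMM_WORLD,&req[1]);

       for (i=0; i<10; i++) {     // each iteration plays the role of one scatter
         sendbuf[0] = sendbuf[1] = sendbuf[2] = sendbuf[3] = (double)(10*rank + i);
         MPI_Startall(2,req);                        // restart both persistent requests
         MPI_Waitall(2,req,MPI_STATUSES_IGNORE);     // complete them; they remain reusable
       }

       MPI_Request_free(&req[0]);
       MPI_Request_free(&req[1]);
       MPI_Finalize();
       return 0;
     }
*/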
2039: /* ------------------------------------------------------------------------------------*/
2040: /*
2041: Scatter from local Seq vectors to a parallel vector.
2042: Reverses the order of the arguments, calls VecScatterCreate_PtoS() then
2043: reverses the result.
2044: */
2047: PetscErrorCode VecScatterCreate_StoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2048: {
2049: PetscErrorCode ierr;
2050: MPI_Request *waits;
2051: VecScatter_MPI_General *to,*from;
2054: VecScatterCreate_PtoS(ny,inidy,nx,inidx,yin,xin,bs,ctx);
2055: to = (VecScatter_MPI_General*)ctx->fromdata;
2056: from = (VecScatter_MPI_General*)ctx->todata;
2057: ctx->todata = (void*)to;
2058: ctx->fromdata = (void*)from;
2059: /* these two are special; they are ALWAYS stored in the to struct */
2060: to->sstatus = from->sstatus;
2061: to->rstatus = from->rstatus;
2063: from->sstatus = 0;
2064: from->rstatus = 0;
2066: waits = from->rev_requests;
2067: from->rev_requests = from->requests;
2068: from->requests = waits;
2069: waits = to->rev_requests;
2070: to->rev_requests = to->requests;
2071: to->requests = waits;
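  /* the to/from structures and the forward/reverse request sets have now been swapped, so
     applying this context with SCATTER_FORWARD performs what the underlying PtoS creation
     regarded as its reverse, i.e. the sequential-to-parallel direction */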
2072: return(0);
2073: }
2075: /* ---------------------------------------------------------------------------------*/
2078: PetscErrorCode VecScatterCreate_PtoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,VecScatter ctx)
2079: {
2081: PetscMPIInt size,rank,tag,imdex,n;
2082: PetscInt *owners = xin->map->range;
2083: PetscMPIInt *nprocs = PETSC_NULL;
2084: PetscInt i,j,idx,nsends,*local_inidx = PETSC_NULL,*local_inidy = PETSC_NULL;
2085: PetscInt *owner = PETSC_NULL,*starts = PETSC_NULL,count,slen;
2086: PetscInt *rvalues = PETSC_NULL,*svalues = PETSC_NULL,base,*values = PETSC_NULL,*rsvalues,recvtotal,lastidx;
2087: PetscMPIInt *onodes1,*olengths1,nrecvs;
2088: MPI_Comm comm;
2089: MPI_Request *send_waits = PETSC_NULL,*recv_waits = PETSC_NULL;
2090: MPI_Status recv_status,*send_status = PETSC_NULL;
2091: PetscTruth duplicate = PETSC_FALSE;
2092: #if defined(PETSC_USE_DEBUG)
2093: PetscTruth found = PETSC_FALSE;
2094: #endif
2097: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2098: PetscObjectGetComm((PetscObject)xin,&comm);
2099: MPI_Comm_size(comm,&size);
2100: MPI_Comm_rank(comm,&rank);
2101: if (size == 1) {
2102: VecScatterCreate_StoP(nx,inidx,ny,inidy,xin,yin,1,ctx);
2103: return(0);
2104: }
2106: /*
2107: Each processor ships off its inidx[j] and inidy[j] to the appropriate owning processor;
2108: the owners then build the scatter from the received pairs with VecScatterCreate_StoP()
2109: */
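  /*
     A concrete two-process picture (made-up numbers): with owners = {0,8,16}, rank 0 holding
     inidx = {1,9} and inidy = {4,5}, the pair (1,4) is shipped to rank 0 itself and (9,5) to
     rank 1, which stores it as the local pair (9-8,5) = (1,5); each rank then builds the
     scatter from the pairs it received via VecScatterCreate_StoP().
  */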
2110: /* first count number of contributors to each processor */
2111: PetscMalloc3(size,PetscMPIInt,&nprocs,nx,PetscInt,&owner,(size+1),PetscInt,&starts);
2112: PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
2113: lastidx = -1;
2114: j = 0;
2115: for (i=0; i<nx; i++) {
2116: /* if indices are NOT locally sorted, need to start search at the beginning */
2117: if (lastidx > (idx = inidx[i])) j = 0;
2118: lastidx = idx;
2119: for (; j<size; j++) {
2120: if (idx >= owners[j] && idx < owners[j+1]) {
2121: nprocs[j]++;
2122: owner[i] = j;
2123: #if defined(PETSC_USE_DEBUG)
2124: found = PETSC_TRUE;
2125: #endif
2126: break;
2127: }
2128: }
2129: #if defined(PETSC_USE_DEBUG)
2130: if (!found) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2131: found = PETSC_FALSE;
2132: #endif
2133: }
2134: nsends = 0; for (i=0; i<size; i++) { nsends += (nprocs[i] > 0);}
2136: /* inform other processors of the number of messages and the maximum message length */
2137: PetscGatherNumberOfMessages(comm,PETSC_NULL,nprocs,&nrecvs);
2138: PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2139: PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2140: recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
2142: /* post receives: */
2143: PetscMalloc5(2*recvtotal,PetscInt,&rvalues,2*nx,PetscInt,&svalues,nrecvs,MPI_Request,&recv_waits,nsends,MPI_Request,&send_waits,nsends,MPI_Status,&send_status);
2145: count = 0;
2146: for (i=0; i<nrecvs; i++) {
2147: MPI_Irecv((rvalues+2*count),2*olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2148: count += olengths1[i];
2149: }
2150: PetscFree(onodes1);
2152: /* do sends:
2153: 1) starts[i] gives the starting index in svalues for stuff going to
2154: the ith processor
2155: */
2156: starts[0]= 0;
2157: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
2158: for (i=0; i<nx; i++) {
2159: svalues[2*starts[owner[i]]] = inidx[i];
2160: svalues[1 + 2*starts[owner[i]]++] = inidy[i];
2161: }
2163: starts[0] = 0;
2164: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
2165: count = 0;
2166: for (i=0; i<size; i++) {
2167: if (nprocs[i]) {
2168: MPI_Isend(svalues+2*starts[i],2*nprocs[i],MPIU_INT,i,tag,comm,send_waits+count);
2169: count++;
2170: }
2171: }
2172: PetscFree3(nprocs,owner,starts);
2174: /* wait on receives */
2175: count = nrecvs;
2176: slen = 0;
2177: while (count) {
2178: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2179: /* unpack receives into our local space */
2180: MPI_Get_count(&recv_status,MPIU_INT,&n);
2181: slen += n/2;
2182: count--;
2183: }
2184: if (slen != recvtotal) SETERRQ2(PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);
2185:
2186: PetscMalloc2(slen,PetscInt,&local_inidx,slen,PetscInt,&local_inidy);
2187: base = owners[rank];
2188: count = 0;
2189: rsvalues = rvalues;
2190: for (i=0; i<nrecvs; i++) {
2191: values = rsvalues;
2192: rsvalues += 2*olengths1[i];
2193: for (j=0; j<olengths1[i]; j++) {
2194: local_inidx[count] = values[2*j] - base;
2195: local_inidy[count++] = values[2*j+1];
2196: }
2197: }
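  /* local_inidx[] now holds owner-local source offsets (the global index minus owners[rank])
     and local_inidy[] the corresponding, unchanged, target indices for yin; slen pairs in all */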
2198: PetscFree(olengths1);
2200: /* wait on sends */
2201: if (nsends) {MPI_Waitall(nsends,send_waits,send_status);}
2202: PetscFree5(rvalues,svalues,recv_waits,send_waits,send_status);
2204: /*
2205: should sort and remove duplicates from local_inidx,local_inidy
2206: */
2208: #if defined(do_it_slow)
2209: /* sort on the from index */
2210: PetscSortIntWithArray(slen,local_inidx,local_inidy);
2211: start = 0;
2212: while (start < slen) {
2213: count = start+1;
2214: last = local_inidx[start];
2215: while (count < slen && last == local_inidx[count]) count++;
2216: if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
2217: /* sort on to index */
2218: PetscSortInt(count-start,local_inidy+start);
2219: }
2220: /* remove duplicates; not most efficient way, but probably good enough */
2221: i = start;
2222: while (i < count-1) {
2223: if (local_inidy[i] != local_inidy[i+1]) {
2224: i++;
2225: } else { /* found a duplicate */
2226: duplicate = PETSC_TRUE;
2227: for (j=i; j<slen-1; j++) {
2228: local_inidx[j] = local_inidx[j+1];
2229: local_inidy[j] = local_inidy[j+1];
2230: }
2231: slen--;
2232: count--;
2233: }
2234: }
2235: start = count;
2236: }
2237: #endif
2238: if (duplicate) {
2239: PetscInfo(ctx,"Duplicate from-to indices passed in VecScatterCreate(); they are ignored\n");
2240: }
2241: VecScatterCreate_StoP(slen,local_inidx,slen,local_inidy,xin,yin,1,ctx);
2242: PetscFree2(local_inidx,local_inidy);
2243: return(0);
2244: }
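/*
   Illustrative usage only (not part of this file; the helper name and the index choices
   are made up): a general parallel-to-parallel scatter of the kind handled by
   VecScatterCreate_PtoP() above, built from general index sets on two MPI vectors.
   Each process sends its first local entry of x to the first local entry of y on the
   next process.  Calling sequences assume the PETSc 3.0/3.1-era interfaces used
   elsewhere in this file; error checking is omitted to match the listing.

     #include "petscvec.h"

     PetscErrorCode ExamplePtoPScatter(MPI_Comm comm)
     {
       Vec         x,y;
       IS          ix,iy;
       VecScatter  sct;
       PetscMPIInt rank,size;
       PetscInt    from,to;

       MPI_Comm_rank(comm,&rank);
       MPI_Comm_size(comm,&size);
       VecCreateMPI(comm,2,PETSC_DETERMINE,&x);     // two entries owned per process
       VecCreateMPI(comm,2,PETSC_DETERMINE,&y);
       from = 2*rank;                               // this process's first entry of x
       to   = 2*((rank+1)%size);                    // next process's first entry of y
       ISCreateGeneral(comm,1,&from,&ix);
       ISCreateGeneral(comm,1,&to,&iy);
       VecScatterCreate(x,ix,y,iy,&sct);            // general ISs on MPI vectors normally reach the PtoP code
       VecScatterBegin(sct,x,y,INSERT_VALUES,SCATTER_FORWARD);
       VecScatterEnd(sct,x,y,INSERT_VALUES,SCATTER_FORWARD);
       VecScatterDestroy(sct);
       ISDestroy(ix); ISDestroy(iy);
       VecDestroy(x); VecDestroy(y);
       return(0);
     }
*/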