Actual source code: vpscat.c

petsc-3.9.4 2018-09-11

  2: /*
  3:     Defines parallel vector scatters.
  4: */

  6:  #include <../src/vec/vec/impls/dvecimpl.h>
  7:  #include <../src/vec/vec/impls/mpi/pvecimpl.h>
  8:  #include <petscsf.h>

 10: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)

 12: PetscErrorCode VecScatterView_MPI(VecScatter ctx,PetscViewer viewer)
 13: {
 14:   VecScatter_MPI_General *to  =(VecScatter_MPI_General*)ctx->todata;
 15:   VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
 16:   PetscErrorCode         ierr;
 17:   PetscInt               i;
 18:   PetscMPIInt            rank;
 19:   PetscViewerFormat      format;
 20:   PetscBool              iascii;

 23:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
 24:   if (iascii) {
 25:     MPI_Comm_rank(PetscObjectComm((PetscObject)ctx),&rank);
 26:     PetscViewerGetFormat(viewer,&format);
 27:     if (format == PETSC_VIEWER_ASCII_INFO) {
 28:       PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;

 30:       MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 31:       MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 32:       itmp = to->starts[to->n];
 33:       MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 34:       itmp = from->starts[from->n];
 35:       MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 36:       MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)ctx));

 38:       PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
 39:       PetscViewerASCIIPrintf(viewer,"  Blocksize %D\n",to->bs);
 40:       PetscViewerASCIIPrintf(viewer,"  Maximum number sends %D\n",nsend_max);
 41:       PetscViewerASCIIPrintf(viewer,"  Maximum number receives %D\n",nrecv_max);
 42:       PetscViewerASCIIPrintf(viewer,"  Maximum data sent %D\n",(PetscInt)(lensend_max*to->bs*sizeof(PetscScalar)));
 43:       PetscViewerASCIIPrintf(viewer,"  Maximum data received %D\n",(PetscInt)(lenrecv_max*to->bs*sizeof(PetscScalar)));
 44:       PetscViewerASCIIPrintf(viewer,"  Total data sent %D\n",(PetscInt)(alldata*to->bs*sizeof(PetscScalar)));
 45:     } else {
 46:       PetscViewerASCIIPrintf(viewer,"  VecScatter Blocksize %D\n",to->bs);
 47:       PetscViewerASCIIPushSynchronized(viewer);
 48:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);
 49:       if (to->n) {
 50:         for (i=0; i<to->n; i++) {
 51:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d]   %D length = %D to whom %d\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
 52:         }
 53:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Now the indices for all remote sends (in order by process sent to)\n",rank);
 54:         for (i=0; i<to->starts[to->n]; i++) {
 55:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,to->indices[i]);
 56:         }
 57:       }

 59:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);
 60:       if (from->n) {
 61:         for (i=0; i<from->n; i++) {
 62:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %d\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
 63:         }

 65:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Now the indices for all remote receives (in order by process received from)\n",rank);
 66:         for (i=0; i<from->starts[from->n]; i++) {
 67:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,from->indices[i]);
 68:         }
 69:       }
 70:       if (to->local.n) {
 71:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Indices for local part of scatter\n",rank);
 72:         for (i=0; i<to->local.n; i++) {  /* the to and from have the opposite meaning from what you would expect */
 73:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] From %D to %D \n",rank,to->local.vslots[i],from->local.vslots[i]);
 74:         }
 75:       }

 77:       for (i=0; i<to->msize; i++) {
 78:         if (to->sharedspacestarts[i+1] > to->sharedspacestarts[i]) {
 79:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Via shared memory to local memory partner %D count %D\n",rank,i,to->sharedspacestarts[i+1]-to->sharedspacestarts[i]);
 80:         }
 81:       }
 82:       for (i=0; i<from->msize; i++) {
 83:         if (from->sharedspacestarts[i+1] > from->sharedspacestarts[i]) {
 84:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Via shared memory from local memory partner %D count %D\n",rank,i,from->sharedspacestarts[i+1]-from->sharedspacestarts[i]);
 85:         }
 86:       }

 88:       PetscViewerFlush(viewer);
 89:       PetscViewerASCIIPopSynchronized(viewer);

 91:       PetscViewerASCIIPrintf(viewer,"Method used to implement the VecScatter: ");
 92: #if defined(PETSC_HAVE_MPI_ALLTOALLW)  && !defined(PETSC_USE_64BIT_INDICES)
 93:       if (to->use_alltoallw) {
 94:         PetscViewerASCIIPrintf(viewer,"Uses MPI_Alltoallw if InsertMode is INSERT_VALUES\n");
 95:       } else
 96: #endif
 97:       if (ctx->packtogether || to->use_alltoallv || to->use_window) {
 98:         if (to->use_alltoallv) {
 99:           PetscViewerASCIIPrintf(viewer,"Uses MPI_Alltoallv\n");
100:         } else if (to->use_window) {
101:           PetscViewerASCIIPrintf(viewer,"Uses MPI window\n");
102:         } else {
103:           PetscViewerASCIIPrintf(viewer,"Packs all messages and then sends them\n");
104:         }
105:       } else {
106:         PetscViewerASCIIPrintf(viewer,"Packs and sends messages one at a time\n");
107:       }
108:     }
109:   }
110:   return(0);
111: }
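
/* Usage sketch (assumed caller, not part of vpscat.c): the statistics branch
   above is reached by viewing the scatter with the ASCII info format, e.g. */
static PetscErrorCode VecScatterViewInfo_Example(VecScatter ctx)
{
  PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);
  VecScatterView(ctx,PETSC_VIEWER_STDOUT_WORLD);
  PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);
  return(0);
}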

113: /* -----------------------------------------------------------------------------------*/
114: /*
115:       The next routine determines what part of the local scatter is an
116:   exact copy of values into their current location. We check this here and
117:   then know that we need not perform that portion of the scatter when the vector is
118:   scattering to itself with INSERT_VALUES.

120:      This is currently not used, but it would speed up, for example, DMLocalToLocalBegin/End().

122: */
123: PetscErrorCode VecScatterLocalOptimize_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from)
124: {
125:   PetscInt       n = to->n,n_nonmatching = 0,i,*to_slots = to->vslots,*from_slots = from->vslots;
127:   PetscInt       *nto_slots,*nfrom_slots,j = 0;

130:   for (i=0; i<n; i++) {
131:     if (to_slots[i] != from_slots[i]) n_nonmatching++;
132:   }

134:   if (!n_nonmatching) {
135:     to->nonmatching_computed = PETSC_TRUE;
136:     to->n_nonmatching        = from->n_nonmatching = 0;
137:     PetscInfo1(scatter,"Reduced %D to 0\n", n);
138:   } else if (n_nonmatching == n) {
139:     to->nonmatching_computed = PETSC_FALSE;
140:     PetscInfo(scatter,"All values non-matching\n");
141:   } else {
142:     to->nonmatching_computed= PETSC_TRUE;
143:     to->n_nonmatching       = from->n_nonmatching = n_nonmatching;

145:     PetscMalloc1(n_nonmatching,&nto_slots);
146:     PetscMalloc1(n_nonmatching,&nfrom_slots);

148:     to->slots_nonmatching   = nto_slots;
149:     from->slots_nonmatching = nfrom_slots;
150:     for (i=0; i<n; i++) {
151:       if (to_slots[i] != from_slots[i]) {
152:         nto_slots[j]   = to_slots[i];
153:         nfrom_slots[j] = from_slots[i];
154:         j++;
155:       }
156:     }
157:     PetscInfo2(scatter,"Reduced %D to %D\n",n,n_nonmatching);
158:   }
159:   return(0);
160: }
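
/* Consumption sketch (assumed caller; as noted above, PETSc does not currently
   use these lists): when a vector scatters to itself with INSERT_VALUES, only
   the slots recorded as non-matching need to be moved. */
PETSC_STATIC_INLINE void VecScatterLocalNonmatching_Example(VecScatter_Seq_General *to,VecScatter_Seq_General *from,const PetscScalar *x,PetscScalar *y)
{
  PetscInt i;
  if (!to->nonmatching_computed) return;   /* caller must fall back to the full loop */
  for (i=0; i<to->n_nonmatching; i++) y[from->slots_nonmatching[i]] = x[to->slots_nonmatching[i]];
}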

162: /* --------------------------------------------------------------------------------------*/

164: /* -------------------------------------------------------------------------------------*/
165: PetscErrorCode VecScatterDestroy_PtoP_MPI3(VecScatter ctx)
166: {
167:   VecScatter_MPI_General *to   = (VecScatter_MPI_General*)ctx->todata;
168:   VecScatter_MPI_General *from = (VecScatter_MPI_General*)ctx->fromdata;
169:   PetscErrorCode         ierr;
170:   PetscInt               i;

173:   if (to->use_readyreceiver) {
174:     /*
175:        Since we have already posted sends we must cancel them before freeing
176:        the requests
177:     */
178:     for (i=0; i<from->n; i++) {
179:       MPI_Cancel(from->requests+i);
180:     }
181:     for (i=0; i<to->n; i++) {
182:       MPI_Cancel(to->rev_requests+i);
183:     }
184:     MPI_Waitall(from->n,from->requests,to->rstatus);
185:     MPI_Waitall(to->n,to->rev_requests,to->rstatus);
186:   }

188: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
189:   if (to->use_alltoallw) {
190:     for (i=0; i<to->n; i++) {
191:       MPI_Type_free(to->types+to->procs[i]);
192:     }
193:     PetscFree3(to->wcounts,to->wdispls,to->types);
194:     if (!from->contiq) {
195:       for (i=0; i<from->n; i++) {
196:         MPI_Type_free(from->types+from->procs[i]);
197:       }
198:     }
199:     PetscFree3(from->wcounts,from->wdispls,from->types);
200:   }
201: #endif

203:   if (to->use_window) {
204:     MPI_Win_free(&from->window);
205:     MPI_Win_free(&to->window);
206:     PetscFree(from->winstarts);
207:     PetscFree(to->winstarts);
208:   }

210:   if (to->use_alltoallv) {
211:     PetscFree2(to->counts,to->displs);
212:     PetscFree2(from->counts,from->displs);
213:   }

215:   /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
216:   /*
217:      IBM's PE version of MPI has a bug where freeing these requests corrupts later
218:      message passing.
219:   */
220: #if !defined(PETSC_HAVE_BROKEN_REQUEST_FREE)
221:   if (!to->use_alltoallv && !to->use_window) {   /* currently the to->requests etc are ALWAYS allocated even if not used */
222:     if (to->requests) {
223:       for (i=0; i<to->n; i++) {
224:         MPI_Request_free(to->requests + i);
225:       }
226:     }
227:     if (to->rev_requests) {
228:       for (i=0; i<to->n; i++) {
229:         MPI_Request_free(to->rev_requests + i);
230:       }
231:     }
232:   }
233:   /*
234:       MPICH could not properly cancel requests, so with ready-receiver mode we
235:     cannot free the requests. This may be fixed now; if not, put the following
236:     code inside an if (!to->use_readyreceiver) { } block.
237:   */
238:   if (!to->use_alltoallv && !to->use_window) {    /* currently the from->requests etc are ALWAYS allocated even if not used */
239:     if (from->requests) {
240:       for (i=0; i<from->n; i++) {
241:         MPI_Request_free(from->requests + i);
242:       }
243:     }

245:     if (from->rev_requests) {
246:       for (i=0; i<from->n; i++) {
247:         MPI_Request_free(from->rev_requests + i);
248:       }
249:     }
250:   }
251: #endif
252:   if (to->sharedwin != MPI_WIN_NULL) {MPI_Win_free(&to->sharedwin);}
253:   if (from->sharedwin != MPI_WIN_NULL) {MPI_Win_free(&from->sharedwin);}
254:   PetscFree(to->sharedspaces);
255:   PetscFree(to->sharedspacesoffset);
256:   PetscFree(to->sharedspaceindices);
257:   PetscFree(to->sharedspacestarts);

259:   PetscFree(from->sharedspaceindices);
260:   PetscFree(from->sharedspaces);
261:   PetscFree(from->sharedspacesoffset);
262:   PetscFree(from->sharedspacestarts);

264:   PetscFree(to->local.vslots);
265:   PetscFree(from->local.vslots);
266:   PetscFree2(to->counts,to->displs);
267:   PetscFree2(from->counts,from->displs);
268:   PetscFree(to->local.slots_nonmatching);
269:   PetscFree(from->local.slots_nonmatching);
270:   PetscFree(to->rev_requests);
271:   PetscFree(from->rev_requests);
272:   PetscFree(to->requests);
273:   PetscFree(from->requests);
274:   PetscFree4(to->values,to->indices,to->starts,to->procs);
275:   PetscFree2(to->sstatus,to->rstatus);
276:   PetscFree4(from->values,from->indices,from->starts,from->procs);
277:   PetscFree(from);
278:   PetscFree(to);
279:   return(0);
280: }
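
/* Background sketch (standard MPI persistent-request lifecycle, mirroring the
   requests freed above; buf, count, dest, and tag are illustrative): a
   persistent request should be freed while inactive, which is why the
   ready-receiver case above cancels and waits before MPI_Request_free(). */
static void PersistentSend_Example(PetscScalar *buf,PetscMPIInt count,PetscMPIInt dest,PetscMPIInt tag,MPI_Comm comm)
{
  MPI_Request req;
  MPI_Send_init(buf,count,MPIU_SCALAR,dest,tag,comm,&req);
  MPI_Start(&req);                    /* activate one send of the persistent request */
  MPI_Wait(&req,MPI_STATUS_IGNORE);   /* request becomes inactive; MPI_Start() may be repeated */
  MPI_Request_free(&req);             /* free once inactive */
}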

282: /* --------------------------------------------------------------------------------------*/
283: /*
284:     Special optimization to check whether the local part of the scatter is actually
285:     a contiguous copy; if so, the scatter routines call PetscMemcpy() instead.

287: */
288: PetscErrorCode VecScatterLocalOptimizeCopy_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from,PetscInt bs)
289: {
290:   PetscInt       n = to->n,i,*to_slots = to->vslots,*from_slots = from->vslots;
291:   PetscInt       to_start,from_start;

295:   to_start   = to_slots[0];
296:   from_start = from_slots[0];

298:   for (i=1; i<n; i++) {
299:     to_start   += bs;
300:     from_start += bs;
301:     if (to_slots[i]   != to_start)   return(0);
302:     if (from_slots[i] != from_start) return(0);
303:   }
304:   to->is_copy       = PETSC_TRUE;
305:   to->copy_start    = to_slots[0];
306:   to->copy_length   = bs*sizeof(PetscScalar)*n;
307:   from->is_copy     = PETSC_TRUE;
308:   from->copy_start  = from_slots[0];
309:   from->copy_length = bs*sizeof(PetscScalar)*n;
310:   PetscInfo(scatter,"Local scatter is a copy, optimizing for it\n");
311:   return(0);
312: }
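
/* Consumption sketch (assumed form of the begin/end kernels that honor this
   flag): once is_copy is set, the whole local scatter collapses to a single
   PetscMemcpy() of copy_length bytes. */
PETSC_STATIC_INLINE void VecScatterLocalCopy_Example(VecScatter_Seq_General *to,VecScatter_Seq_General *from,const PetscScalar *x,PetscScalar *y)
{
  if (to->is_copy) PetscMemcpy(y+from->copy_start,x+to->copy_start,to->copy_length);
}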

314: /* --------------------------------------------------------------------------------------*/

316: PetscErrorCode VecScatterCopy_PtoP_X(VecScatter in,VecScatter out)
317: {
318:   VecScatter_MPI_General *in_to   = (VecScatter_MPI_General*)in->todata;
319:   VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
320:   PetscErrorCode         ierr;
321:   PetscInt               ny,bs = in_from->bs,jj;
322:   PetscCommShared        scomm;
323:   MPI_Comm               mscomm;
324:   MPI_Info               info;

327:   out->ops->begin   = in->ops->begin;
328:   out->ops->end     = in->ops->end;
329:   out->ops->copy    = in->ops->copy;
330:   out->ops->destroy = in->ops->destroy;
331:   out->ops->view    = in->ops->view;

333:   /* allocate entire send scatter context */
334:   PetscNewLog(out,&out_to);
335:   out_to->sharedwin       = MPI_WIN_NULL;
336:   PetscNewLog(out,&out_from);
337:   out_from->sharedwin       = MPI_WIN_NULL;

339:   ny                = in_to->starts[in_to->n];
340:   out_to->n         = in_to->n;
341:   out_to->format    = in_to->format;
342:   out_to->sendfirst = in_to->sendfirst;

344:   PetscMalloc1(out_to->n,&out_to->requests);
345:   PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
346:   PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
347:   PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
348:   PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
349:   PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));

351:   out->todata                        = (void*)out_to;
352:   out_to->local.n                    = in_to->local.n;
353:   out_to->local.nonmatching_computed = PETSC_FALSE;
354:   out_to->local.n_nonmatching        = 0;
355:   out_to->local.slots_nonmatching    = 0;
356:   if (in_to->local.n) {
357:     PetscMalloc1(in_to->local.n,&out_to->local.vslots);
358:     PetscMalloc1(in_from->local.n,&out_from->local.vslots);
359:     PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
360:     PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
361:   } else {
362:     out_to->local.vslots   = 0;
363:     out_from->local.vslots = 0;
364:   }

366:   /* allocate entire receive context */
367:   out_from->format    = in_from->format;
368:   ny                  = in_from->starts[in_from->n];
369:   out_from->n         = in_from->n;
370:   out_from->sendfirst = in_from->sendfirst;

372:   PetscMalloc1(out_from->n,&out_from->requests);
373:   PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
374:   PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
375:   PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
376:   PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));

378:   out->fromdata                        = (void*)out_from;
379:   out_from->local.n                    = in_from->local.n;
380:   out_from->local.nonmatching_computed = PETSC_FALSE;
381:   out_from->local.n_nonmatching        = 0;
382:   out_from->local.slots_nonmatching    = 0;

384:   /*
385:       set up the request arrays for use with MPI_Send_init() and MPI_Recv_init()
386:   */
387:   {
388:     PetscMPIInt tag;
389:     MPI_Comm    comm;
390:     PetscInt    *sstarts = out_to->starts,  *rstarts = out_from->starts;
391:     PetscMPIInt *sprocs  = out_to->procs,   *rprocs  = out_from->procs;
392:     PetscInt    i;
393:     PetscBool   flg;
394:     MPI_Request *swaits   = out_to->requests,*rwaits  = out_from->requests;
395:     MPI_Request *rev_swaits,*rev_rwaits;
396:     PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;

398:     PetscMalloc1(in_to->n,&out_to->rev_requests);
399:     PetscMalloc1(in_from->n,&out_from->rev_requests);

401:     rev_rwaits = out_to->rev_requests;
402:     rev_swaits = out_from->rev_requests;

404:     out_from->bs = out_to->bs = bs;
405:     tag          = ((PetscObject)out)->tag;
406:     PetscObjectGetComm((PetscObject)out,&comm);

408:     /* Register the receives that you will use later (sends for scatter reverse) */
409:     for (i=0; i<out_from->n; i++) {
410:       MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
411:       MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
412:     }

414:     flg  = PETSC_FALSE;
415:     PetscOptionsGetBool(NULL,NULL,"-vecscatter_rsend",&flg,NULL);
416:     if (flg) {
417:       out_to->use_readyreceiver   = PETSC_TRUE;
418:       out_from->use_readyreceiver = PETSC_TRUE;
419:       for (i=0; i<out_to->n; i++) {
420:         MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
421:       }
422:       if (out_from->n) {MPI_Startall_irecv(out_from->starts[out_from->n]*out_from->bs,out_from->n,out_from->requests);}
423:       MPI_Barrier(comm);
424:       PetscInfo(in,"Using VecScatter ready receiver mode\n");
425:     } else {
426:       out_to->use_readyreceiver   = PETSC_FALSE;
427:       out_from->use_readyreceiver = PETSC_FALSE;
428:       flg                         = PETSC_FALSE;
429:       PetscOptionsGetBool(NULL,NULL,"-vecscatter_ssend",&flg,NULL);
430:       if (flg) {
431:         PetscInfo(in,"Using VecScatter Ssend mode\n");
432:       }
433:       for (i=0; i<out_to->n; i++) {
434:         if (!flg) {
435:           MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
436:         } else {
437:           MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
438:         }
439:       }
440:     }
441:     /* Register receives for scatter reverse */
442:     for (i=0; i<out_to->n; i++) {
443:       MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
444:     }
445:   }

447:   /* since the to and from data structures are not symmetric for shared memory copies we ensure they are always listed in "standard" form */
448:   if (in_to->sharedwin == MPI_WIN_NULL) {
449:     VecScatter_MPI_General *totmp = in_to,*out_totmp = out_to;
450:     in_to       = in_from;
451:     in_from     = totmp;
452:     out_to      = out_from;
453:     out_from    = out_totmp;
454:   }

456:   /* copy the to parts for the shared memory copies between processes */
457:   out_to->sharedcnt = in_to->sharedcnt;
458:   out_to->msize     = in_to->msize;
459:   PetscMalloc1(out_to->msize+1,&out_to->sharedspacestarts);
460:   PetscMemcpy(out_to->sharedspacestarts,in_to->sharedspacestarts,(out_to->msize+1)*sizeof(PetscInt));
461:   PetscMalloc1(out_to->sharedcnt,&out_to->sharedspaceindices);
462:   PetscMemcpy(out_to->sharedspaceindices,in_to->sharedspaceindices,out_to->sharedcnt*sizeof(PetscInt));

464:   PetscCommSharedGet(PetscObjectComm((PetscObject)in),&scomm);
465:   PetscCommSharedGetComm(scomm,&mscomm);
466:   MPI_Info_create(&info);
467:   MPI_Info_set(info, "alloc_shared_noncontig", "true");
468:   MPIU_Win_allocate_shared(bs*out_to->sharedcnt*sizeof(PetscScalar),sizeof(PetscScalar),info,mscomm,&out_to->sharedspace,&out_to->sharedwin);
469:   MPI_Info_free(&info);

471:   /* copy the from parts for the shared memory copies between processes */
472:   out_from->sharedcnt = in_from->sharedcnt;
473:   out_from->msize     = in_from->msize;
474:   PetscMalloc1(out_from->msize+1,&out_from->sharedspacestarts);
475:   PetscMemcpy(out_from->sharedspacestarts,in_from->sharedspacestarts,(out_from->msize+1)*sizeof(PetscInt));
476:   PetscMalloc1(out_from->sharedcnt,&out_from->sharedspaceindices);
477:   PetscMemcpy(out_from->sharedspaceindices,in_from->sharedspaceindices,out_from->sharedcnt*sizeof(PetscInt));
478:   PetscMalloc1(out_from->msize,&out_from->sharedspacesoffset);
479:   PetscMemcpy(out_from->sharedspacesoffset,in_from->sharedspacesoffset,out_from->msize*sizeof(PetscInt));
480:   PetscCalloc1(out_from->msize,&out_from->sharedspaces);
481:   for (jj=0; jj<out_from->msize; jj++) {
482:     MPI_Aint    isize;
483:     PetscMPIInt disp_unit;
484:     MPIU_Win_shared_query(out_to->sharedwin,jj,&isize,&disp_unit,&out_from->sharedspaces[jj]);
485:   }
486:   return(0);
487: }
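
/* Background sketch (standard MPI-3 shared-memory windows, the same pattern as
   the MPIU_Win_allocate_shared()/MPIU_Win_shared_query() calls above; nodecomm
   is an assumed node-local communicator such as the one returned by
   PetscCommSharedGetComm()): */
static void SharedWindow_Example(MPI_Comm nodecomm,PetscInt nlocal)
{
  PetscScalar *mine,*peer;
  MPI_Win     win;
  MPI_Aint    sz;
  PetscMPIInt disp_unit;
  MPI_Win_allocate_shared((MPI_Aint)(nlocal*sizeof(PetscScalar)),(PetscMPIInt)sizeof(PetscScalar),MPI_INFO_NULL,nodecomm,&mine,&win);
  MPI_Win_shared_query(win,0,&sz,&disp_unit,&peer);   /* direct load/store pointer into rank 0's slice */
  MPI_Win_free(&win);
}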

489: PetscErrorCode VecScatterCopy_PtoP_AllToAll(VecScatter in,VecScatter out)
490: {
491:   VecScatter_MPI_General *in_to   = (VecScatter_MPI_General*)in->todata;
492:   VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
493:   PetscErrorCode         ierr;
494:   PetscInt               ny,bs = in_from->bs;
495:   PetscMPIInt            size;

498:   MPI_Comm_size(PetscObjectComm((PetscObject)in),&size);

500:   out->ops->begin     = in->ops->begin;
501:   out->ops->end       = in->ops->end;
502:   out->ops->copy      = in->ops->copy;
503:   out->ops->destroy   = in->ops->destroy;
504:   out->ops->view      = in->ops->view;

506:   /* allocate entire send scatter context */
507:   PetscNewLog(out,&out_to);
508:   out_to->sharedwin       = MPI_WIN_NULL;
509:   PetscNewLog(out,&out_from);
510:   out_from->sharedwin       = MPI_WIN_NULL;

512:   ny                = in_to->starts[in_to->n];
513:   out_to->n         = in_to->n;
514:   out_to->format    = in_to->format;
515:   out_to->sendfirst = in_to->sendfirst;

517:   PetscMalloc1(out_to->n,&out_to->requests);
518:   PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
519:   PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
520:   PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
521:   PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
522:   PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));

524:   out->todata                        = (void*)out_to;
525:   out_to->local.n                    = in_to->local.n;
526:   out_to->local.nonmatching_computed = PETSC_FALSE;
527:   out_to->local.n_nonmatching        = 0;
528:   out_to->local.slots_nonmatching    = 0;
529:   if (in_to->local.n) {
530:     PetscMalloc1(in_to->local.n,&out_to->local.vslots);
531:     PetscMalloc1(in_from->local.n,&out_from->local.vslots);
532:     PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
533:     PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
534:   } else {
535:     out_to->local.vslots   = 0;
536:     out_from->local.vslots = 0;
537:   }

539:   /* allocate entire receive context */
540:   out_from->format    = in_from->format;
541:   ny                  = in_from->starts[in_from->n];
542:   out_from->n         = in_from->n;
543:   out_from->sendfirst = in_from->sendfirst;

545:   PetscMalloc1(out_from->n,&out_from->requests);
546:   PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
547:   PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
548:   PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
549:   PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));

551:   out->fromdata                        = (void*)out_from;
552:   out_from->local.n                    = in_from->local.n;
553:   out_from->local.nonmatching_computed = PETSC_FALSE;
554:   out_from->local.n_nonmatching        = 0;
555:   out_from->local.slots_nonmatching    = 0;

557:   out_to->use_alltoallv = out_from->use_alltoallv = PETSC_TRUE;

559:   PetscMalloc2(size,&out_to->counts,size,&out_to->displs);
560:   PetscMemcpy(out_to->counts,in_to->counts,size*sizeof(PetscMPIInt));
561:   PetscMemcpy(out_to->displs,in_to->displs,size*sizeof(PetscMPIInt));

563:   PetscMalloc2(size,&out_from->counts,size,&out_from->displs);
564:   PetscMemcpy(out_from->counts,in_from->counts,size*sizeof(PetscMPIInt));
565:   PetscMemcpy(out_from->displs,in_from->displs,size*sizeof(PetscMPIInt));
566:   return(0);
567: }
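
/* Background sketch (standard MPI, using counts/displs arrays like those copied
   above): in all-to-all mode a single collective replaces the loop of
   point-to-point messages posted by the general code path. */
static void Alltoallv_Example(const PetscScalar *sbuf,const PetscMPIInt *scnts,const PetscMPIInt *sdispls,PetscScalar *rbuf,const PetscMPIInt *rcnts,const PetscMPIInt *rdispls,MPI_Comm comm)
{
  MPI_Alltoallv(sbuf,scnts,sdispls,MPIU_SCALAR,rbuf,rcnts,rdispls,MPIU_SCALAR,comm);
}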
568: /* --------------------------------------------------------------------------------------------------
569:     Packs message data into the send buffers and unpacks it from the receive buffers.

571:     These could be generated automatically.

573:     Fortran kernels etc. could be used.
574: */
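
/* Generic form that the specialized kernels below unroll (illustrative only;
   the hand-written Pack_2, Pack_3, ... avoid the inner loop for small, fixed
   block sizes): */
PETSC_STATIC_INLINE void Pack_bs_Example(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
{
  PetscInt i,j,idx;
  for (i=0; i<n; i++) {
    idx = *indicesx++;
    for (j=0; j<bs; j++) y[j] = x[idx+j];
    y  += bs;
  }
}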
575: PETSC_STATIC_INLINE void Pack_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
576: {
577:   PetscInt i;
578:   for (i=0; i<n; i++) y[i] = x[indicesx[i]];
579: }

581: PETSC_STATIC_INLINE PetscErrorCode UnPack_1(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
582: {
583:   PetscInt i;

586:   switch (addv) {
587:   case INSERT_VALUES:
588:   case INSERT_ALL_VALUES:
589:     for (i=0; i<n; i++) y[indicesy[i]] = x[i];
590:     break;
591:   case ADD_VALUES:
592:   case ADD_ALL_VALUES:
593:     for (i=0; i<n; i++) y[indicesy[i]] += x[i];
594:     break;
595: #if !defined(PETSC_USE_COMPLEX)
596:   case MAX_VALUES:
597:     for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[i]);
598: #else
599:   case MAX_VALUES:
600: #endif
601:   case NOT_SET_VALUES:
602:     break;
603:   default:
604:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
605:   }
606:   return(0);
607: }

609: PETSC_STATIC_INLINE PetscErrorCode Scatter_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
610: {
611:   PetscInt i;

614:   switch (addv) {
615:   case INSERT_VALUES:
616:   case INSERT_ALL_VALUES:
617:     for (i=0; i<n; i++) y[indicesy[i]] = x[indicesx[i]];
618:     break;
619:   case ADD_VALUES:
620:   case ADD_ALL_VALUES:
621:     for (i=0; i<n; i++) y[indicesy[i]] += x[indicesx[i]];
622:     break;
623: #if !defined(PETSC_USE_COMPLEX)
624:   case MAX_VALUES:
625:     for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[indicesx[i]]);
626: #else
627:   case MAX_VALUES:
628: #endif
629:   case NOT_SET_VALUES:
630:     break;
631:   default:
632:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
633:   }
634:   return(0);
635: }

637: /* ----------------------------------------------------------------------------------------------- */
638: PETSC_STATIC_INLINE void Pack_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
639: {
640:   PetscInt i,idx;

642:   for (i=0; i<n; i++) {
643:     idx  = *indicesx++;
644:     y[0] = x[idx];
645:     y[1] = x[idx+1];
646:     y   += 2;
647:   }
648: }

650: PETSC_STATIC_INLINE PetscErrorCode UnPack_2(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
651: {
652:   PetscInt i,idy;

655:   switch (addv) {
656:   case INSERT_VALUES:
657:   case INSERT_ALL_VALUES:
658:     for (i=0; i<n; i++) {
659:       idy      = *indicesy++;
660:       y[idy]   = x[0];
661:       y[idy+1] = x[1];
662:       x       += 2;
663:     }
664:     break;
665:   case ADD_VALUES:
666:   case ADD_ALL_VALUES:
667:     for (i=0; i<n; i++) {
668:       idy       = *indicesy++;
669:       y[idy]   += x[0];
670:       y[idy+1] += x[1];
671:       x        += 2;
672:     }
673:     break;
674: #if !defined(PETSC_USE_COMPLEX)
675:   case MAX_VALUES:
676:     for (i=0; i<n; i++) {
677:       idy      = *indicesy++;
678:       y[idy]   = PetscMax(y[idy],x[0]);
679:       y[idy+1] = PetscMax(y[idy+1],x[1]);
680:       x       += 2;
681:     }
682: #else
683:   case MAX_VALUES:
684: #endif
685:   case NOT_SET_VALUES:
686:     break;
687:   default:
688:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
689:   }
690:   return(0);
691: }

693: PETSC_STATIC_INLINE PetscErrorCode Scatter_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
694: {
695:   PetscInt i,idx,idy;

698:   switch (addv) {
699:   case INSERT_VALUES:
700:   case INSERT_ALL_VALUES:
701:     for (i=0; i<n; i++) {
702:       idx      = *indicesx++;
703:       idy      = *indicesy++;
704:       y[idy]   = x[idx];
705:       y[idy+1] = x[idx+1];
706:     }
707:     break;
708:   case ADD_VALUES:
709:   case ADD_ALL_VALUES:
710:     for (i=0; i<n; i++) {
711:       idx       = *indicesx++;
712:       idy       = *indicesy++;
713:       y[idy]   += x[idx];
714:       y[idy+1] += x[idx+1];
715:     }
716:     break;
717: #if !defined(PETSC_USE_COMPLEX)
718:   case MAX_VALUES:
719:     for (i=0; i<n; i++) {
720:       idx      = *indicesx++;
721:       idy      = *indicesy++;
722:       y[idy]   = PetscMax(y[idy],x[idx]);
723:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
724:     }
725: #else
726:   case MAX_VALUES:
727: #endif
728:   case NOT_SET_VALUES:
729:     break;
730:   default:
731:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
732:   }
733:   return(0);
734: }
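
/* Dispatch sketch (hypothetical helper; the actual kernel selection happens in
   the VecScatter setup code, not in this file): the begin/end routines pick a
   pack kernel by block size, all kernels sharing one signature. */
typedef void (*PackKernel_Example)(PetscInt,const PetscInt*,const PetscScalar*,PetscScalar*,PetscInt);
PETSC_STATIC_INLINE PackKernel_Example PackKernelBySize_Example(PetscInt bs)
{
  switch (bs) {
  case 1:  return Pack_1;
  case 2:  return Pack_2;
  default: return NULL;   /* Pack_3 and up, defined below, follow the same pattern */
  }
}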
735: /* ----------------------------------------------------------------------------------------------- */
736: PETSC_STATIC_INLINE void Pack_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
737: {
738:   PetscInt i,idx;

740:   for (i=0; i<n; i++) {
741:     idx  = *indicesx++;
742:     y[0] = x[idx];
743:     y[1] = x[idx+1];
744:     y[2] = x[idx+2];
745:     y   += 3;
746:   }
747: }
748: PETSC_STATIC_INLINE PetscErrorCode UnPack_3(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
749: {
750:   PetscInt i,idy;

753:   switch (addv) {
754:   case INSERT_VALUES:
755:   case INSERT_ALL_VALUES:
756:     for (i=0; i<n; i++) {
757:       idy      = *indicesy++;
758:       y[idy]   = x[0];
759:       y[idy+1] = x[1];
760:       y[idy+2] = x[2];
761:       x       += 3;
762:     }
763:     break;
764:   case ADD_VALUES:
765:   case ADD_ALL_VALUES:
766:     for (i=0; i<n; i++) {
767:       idy       = *indicesy++;
768:       y[idy]   += x[0];
769:       y[idy+1] += x[1];
770:       y[idy+2] += x[2];
771:       x        += 3;
772:     }
773:     break;
774: #if !defined(PETSC_USE_COMPLEX)
775:   case MAX_VALUES:
776:     for (i=0; i<n; i++) {
777:       idy      = *indicesy++;
778:       y[idy]   = PetscMax(y[idy],x[0]);
779:       y[idy+1] = PetscMax(y[idy+1],x[1]);
780:       y[idy+2] = PetscMax(y[idy+2],x[2]);
781:       x       += 3;
782:     }
783: #else
784:   case MAX_VALUES:
785: #endif
786:   case NOT_SET_VALUES:
787:     break;
788:   default:
789:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
790:   }
791:   return(0);
792: }

794: PETSC_STATIC_INLINE PetscErrorCode Scatter_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
795: {
796:   PetscInt i,idx,idy;

799:   switch (addv) {
800:   case INSERT_VALUES:
801:   case INSERT_ALL_VALUES:
802:     for (i=0; i<n; i++) {
803:       idx      = *indicesx++;
804:       idy      = *indicesy++;
805:       y[idy]   = x[idx];
806:       y[idy+1] = x[idx+1];
807:       y[idy+2] = x[idx+2];
808:     }
809:     break;
810:   case ADD_VALUES:
811:   case ADD_ALL_VALUES:
812:     for (i=0; i<n; i++) {
813:       idx       = *indicesx++;
814:       idy       = *indicesy++;
815:       y[idy]   += x[idx];
816:       y[idy+1] += x[idx+1];
817:       y[idy+2] += x[idx+2];
818:     }
819:     break;
820: #if !defined(PETSC_USE_COMPLEX)
821:   case MAX_VALUES:
822:     for (i=0; i<n; i++) {
823:       idx      = *indicesx++;
824:       idy      = *indicesy++;
825:       y[idy]   = PetscMax(y[idy],x[idx]);
826:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
827:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
828:     }
829: #else
830:   case MAX_VALUES:
831: #endif
832:   case NOT_SET_VALUES:
833:     break;
834:   default:
835:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
836:   }
837:   return(0);
838: }
839: /* ----------------------------------------------------------------------------------------------- */
840: PETSC_STATIC_INLINE void Pack_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
841: {
842:   PetscInt i,idx;

844:   for (i=0; i<n; i++) {
845:     idx  = *indicesx++;
846:     y[0] = x[idx];
847:     y[1] = x[idx+1];
848:     y[2] = x[idx+2];
849:     y[3] = x[idx+3];
850:     y   += 4;
851:   }
852: }
853: PETSC_STATIC_INLINE PetscErrorCode UnPack_4(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
854: {
855:   PetscInt i,idy;

858:   switch (addv) {
859:   case INSERT_VALUES:
860:   case INSERT_ALL_VALUES:
861:     for (i=0; i<n; i++) {
862:       idy      = *indicesy++;
863:       y[idy]   = x[0];
864:       y[idy+1] = x[1];
865:       y[idy+2] = x[2];
866:       y[idy+3] = x[3];
867:       x       += 4;
868:     }
869:     break;
870:   case ADD_VALUES:
871:   case ADD_ALL_VALUES:
872:     for (i=0; i<n; i++) {
873:       idy       = *indicesy++;
874:       y[idy]   += x[0];
875:       y[idy+1] += x[1];
876:       y[idy+2] += x[2];
877:       y[idy+3] += x[3];
878:       x        += 4;
879:     }
880:     break;
881: #if !defined(PETSC_USE_COMPLEX)
882:   case MAX_VALUES:
883:     for (i=0; i<n; i++) {
884:       idy      = *indicesy++;
885:       y[idy]   = PetscMax(y[idy],x[0]);
886:       y[idy+1] = PetscMax(y[idy+1],x[1]);
887:       y[idy+2] = PetscMax(y[idy+2],x[2]);
888:       y[idy+3] = PetscMax(y[idy+3],x[3]);
889:       x       += 4;
890:     }
891: #else
892:   case MAX_VALUES:
893: #endif
894:   case NOT_SET_VALUES:
895:     break;
896:   default:
897:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
898:   }
899:   return(0);
900: }

902: PETSC_STATIC_INLINE PetscErrorCode Scatter_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
903: {
904:   PetscInt i,idx,idy;

907:   switch (addv) {
908:   case INSERT_VALUES:
909:   case INSERT_ALL_VALUES:
910:     for (i=0; i<n; i++) {
911:       idx      = *indicesx++;
912:       idy      = *indicesy++;
913:       y[idy]   = x[idx];
914:       y[idy+1] = x[idx+1];
915:       y[idy+2] = x[idx+2];
916:       y[idy+3] = x[idx+3];
917:     }
918:     break;
919:   case ADD_VALUES:
920:   case ADD_ALL_VALUES:
921:     for (i=0; i<n; i++) {
922:       idx       = *indicesx++;
923:       idy       = *indicesy++;
924:       y[idy]   += x[idx];
925:       y[idy+1] += x[idx+1];
926:       y[idy+2] += x[idx+2];
927:       y[idy+3] += x[idx+3];
928:     }
929:     break;
930: #if !defined(PETSC_USE_COMPLEX)
931:   case MAX_VALUES:
932:     for (i=0; i<n; i++) {
933:       idx      = *indicesx++;
934:       idy      = *indicesy++;
935:       y[idy]   = PetscMax(y[idy],x[idx]);
936:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
937:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
938:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
939:     }
940: #else
941:   case MAX_VALUES:
942: #endif
943:   case NOT_SET_VALUES:
944:     break;
945:   default:
946:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
947:   }
948:   return(0);
949: }
950: /* ----------------------------------------------------------------------------------------------- */
951: PETSC_STATIC_INLINE void Pack_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
952: {
953:   PetscInt i,idx;

955:   for (i=0; i<n; i++) {
956:     idx  = *indicesx++;
957:     y[0] = x[idx];
958:     y[1] = x[idx+1];
959:     y[2] = x[idx+2];
960:     y[3] = x[idx+3];
961:     y[4] = x[idx+4];
962:     y   += 5;
963:   }
964: }

966: PETSC_STATIC_INLINE PetscErrorCode UnPack_5(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
967: {
968:   PetscInt i,idy;

971:   switch (addv) {
972:   case INSERT_VALUES:
973:   case INSERT_ALL_VALUES:
974:     for (i=0; i<n; i++) {
975:       idy      = *indicesy++;
976:       y[idy]   = x[0];
977:       y[idy+1] = x[1];
978:       y[idy+2] = x[2];
979:       y[idy+3] = x[3];
980:       y[idy+4] = x[4];
981:       x       += 5;
982:     }
983:     break;
984:   case ADD_VALUES:
985:   case ADD_ALL_VALUES:
986:     for (i=0; i<n; i++) {
987:       idy       = *indicesy++;
988:       y[idy]   += x[0];
989:       y[idy+1] += x[1];
990:       y[idy+2] += x[2];
991:       y[idy+3] += x[3];
992:       y[idy+4] += x[4];
993:       x        += 5;
994:     }
995:     break;
996: #if !defined(PETSC_USE_COMPLEX)
997:   case MAX_VALUES:
998:     for (i=0; i<n; i++) {
999:       idy      = *indicesy++;
1000:       y[idy]   = PetscMax(y[idy],x[0]);
1001:       y[idy+1] = PetscMax(y[idy+1],x[1]);
1002:       y[idy+2] = PetscMax(y[idy+2],x[2]);
1003:       y[idy+3] = PetscMax(y[idy+3],x[3]);
1004:       y[idy+4] = PetscMax(y[idy+4],x[4]);
1005:       x       += 5;
1006:     }
1007: #else
1008:   case MAX_VALUES:
1009: #endif
1010:   case NOT_SET_VALUES:
1011:     break;
1012:   default:
1013:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1014:   }
1015:   return(0);
1016: }

1018: PETSC_STATIC_INLINE PetscErrorCode Scatter_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1019: {
1020:   PetscInt i,idx,idy;

1023:   switch (addv) {
1024:   case INSERT_VALUES:
1025:   case INSERT_ALL_VALUES:
1026:     for (i=0; i<n; i++) {
1027:       idx      = *indicesx++;
1028:       idy      = *indicesy++;
1029:       y[idy]   = x[idx];
1030:       y[idy+1] = x[idx+1];
1031:       y[idy+2] = x[idx+2];
1032:       y[idy+3] = x[idx+3];
1033:       y[idy+4] = x[idx+4];
1034:     }
1035:     break;
1036:   case ADD_VALUES:
1037:   case ADD_ALL_VALUES:
1038:     for (i=0; i<n; i++) {
1039:       idx       = *indicesx++;
1040:       idy       = *indicesy++;
1041:       y[idy]   += x[idx];
1042:       y[idy+1] += x[idx+1];
1043:       y[idy+2] += x[idx+2];
1044:       y[idy+3] += x[idx+3];
1045:       y[idy+4] += x[idx+4];
1046:     }
1047:     break;
1048: #if !defined(PETSC_USE_COMPLEX)
1049:   case MAX_VALUES:
1050:     for (i=0; i<n; i++) {
1051:       idx      = *indicesx++;
1052:       idy      = *indicesy++;
1053:       y[idy]   = PetscMax(y[idy],x[idx]);
1054:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1055:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1056:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1057:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1058:     }
1059: #else
1060:   case MAX_VALUES:
1061: #endif
1062:   case NOT_SET_VALUES:
1063:     break;
1064:   default:
1065:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1066:   }
1067:   return(0);
1068: }
1069: /* ----------------------------------------------------------------------------------------------- */
1070: PETSC_STATIC_INLINE void Pack_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1071: {
1072:   PetscInt i,idx;

1074:   for (i=0; i<n; i++) {
1075:     idx  = *indicesx++;
1076:     y[0] = x[idx];
1077:     y[1] = x[idx+1];
1078:     y[2] = x[idx+2];
1079:     y[3] = x[idx+3];
1080:     y[4] = x[idx+4];
1081:     y[5] = x[idx+5];
1082:     y   += 6;
1083:   }
1084: }

1086: PETSC_STATIC_INLINE PetscErrorCode UnPack_6(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1087: {
1088:   PetscInt i,idy;

1091:   switch (addv) {
1092:   case INSERT_VALUES:
1093:   case INSERT_ALL_VALUES:
1094:     for (i=0; i<n; i++) {
1095:       idy      = *indicesy++;
1096:       y[idy]   = x[0];
1097:       y[idy+1] = x[1];
1098:       y[idy+2] = x[2];
1099:       y[idy+3] = x[3];
1100:       y[idy+4] = x[4];
1101:       y[idy+5] = x[5];
1102:       x       += 6;
1103:     }
1104:     break;
1105:   case ADD_VALUES:
1106:   case ADD_ALL_VALUES:
1107:     for (i=0; i<n; i++) {
1108:       idy       = *indicesy++;
1109:       y[idy]   += x[0];
1110:       y[idy+1] += x[1];
1111:       y[idy+2] += x[2];
1112:       y[idy+3] += x[3];
1113:       y[idy+4] += x[4];
1114:       y[idy+5] += x[5];
1115:       x        += 6;
1116:     }
1117:     break;
1118: #if !defined(PETSC_USE_COMPLEX)
1119:   case MAX_VALUES:
1120:     for (i=0; i<n; i++) {
1121:       idy      = *indicesy++;
1122:       y[idy]   = PetscMax(y[idy],x[0]);
1123:       y[idy+1] = PetscMax(y[idy+1],x[1]);
1124:       y[idy+2] = PetscMax(y[idy+2],x[2]);
1125:       y[idy+3] = PetscMax(y[idy+3],x[3]);
1126:       y[idy+4] = PetscMax(y[idy+4],x[4]);
1127:       y[idy+5] = PetscMax(y[idy+5],x[5]);
1128:       x       += 6;
1129:     }
1130: #else
1131:   case MAX_VALUES:
1132: #endif
1133:   case NOT_SET_VALUES:
1134:     break;
1135:   default:
1136:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1137:   }
1138:   return(0);
1139: }

1141: PETSC_STATIC_INLINE PetscErrorCode Scatter_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1142: {
1143:   PetscInt i,idx,idy;

1146:   switch (addv) {
1147:   case INSERT_VALUES:
1148:   case INSERT_ALL_VALUES:
1149:     for (i=0; i<n; i++) {
1150:       idx      = *indicesx++;
1151:       idy      = *indicesy++;
1152:       y[idy]   = x[idx];
1153:       y[idy+1] = x[idx+1];
1154:       y[idy+2] = x[idx+2];
1155:       y[idy+3] = x[idx+3];
1156:       y[idy+4] = x[idx+4];
1157:       y[idy+5] = x[idx+5];
1158:     }
1159:     break;
1160:   case ADD_VALUES:
1161:   case ADD_ALL_VALUES:
1162:     for (i=0; i<n; i++) {
1163:       idx       = *indicesx++;
1164:       idy       = *indicesy++;
1165:       y[idy]   += x[idx];
1166:       y[idy+1] += x[idx+1];
1167:       y[idy+2] += x[idx+2];
1168:       y[idy+3] += x[idx+3];
1169:       y[idy+4] += x[idx+4];
1170:       y[idy+5] += x[idx+5];
1171:     }
1172:     break;
1173: #if !defined(PETSC_USE_COMPLEX)
1174:   case MAX_VALUES:
1175:     for (i=0; i<n; i++) {
1176:       idx      = *indicesx++;
1177:       idy      = *indicesy++;
1178:       y[idy]   = PetscMax(y[idy],x[idx]);
1179:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1180:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1181:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1182:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1183:       y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1184:     }
1185: #else
1186:   case MAX_VALUES:
1187: #endif
1188:   case NOT_SET_VALUES:
1189:     break;
1190:   default:
1191:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1192:   }
1193:   return(0);
1194: }
1195: /* ----------------------------------------------------------------------------------------------- */
1196: PETSC_STATIC_INLINE void Pack_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1197: {
1198:   PetscInt i,idx;

1200:   for (i=0; i<n; i++) {
1201:     idx  = *indicesx++;
1202:     y[0] = x[idx];
1203:     y[1] = x[idx+1];
1204:     y[2] = x[idx+2];
1205:     y[3] = x[idx+3];
1206:     y[4] = x[idx+4];
1207:     y[5] = x[idx+5];
1208:     y[6] = x[idx+6];
1209:     y   += 7;
1210:   }
1211: }

1213: PETSC_STATIC_INLINE PetscErrorCode UnPack_7(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1214: {
1215:   PetscInt i,idy;

1218:   switch (addv) {
1219:   case INSERT_VALUES:
1220:   case INSERT_ALL_VALUES:
1221:     for (i=0; i<n; i++) {
1222:       idy      = *indicesy++;
1223:       y[idy]   = x[0];
1224:       y[idy+1] = x[1];
1225:       y[idy+2] = x[2];
1226:       y[idy+3] = x[3];
1227:       y[idy+4] = x[4];
1228:       y[idy+5] = x[5];
1229:       y[idy+6] = x[6];
1230:       x       += 7;
1231:     }
1232:     break;
1233:   case ADD_VALUES:
1234:   case ADD_ALL_VALUES:
1235:     for (i=0; i<n; i++) {
1236:       idy       = *indicesy++;
1237:       y[idy]   += x[0];
1238:       y[idy+1] += x[1];
1239:       y[idy+2] += x[2];
1240:       y[idy+3] += x[3];
1241:       y[idy+4] += x[4];
1242:       y[idy+5] += x[5];
1243:       y[idy+6] += x[6];
1244:       x        += 7;
1245:     }
1246:     break;
1247: #if !defined(PETSC_USE_COMPLEX)
1248:   case MAX_VALUES:
1249:     for (i=0; i<n; i++) {
1250:       idy      = *indicesy++;
1251:       y[idy]   = PetscMax(y[idy],x[0]);
1252:       y[idy+1] = PetscMax(y[idy+1],x[1]);
1253:       y[idy+2] = PetscMax(y[idy+2],x[2]);
1254:       y[idy+3] = PetscMax(y[idy+3],x[3]);
1255:       y[idy+4] = PetscMax(y[idy+4],x[4]);
1256:       y[idy+5] = PetscMax(y[idy+5],x[5]);
1257:       y[idy+6] = PetscMax(y[idy+6],x[6]);
1258:       x       += 7;
1259:     }
1260: #else
1261:   case MAX_VALUES:
1262: #endif
1263:   case NOT_SET_VALUES:
1264:     break;
1265:   default:
1266:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1267:   }
1268:   return(0);
1269: }

1271: PETSC_STATIC_INLINE PetscErrorCode Scatter_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1272: {
1273:   PetscInt i,idx,idy;

1276:   switch (addv) {
1277:   case INSERT_VALUES:
1278:   case INSERT_ALL_VALUES:
1279:     for (i=0; i<n; i++) {
1280:       idx      = *indicesx++;
1281:       idy      = *indicesy++;
1282:       y[idy]   = x[idx];
1283:       y[idy+1] = x[idx+1];
1284:       y[idy+2] = x[idx+2];
1285:       y[idy+3] = x[idx+3];
1286:       y[idy+4] = x[idx+4];
1287:       y[idy+5] = x[idx+5];
1288:       y[idy+6] = x[idx+6];
1289:     }
1290:     break;
1291:   case ADD_VALUES:
1292:   case ADD_ALL_VALUES:
1293:     for (i=0; i<n; i++) {
1294:       idx       = *indicesx++;
1295:       idy       = *indicesy++;
1296:       y[idy]   += x[idx];
1297:       y[idy+1] += x[idx+1];
1298:       y[idy+2] += x[idx+2];
1299:       y[idy+3] += x[idx+3];
1300:       y[idy+4] += x[idx+4];
1301:       y[idy+5] += x[idx+5];
1302:       y[idy+6] += x[idx+6];
1303:     }
1304:     break;
1305: #if !defined(PETSC_USE_COMPLEX)
1306:   case MAX_VALUES:
1307:     for (i=0; i<n; i++) {
1308:       idx      = *indicesx++;
1309:       idy      = *indicesy++;
1310:       y[idy]   = PetscMax(y[idy],x[idx]);
1311:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1312:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1313:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1314:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1315:       y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1316:       y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1317:     }
1318: #else
1319:   case MAX_VALUES:
1320: #endif
1321:   case NOT_SET_VALUES:
1322:     break;
1323:   default:
1324:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1325:   }
1326:   return(0);
1327: }
1328: /* ----------------------------------------------------------------------------------------------- */
1329: PETSC_STATIC_INLINE void Pack_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1330: {
1331:   PetscInt i,idx;

1333:   for (i=0; i<n; i++) {
1334:     idx  = *indicesx++;
1335:     y[0] = x[idx];
1336:     y[1] = x[idx+1];
1337:     y[2] = x[idx+2];
1338:     y[3] = x[idx+3];
1339:     y[4] = x[idx+4];
1340:     y[5] = x[idx+5];
1341:     y[6] = x[idx+6];
1342:     y[7] = x[idx+7];
1343:     y   += 8;
1344:   }
1345: }

1347: PETSC_STATIC_INLINE PetscErrorCode UnPack_8(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1348: {
1349:   PetscInt i,idy;

1352:   switch (addv) {
1353:   case INSERT_VALUES:
1354:   case INSERT_ALL_VALUES:
1355:     for (i=0; i<n; i++) {
1356:       idy      = *indicesy++;
1357:       y[idy]   = x[0];
1358:       y[idy+1] = x[1];
1359:       y[idy+2] = x[2];
1360:       y[idy+3] = x[3];
1361:       y[idy+4] = x[4];
1362:       y[idy+5] = x[5];
1363:       y[idy+6] = x[6];
1364:       y[idy+7] = x[7];
1365:       x       += 8;
1366:     }
1367:     break;
1368:   case ADD_VALUES:
1369:   case ADD_ALL_VALUES:
1370:     for (i=0; i<n; i++) {
1371:       idy       = *indicesy++;
1372:       y[idy]   += x[0];
1373:       y[idy+1] += x[1];
1374:       y[idy+2] += x[2];
1375:       y[idy+3] += x[3];
1376:       y[idy+4] += x[4];
1377:       y[idy+5] += x[5];
1378:       y[idy+6] += x[6];
1379:       y[idy+7] += x[7];
1380:       x        += 8;
1381:     }
1382:     break;
1383: #if !defined(PETSC_USE_COMPLEX)
1384:   case MAX_VALUES:
1385:     for (i=0; i<n; i++) {
1386:       idy      = *indicesy++;
1387:       y[idy]   = PetscMax(y[idy],x[0]);
1388:       y[idy+1] = PetscMax(y[idy+1],x[1]);
1389:       y[idy+2] = PetscMax(y[idy+2],x[2]);
1390:       y[idy+3] = PetscMax(y[idy+3],x[3]);
1391:       y[idy+4] = PetscMax(y[idy+4],x[4]);
1392:       y[idy+5] = PetscMax(y[idy+5],x[5]);
1393:       y[idy+6] = PetscMax(y[idy+6],x[6]);
1394:       y[idy+7] = PetscMax(y[idy+7],x[7]);
1395:       x       += 8;
1396:     }
1397: #else
1398:   case MAX_VALUES:
1399: #endif
1400:   case NOT_SET_VALUES:
1401:     break;
1402:   default:
1403:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1404:   }
1405:   return(0);
1406: }

1408: PETSC_STATIC_INLINE PetscErrorCode Scatter_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1409: {
1410:   PetscInt i,idx,idy;

1413:   switch (addv) {
1414:   case INSERT_VALUES:
1415:   case INSERT_ALL_VALUES:
1416:     for (i=0; i<n; i++) {
1417:       idx      = *indicesx++;
1418:       idy      = *indicesy++;
1419:       y[idy]   = x[idx];
1420:       y[idy+1] = x[idx+1];
1421:       y[idy+2] = x[idx+2];
1422:       y[idy+3] = x[idx+3];
1423:       y[idy+4] = x[idx+4];
1424:       y[idy+5] = x[idx+5];
1425:       y[idy+6] = x[idx+6];
1426:       y[idy+7] = x[idx+7];
1427:     }
1428:     break;
1429:   case ADD_VALUES:
1430:   case ADD_ALL_VALUES:
1431:     for (i=0; i<n; i++) {
1432:       idx       = *indicesx++;
1433:       idy       = *indicesy++;
1434:       y[idy]   += x[idx];
1435:       y[idy+1] += x[idx+1];
1436:       y[idy+2] += x[idx+2];
1437:       y[idy+3] += x[idx+3];
1438:       y[idy+4] += x[idx+4];
1439:       y[idy+5] += x[idx+5];
1440:       y[idy+6] += x[idx+6];
1441:       y[idy+7] += x[idx+7];
1442:     }
1443:     break;
1444: #if !defined(PETSC_USE_COMPLEX)
1445:   case MAX_VALUES:
1446:     for (i=0; i<n; i++) {
1447:       idx      = *indicesx++;
1448:       idy      = *indicesy++;
1449:       y[idy]   = PetscMax(y[idy],x[idx]);
1450:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1451:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1452:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1453:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1454:       y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1455:       y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1456:       y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1457:     }
1458: #else
1459:   case MAX_VALUES:
1460: #endif
1461:   case NOT_SET_VALUES:
1462:     break;
1463:   default:
1464:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1465:   }
1466:   return(0);
1467: }

1469: PETSC_STATIC_INLINE void Pack_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1470: {
1471:   PetscInt i,idx;

1473:   for (i=0; i<n; i++) {
1474:     idx   = *indicesx++;
1475:     y[0]  = x[idx];
1476:     y[1]  = x[idx+1];
1477:     y[2]  = x[idx+2];
1478:     y[3]  = x[idx+3];
1479:     y[4]  = x[idx+4];
1480:     y[5]  = x[idx+5];
1481:     y[6]  = x[idx+6];
1482:     y[7]  = x[idx+7];
1483:     y[8]  = x[idx+8];
1484:     y    += 9;
1485:   }
1486: }

1488: PETSC_STATIC_INLINE PetscErrorCode UnPack_9(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1489: {
1490:   PetscInt i,idy;

1493:   switch (addv) {
1494:   case INSERT_VALUES:
1495:   case INSERT_ALL_VALUES:
1496:     for (i=0; i<n; i++) {
1497:       idy       = *indicesy++;
1498:       y[idy]    = x[0];
1499:       y[idy+1]  = x[1];
1500:       y[idy+2]  = x[2];
1501:       y[idy+3]  = x[3];
1502:       y[idy+4]  = x[4];
1503:       y[idy+5]  = x[5];
1504:       y[idy+6]  = x[6];
1505:       y[idy+7]  = x[7];
1506:       y[idy+8]  = x[8];
1507:       x        += 9;
1508:     }
1509:     break;
1510:   case ADD_VALUES:
1511:   case ADD_ALL_VALUES:
1512:     for (i=0; i<n; i++) {
1513:       idy        = *indicesy++;
1514:       y[idy]    += x[0];
1515:       y[idy+1]  += x[1];
1516:       y[idy+2]  += x[2];
1517:       y[idy+3]  += x[3];
1518:       y[idy+4]  += x[4];
1519:       y[idy+5]  += x[5];
1520:       y[idy+6]  += x[6];
1521:       y[idy+7]  += x[7];
1522:       y[idy+8]  += x[8];
1523:       x         += 9;
1524:     }
1525:     break;
1526: #if !defined(PETSC_USE_COMPLEX)
1527:   case MAX_VALUES:
1528:     for (i=0; i<n; i++) {
1529:       idy       = *indicesy++;
1530:       y[idy]    = PetscMax(y[idy],x[0]);
1531:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1532:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1533:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1534:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1535:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1536:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1537:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1538:       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1539:       x        += 9;
1540:     }
1541: #else
1542:   case MAX_VALUES:
1543: #endif
1544:   case NOT_SET_VALUES:
1545:     break;
1546:   default:
1547:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1548:   }
1549:   return(0);
1550: }

1552: PETSC_STATIC_INLINE PetscErrorCode Scatter_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1553: {
1554:   PetscInt i,idx,idy;

1557:   switch (addv) {
1558:   case INSERT_VALUES:
1559:   case INSERT_ALL_VALUES:
1560:     for (i=0; i<n; i++) {
1561:       idx       = *indicesx++;
1562:       idy       = *indicesy++;
1563:       y[idy]    = x[idx];
1564:       y[idy+1]  = x[idx+1];
1565:       y[idy+2]  = x[idx+2];
1566:       y[idy+3]  = x[idx+3];
1567:       y[idy+4]  = x[idx+4];
1568:       y[idy+5]  = x[idx+5];
1569:       y[idy+6]  = x[idx+6];
1570:       y[idy+7]  = x[idx+7];
1571:       y[idy+8]  = x[idx+8];
1572:     }
1573:     break;
1574:   case ADD_VALUES:
1575:   case ADD_ALL_VALUES:
1576:     for (i=0; i<n; i++) {
1577:       idx        = *indicesx++;
1578:       idy        = *indicesy++;
1579:       y[idy]    += x[idx];
1580:       y[idy+1]  += x[idx+1];
1581:       y[idy+2]  += x[idx+2];
1582:       y[idy+3]  += x[idx+3];
1583:       y[idy+4]  += x[idx+4];
1584:       y[idy+5]  += x[idx+5];
1585:       y[idy+6]  += x[idx+6];
1586:       y[idy+7]  += x[idx+7];
1587:       y[idy+8]  += x[idx+8];
1588:     }
1589:     break;
1590: #if !defined(PETSC_USE_COMPLEX)
1591:   case MAX_VALUES:
1592:     for (i=0; i<n; i++) {
1593:       idx       = *indicesx++;
1594:       idy       = *indicesy++;
1595:       y[idy]    = PetscMax(y[idy],x[idx]);
1596:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1597:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1598:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1599:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1600:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1601:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1602:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1603:       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1604:     }
1605: #else
1606:   case MAX_VALUES:
1607: #endif
1608:   case NOT_SET_VALUES:
1609:     break;
1610:   default:
1611:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1612:   }
1613:   return(0);
1614: }

1616: PETSC_STATIC_INLINE void Pack_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1617: {
1618:   PetscInt i,idx;

1620:   for (i=0; i<n; i++) {
1621:     idx   = *indicesx++;
1622:     y[0]  = x[idx];
1623:     y[1]  = x[idx+1];
1624:     y[2]  = x[idx+2];
1625:     y[3]  = x[idx+3];
1626:     y[4]  = x[idx+4];
1627:     y[5]  = x[idx+5];
1628:     y[6]  = x[idx+6];
1629:     y[7]  = x[idx+7];
1630:     y[8]  = x[idx+8];
1631:     y[9]  = x[idx+9];
1632:     y    += 10;
1633:   }
1634: }

1636: PETSC_STATIC_INLINE PetscErrorCode UnPack_10(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1637: {
1638:   PetscInt i,idy;

1641:   switch (addv) {
1642:   case INSERT_VALUES:
1643:   case INSERT_ALL_VALUES:
1644:     for (i=0; i<n; i++) {
1645:       idy       = *indicesy++;
1646:       y[idy]    = x[0];
1647:       y[idy+1]  = x[1];
1648:       y[idy+2]  = x[2];
1649:       y[idy+3]  = x[3];
1650:       y[idy+4]  = x[4];
1651:       y[idy+5]  = x[5];
1652:       y[idy+6]  = x[6];
1653:       y[idy+7]  = x[7];
1654:       y[idy+8]  = x[8];
1655:       y[idy+9]  = x[9];
1656:       x        += 10;
1657:     }
1658:     break;
1659:   case ADD_VALUES:
1660:   case ADD_ALL_VALUES:
1661:     for (i=0; i<n; i++) {
1662:       idy        = *indicesy++;
1663:       y[idy]    += x[0];
1664:       y[idy+1]  += x[1];
1665:       y[idy+2]  += x[2];
1666:       y[idy+3]  += x[3];
1667:       y[idy+4]  += x[4];
1668:       y[idy+5]  += x[5];
1669:       y[idy+6]  += x[6];
1670:       y[idy+7]  += x[7];
1671:       y[idy+8]  += x[8];
1672:       y[idy+9]  += x[9];
1673:       x         += 10;
1674:     }
1675:     break;
1676: #if !defined(PETSC_USE_COMPLEX)
1677:   case MAX_VALUES:
1678:     for (i=0; i<n; i++) {
1679:       idy       = *indicesy++;
1680:       y[idy]    = PetscMax(y[idy],x[0]);
1681:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1682:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1683:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1684:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1685:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1686:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1687:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1688:       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1689:       y[idy+9]  = PetscMax(y[idy+9],x[9]);
1690:       x        += 10;
1691:     }
1692: #else
1693:   case MAX_VALUES:
1694: #endif
1695:   case NOT_SET_VALUES:
1696:     break;
1697:   default:
1698:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1699:   }
1700:   return(0);
1701: }

1703: PETSC_STATIC_INLINE PetscErrorCode Scatter_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1704: {
1705:   PetscInt i,idx,idy;

1708:   switch (addv) {
1709:   case INSERT_VALUES:
1710:   case INSERT_ALL_VALUES:
1711:     for (i=0; i<n; i++) {
1712:       idx       = *indicesx++;
1713:       idy       = *indicesy++;
1714:       y[idy]    = x[idx];
1715:       y[idy+1]  = x[idx+1];
1716:       y[idy+2]  = x[idx+2];
1717:       y[idy+3]  = x[idx+3];
1718:       y[idy+4]  = x[idx+4];
1719:       y[idy+5]  = x[idx+5];
1720:       y[idy+6]  = x[idx+6];
1721:       y[idy+7]  = x[idx+7];
1722:       y[idy+8]  = x[idx+8];
1723:       y[idy+9]  = x[idx+9];
1724:     }
1725:     break;
1726:   case ADD_VALUES:
1727:   case ADD_ALL_VALUES:
1728:     for (i=0; i<n; i++) {
1729:       idx        = *indicesx++;
1730:       idy        = *indicesy++;
1731:       y[idy]    += x[idx];
1732:       y[idy+1]  += x[idx+1];
1733:       y[idy+2]  += x[idx+2];
1734:       y[idy+3]  += x[idx+3];
1735:       y[idy+4]  += x[idx+4];
1736:       y[idy+5]  += x[idx+5];
1737:       y[idy+6]  += x[idx+6];
1738:       y[idy+7]  += x[idx+7];
1739:       y[idy+8]  += x[idx+8];
1740:       y[idy+9]  += x[idx+9];
1741:     }
1742:     break;
1743: #if !defined(PETSC_USE_COMPLEX)
1744:   case MAX_VALUES:
1745:     for (i=0; i<n; i++) {
1746:       idx       = *indicesx++;
1747:       idy       = *indicesy++;
1748:       y[idy]    = PetscMax(y[idy],x[idx]);
1749:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1750:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1751:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1752:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1753:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1754:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1755:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1756:       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1757:       y[idy+9]  = PetscMax(y[idy+9],x[idx+9]);
1758:     }
1759: #else
1760:   case MAX_VALUES:
1761: #endif
1762:   case NOT_SET_VALUES:
1763:     break;
1764:   default:
1765:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1766:   }
1767:   return(0);
1768: }

1770: PETSC_STATIC_INLINE void Pack_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1771: {
1772:   PetscInt i,idx;

1774:   for (i=0; i<n; i++) {
1775:     idx   = *indicesx++;
1776:     y[0]  = x[idx];
1777:     y[1]  = x[idx+1];
1778:     y[2]  = x[idx+2];
1779:     y[3]  = x[idx+3];
1780:     y[4]  = x[idx+4];
1781:     y[5]  = x[idx+5];
1782:     y[6]  = x[idx+6];
1783:     y[7]  = x[idx+7];
1784:     y[8]  = x[idx+8];
1785:     y[9]  = x[idx+9];
1786:     y[10] = x[idx+10];
1787:     y    += 11;
1788:   }
1789: }

1791: PETSC_STATIC_INLINE PetscErrorCode UnPack_11(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1792: {
1793:   PetscInt i,idy;

1796:   switch (addv) {
1797:   case INSERT_VALUES:
1798:   case INSERT_ALL_VALUES:
1799:     for (i=0; i<n; i++) {
1800:       idy       = *indicesy++;
1801:       y[idy]    = x[0];
1802:       y[idy+1]  = x[1];
1803:       y[idy+2]  = x[2];
1804:       y[idy+3]  = x[3];
1805:       y[idy+4]  = x[4];
1806:       y[idy+5]  = x[5];
1807:       y[idy+6]  = x[6];
1808:       y[idy+7]  = x[7];
1809:       y[idy+8]  = x[8];
1810:       y[idy+9]  = x[9];
1811:       y[idy+10] = x[10];
1812:       x        += 11;
1813:     }
1814:     break;
1815:   case ADD_VALUES:
1816:   case ADD_ALL_VALUES:
1817:     for (i=0; i<n; i++) {
1818:       idy        = *indicesy++;
1819:       y[idy]    += x[0];
1820:       y[idy+1]  += x[1];
1821:       y[idy+2]  += x[2];
1822:       y[idy+3]  += x[3];
1823:       y[idy+4]  += x[4];
1824:       y[idy+5]  += x[5];
1825:       y[idy+6]  += x[6];
1826:       y[idy+7]  += x[7];
1827:       y[idy+8]  += x[8];
1828:       y[idy+9]  += x[9];
1829:       y[idy+10] += x[10];
1830:       x         += 11;
1831:     }
1832:     break;
1833: #if !defined(PETSC_USE_COMPLEX)
1834:   case MAX_VALUES:
1835:     for (i=0; i<n; i++) {
1836:       idy       = *indicesy++;
1837:       y[idy]    = PetscMax(y[idy],x[0]);
1838:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1839:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1840:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1841:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1842:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1843:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1844:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1845:       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1846:       y[idy+9]  = PetscMax(y[idy+9],x[9]);
1847:       y[idy+10] = PetscMax(y[idy+10],x[10]);
1848:       x        += 11;
1849:     }
1850: #else
1851:   case MAX_VALUES:
1852: #endif
1853:   case NOT_SET_VALUES:
1854:     break;
1855:   default:
1856:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1857:   }
1858:   return(0);
1859: }

1861: PETSC_STATIC_INLINE PetscErrorCode Scatter_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1862: {
1863:   PetscInt i,idx,idy;

1866:   switch (addv) {
1867:   case INSERT_VALUES:
1868:   case INSERT_ALL_VALUES:
1869:     for (i=0; i<n; i++) {
1870:       idx       = *indicesx++;
1871:       idy       = *indicesy++;
1872:       y[idy]    = x[idx];
1873:       y[idy+1]  = x[idx+1];
1874:       y[idy+2]  = x[idx+2];
1875:       y[idy+3]  = x[idx+3];
1876:       y[idy+4]  = x[idx+4];
1877:       y[idy+5]  = x[idx+5];
1878:       y[idy+6]  = x[idx+6];
1879:       y[idy+7]  = x[idx+7];
1880:       y[idy+8]  = x[idx+8];
1881:       y[idy+9]  = x[idx+9];
1882:       y[idy+10] = x[idx+10];
1883:     }
1884:     break;
1885:   case ADD_VALUES:
1886:   case ADD_ALL_VALUES:
1887:     for (i=0; i<n; i++) {
1888:       idx        = *indicesx++;
1889:       idy        = *indicesy++;
1890:       y[idy]    += x[idx];
1891:       y[idy+1]  += x[idx+1];
1892:       y[idy+2]  += x[idx+2];
1893:       y[idy+3]  += x[idx+3];
1894:       y[idy+4]  += x[idx+4];
1895:       y[idy+5]  += x[idx+5];
1896:       y[idy+6]  += x[idx+6];
1897:       y[idy+7]  += x[idx+7];
1898:       y[idy+8]  += x[idx+8];
1899:       y[idy+9]  += x[idx+9];
1900:       y[idy+10] += x[idx+10];
1901:     }
1902:     break;
1903: #if !defined(PETSC_USE_COMPLEX)
1904:   case MAX_VALUES:
1905:     for (i=0; i<n; i++) {
1906:       idx       = *indicesx++;
1907:       idy       = *indicesy++;
1908:       y[idy]    = PetscMax(y[idy],x[idx]);
1909:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1910:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1911:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1912:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1913:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1914:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1915:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1916:       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1917:       y[idy+9]  = PetscMax(y[idy+9],x[idx+9]);
1918:       y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1919:     }
1920: #else
1921:   case MAX_VALUES:
1922: #endif
1923:   case NOT_SET_VALUES:
1924:     break;
1925:   default:
1926:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1927:   }
1928:   return(0);
1929: }

1931: /* ----------------------------------------------------------------------------------------------- */
1932: PETSC_STATIC_INLINE void Pack_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1933: {
1934:   PetscInt i,idx;

1936:   for (i=0; i<n; i++) {
1937:     idx   = *indicesx++;
1938:     y[0]  = x[idx];
1939:     y[1]  = x[idx+1];
1940:     y[2]  = x[idx+2];
1941:     y[3]  = x[idx+3];
1942:     y[4]  = x[idx+4];
1943:     y[5]  = x[idx+5];
1944:     y[6]  = x[idx+6];
1945:     y[7]  = x[idx+7];
1946:     y[8]  = x[idx+8];
1947:     y[9]  = x[idx+9];
1948:     y[10] = x[idx+10];
1949:     y[11] = x[idx+11];
1950:     y    += 12;
1951:   }
1952: }

1954: PETSC_STATIC_INLINE PetscErrorCode UnPack_12(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1955: {
1956:   PetscInt i,idy;

1959:   switch (addv) {
1960:   case INSERT_VALUES:
1961:   case INSERT_ALL_VALUES:
1962:     for (i=0; i<n; i++) {
1963:       idy       = *indicesy++;
1964:       y[idy]    = x[0];
1965:       y[idy+1]  = x[1];
1966:       y[idy+2]  = x[2];
1967:       y[idy+3]  = x[3];
1968:       y[idy+4]  = x[4];
1969:       y[idy+5]  = x[5];
1970:       y[idy+6]  = x[6];
1971:       y[idy+7]  = x[7];
1972:       y[idy+8]  = x[8];
1973:       y[idy+9]  = x[9];
1974:       y[idy+10] = x[10];
1975:       y[idy+11] = x[11];
1976:       x        += 12;
1977:     }
1978:     break;
1979:   case ADD_VALUES:
1980:   case ADD_ALL_VALUES:
1981:     for (i=0; i<n; i++) {
1982:       idy        = *indicesy++;
1983:       y[idy]    += x[0];
1984:       y[idy+1]  += x[1];
1985:       y[idy+2]  += x[2];
1986:       y[idy+3]  += x[3];
1987:       y[idy+4]  += x[4];
1988:       y[idy+5]  += x[5];
1989:       y[idy+6]  += x[6];
1990:       y[idy+7]  += x[7];
1991:       y[idy+8]  += x[8];
1992:       y[idy+9]  += x[9];
1993:       y[idy+10] += x[10];
1994:       y[idy+11] += x[11];
1995:       x         += 12;
1996:     }
1997:     break;
1998: #if !defined(PETSC_USE_COMPLEX)
1999:   case MAX_VALUES:
2000:     for (i=0; i<n; i++) {
2001:       idy       = *indicesy++;
2002:       y[idy]    = PetscMax(y[idy],x[0]);
2003:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
2004:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
2005:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
2006:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
2007:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
2008:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
2009:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
2010:       y[idy+8]  = PetscMax(y[idy+8],x[8]);
2011:       y[idy+9]  = PetscMax(y[idy+9],x[9]);
2012:       y[idy+10] = PetscMax(y[idy+10],x[10]);
2013:       y[idy+11] = PetscMax(y[idy+11],x[11]);
2014:       x        += 12;
2015:     }
2016: #else
2017:   case MAX_VALUES:
2018: #endif
2019:   case NOT_SET_VALUES:
2020:     break;
2021:   default:
2022:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2023:   }
2024:   return(0);
2025: }

2027: PETSC_STATIC_INLINE PetscErrorCode Scatter_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
2028: {
2029:   PetscInt i,idx,idy;

2032:   switch (addv) {
2033:   case INSERT_VALUES:
2034:   case INSERT_ALL_VALUES:
2035:     for (i=0; i<n; i++) {
2036:       idx       = *indicesx++;
2037:       idy       = *indicesy++;
2038:       y[idy]    = x[idx];
2039:       y[idy+1]  = x[idx+1];
2040:       y[idy+2]  = x[idx+2];
2041:       y[idy+3]  = x[idx+3];
2042:       y[idy+4]  = x[idx+4];
2043:       y[idy+5]  = x[idx+5];
2044:       y[idy+6]  = x[idx+6];
2045:       y[idy+7]  = x[idx+7];
2046:       y[idy+8]  = x[idx+8];
2047:       y[idy+9]  = x[idx+9];
2048:       y[idy+10] = x[idx+10];
2049:       y[idy+11] = x[idx+11];
2050:     }
2051:     break;
2052:   case ADD_VALUES:
2053:   case ADD_ALL_VALUES:
2054:     for (i=0; i<n; i++) {
2055:       idx        = *indicesx++;
2056:       idy        = *indicesy++;
2057:       y[idy]    += x[idx];
2058:       y[idy+1]  += x[idx+1];
2059:       y[idy+2]  += x[idx+2];
2060:       y[idy+3]  += x[idx+3];
2061:       y[idy+4]  += x[idx+4];
2062:       y[idy+5]  += x[idx+5];
2063:       y[idy+6]  += x[idx+6];
2064:       y[idy+7]  += x[idx+7];
2065:       y[idy+8]  += x[idx+8];
2066:       y[idy+9]  += x[idx+9];
2067:       y[idy+10] += x[idx+10];
2068:       y[idy+11] += x[idx+11];
2069:     }
2070:     break;
2071: #if !defined(PETSC_USE_COMPLEX)
2072:   case MAX_VALUES:
2073:     for (i=0; i<n; i++) {
2074:       idx       = *indicesx++;
2075:       idy       = *indicesy++;
2076:       y[idy]    = PetscMax(y[idy],x[idx]);
2077:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
2078:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
2079:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
2080:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
2081:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
2082:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
2083:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
2084:       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
2085:       y[idy+9]  = PetscMax(y[idy+9],x[idx+9]);
2086:       y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
2087:       y[idy+11] = PetscMax(y[idy+11],x[idx+11]);
2088:     }
2089: #else
2090:   case MAX_VALUES:
2091: #endif
2092:   case NOT_SET_VALUES:
2093:     break;
2094:   default:
2095:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2096:   }
2097:   return(0);
2098: }

2100: /* ----------------------------------------------------------------------------------------------- */
2101: PETSC_STATIC_INLINE void Pack_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
2102: {
2103:   PetscInt       i,idx;

2105:   for (i=0; i<n; i++) {
2106:     idx   = *indicesx++;
2107:     PetscMemcpy(y,x + idx,bs*sizeof(PetscScalar));
2108:     y    += bs;
2109:   }
2110: }

2112: PETSC_STATIC_INLINE PetscErrorCode UnPack_bs(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
2113: {
2114:   PetscInt i,idy,j;

2117:   switch (addv) {
2118:   case INSERT_VALUES:
2119:   case INSERT_ALL_VALUES:
2120:     for (i=0; i<n; i++) {
2121:       idy       = *indicesy++;
2122:       PetscMemcpy(y + idy,x,bs*sizeof(PetscScalar));
2123:       x        += bs;
2124:     }
2125:     break;
2126:   case ADD_VALUES:
2127:   case ADD_ALL_VALUES:
2128:     for (i=0; i<n; i++) {
2129:       idy        = *indicesy++;
2130:       for (j=0; j<bs; j++) y[idy+j] += x[j];
2131:       x         += bs;
2132:     }
2133:     break;
2134: #if !defined(PETSC_USE_COMPLEX)
2135:   case MAX_VALUES:
2136:     for (i=0; i<n; i++) {
2137:       idy = *indicesy++;
2138:       for (j=0; j<bs; j++) y[idy+j] = PetscMax(y[idy+j],x[j]);
2139:       x  += bs;
2140:     }
2141: #else
2142:   case MAX_VALUES:
2143: #endif
2144:   case NOT_SET_VALUES:
2145:     break;
2146:   default:
2147:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2148:   }
2149:   return(0);
2150: }

2152: PETSC_STATIC_INLINE PetscErrorCode Scatter_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
2153: {
2154:   PetscInt i,idx,idy,j;

2157:   switch (addv) {
2158:   case INSERT_VALUES:
2159:   case INSERT_ALL_VALUES:
2160:     for (i=0; i<n; i++) {
2161:       idx       = *indicesx++;
2162:       idy       = *indicesy++;
2163:       PetscMemcpy(y + idy, x + idx,bs*sizeof(PetscScalar));
2164:     }
2165:     break;
2166:   case ADD_VALUES:
2167:   case ADD_ALL_VALUES:
2168:     for (i=0; i<n; i++) {
2169:       idx        = *indicesx++;
2170:       idy        = *indicesy++;
2171:       for (j=0; j<bs; j++) y[idy+j] += x[idx+j];
2172:     }
2173:     break;
2174: #if !defined(PETSC_USE_COMPLEX)
2175:   case MAX_VALUES:
2176:     for (i=0; i<n; i++) {
2177:       idx       = *indicesx++;
2178:       idy       = *indicesy++;
2179:       for (j=0; j<bs; j++) y[idy+j] = PetscMax(y[idy+j],x[idx+j]);
2180:     }
2181: #else
2182:   case MAX_VALUES:
2183: #endif
2184:   case NOT_SET_VALUES:
2185:     break;
2186:   default:
2187:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2188:   }
2189:   return(0);
2190: }
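
/*
   Editor's note: Pack_bs/UnPack_bs/Scatter_bs are the run-time fallbacks used
   when bs has no unrolled specialization.  A hedged usage sketch (editor's
   illustration; the index arrays and sizes are made up):
*/
#if 0
static void PackUnpackDemo(const PetscScalar *x,PetscScalar *y)
{
  const PetscInt sendidx[2] = {0,10};  /* block starts in the source vector */
  const PetscInt recvidx[2] = {5,0};   /* block starts in the destination   */
  PetscScalar    buf[10];              /* stands in for the MPI send buffer */
  Pack_bs(2,sendidx,x,buf,5);                    /* gather 2 blocks of 5    */
  (void)UnPack_bs(2,buf,recvidx,y,ADD_VALUES,5); /* y[recvidx+j] += buf     */
}
#endif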

2192: /* Create the VecScatterBegin/End_P for our chosen block sizes */
2193: #define BS 1
2194:  #include <../src/vec/vscat/impls/vpscat.h>
2195: #define BS 2
2196:  #include <../src/vec/vscat/impls/vpscat.h>
2197: #define BS 3
2198:  #include <../src/vec/vscat/impls/vpscat.h>
2199: #define BS 4
2200:  #include <../src/vec/vscat/impls/vpscat.h>
2201: #define BS 5
2202:  #include <../src/vec/vscat/impls/vpscat.h>
2203: #define BS 6
2204:  #include <../src/vec/vscat/impls/vpscat.h>
2205: #define BS 7
2206:  #include <../src/vec/vscat/impls/vpscat.h>
2207: #define BS 8
2208:  #include <../src/vec/vscat/impls/vpscat.h>
2209: #define BS 9
2210:  #include <../src/vec/vscat/impls/vpscat.h>
2211: #define BS 10
2212:  #include <../src/vec/vscat/impls/vpscat.h>
2213: #define BS 11
2214:  #include <../src/vec/vscat/impls/vpscat.h>
2215: #define BS 12
2216:  #include <../src/vec/vscat/impls/vpscat.h>
2217: #define BS bs
2218:  #include <../src/vec/vscat/impls/vpscat.h>
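
/*
   Editor's note: the "#define BS n" + "#include <vpscat.h>" ladder above is a
   template-by-inclusion idiom; the header is expected to token-paste BS into
   the generated names (VecScatterBegin_1, ..., VecScatterBegin_12,
   VecScatterBegin_bs) and to #undef BS before returning.  A minimal hedged
   sketch of such a header (the real vpscat.h is not reproduced here):
*/
#if 0
#define PASTE2_(a,b) a ## b
#define PASTE2(a,b)  PASTE2_(a,b)
PetscErrorCode PASTE2(VecScatterBegin_,BS)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  return 0; /* the real header instantiates the bs-specialized body here */
}
#undef BS
#undef PASTE2
#undef PASTE2_
#endif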

2220: /* ==========================================================================================*/
2221: /*              create parallel to sequential scatter context                                */

2223: extern PetscErrorCode VecScatterCreateCommon_PtoS_MPI3(VecScatter_MPI_General*,VecScatter_MPI_General*,VecScatter);

2225: /*
2226:    bs indicates how many elements there are in each block. Normally this would be 1.

2228:    contains a check that PetscMPIInt can handle the sizes needed
2229: */
2230:  #include <petscbt.h>
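/*
   Editor's note (roadmap): the routine below builds the parallel-to-sequential
   scatter in four phases: (1) bin each requested global index by owning rank;
   (2) exchange message counts and index lists with the owners
   (PetscGatherNumberOfMessages/PetscGatherMessageLengths plus Irecv/Isend);
   (3) split communication partners into same-node ranks, served through an
   MPI-3 shared-memory window, and remote ranks, served by ordinary messages;
   (4) record the purely local (self-to-self) part of the scatter.
*/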
2231: PetscErrorCode VecScatterCreateLocal_PtoS_MPI3(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2232: {
2233:   VecScatter_MPI_General *from,*to;
2234:   PetscMPIInt            size,rank,mrank,imdex,tag,n;
2235:   PetscInt               *source = NULL,*owners = NULL,nxr;
2236:   PetscInt               *lowner = NULL,*start = NULL,*lsharedowner = NULL,*sharedstart = NULL,lengthy,lengthx;
2237:   PetscMPIInt            *nprocs = NULL,nrecvs,nrecvshared;
2238:   PetscInt               i,j,idx = 0,nsends;
2239:   PetscMPIInt            *owner = NULL;
2240:   PetscInt               *starts = NULL,count,slen,slenshared;
2241:   PetscInt               *rvalues,*svalues,base,*values,nprocslocal,recvtotal,*rsvalues;
2242:   PetscMPIInt            *onodes1,*olengths1;
2243:   MPI_Comm               comm;
2244:   MPI_Request            *send_waits = NULL,*recv_waits = NULL;
2245:   MPI_Status             recv_status,*send_status;
2246:   PetscErrorCode         ierr;
2247:   PetscCommShared        scomm;
2248:   PetscMPIInt            jj;
2249:   MPI_Info               info;
2250:   MPI_Comm               mscomm;
2251:   MPI_Win                sharedoffsetwin;  /* Window that owns sharedspaceoffset */
2252:   PetscInt               *sharedspaceoffset;
2253:   VecScatterType         type;
2254:   PetscBool              mpi3node;
2256:   PetscObjectGetNewTag((PetscObject)ctx,&tag);
2257:   PetscObjectGetComm((PetscObject)xin,&comm);
2258:   MPI_Comm_rank(comm,&rank);
2259:   MPI_Comm_size(comm,&size);

2261:   VecScatterGetType(ctx,&type);
2262:   PetscStrcmp(type,"mpi3node",&mpi3node);

2264:   PetscCommSharedGet(comm,&scomm);
2265:   PetscCommSharedGetComm(scomm,&mscomm);

2267:   MPI_Info_create(&info);
2268:   MPI_Info_set(info, "alloc_shared_noncontig", "true");
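  /* Editor's note: "alloc_shared_noncontig" permits the MPI library to place
     each rank's segment of a shared window on its own pages rather than in
     one contiguous slab, which typically improves NUMA locality. */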

2270:   if (mpi3node) {
2271:     /* Check if (parallel) inidx has duplicate indices.
2272:      Current VecScatterEndMPI3Node() (case StoP) writes the sequential vector to the parallel vector. 
2273:      Writing to the same shared location without variable locking
2274:      leads to incorrect scattering. See src/vec/vscat/examples/runex2_5 and runex3_5 */

2276:     PetscInt    *mem,**optr;
2277:     MPI_Win     swin;
2278:     PetscMPIInt msize;

2280:     MPI_Comm_size(mscomm,&msize);
2281:     MPI_Comm_rank(mscomm,&mrank);

2283:     PetscMalloc1(msize,&optr);
2284:     MPIU_Win_allocate_shared((nx+1)*sizeof(PetscInt),sizeof(PetscInt),MPI_INFO_NULL,mscomm,&mem,&swin);

2286:     /* Write local nx and inidx into mem */
2287:     mem[0] = nx;
2288:     for (i=1; i<=nx; i++) mem[i] = inidx[i-1];
2289:     MPI_Barrier(mscomm); /* sync shared memory */

2291:     if (!mrank) {
2292:       PetscBT        table;
2293:       /* sz and dsp_unit are not used. Replacing them with NULL would cause MPI_Win_shared_query() to crash */
2294:       MPI_Aint       sz;
2295:       PetscMPIInt    dsp_unit;
2296:       PetscInt       N = xin->map->N;

2298:       PetscBTCreate(N,&table); /* may not be scalable */
2299:       PetscBTMemzero(N,table);

2301:       jj = 0;
2302:       while (jj<msize && !ctx->is_duplicate) {
2303:         if (jj == mrank) {jj++; continue;}
2304:         MPIU_Win_shared_query(swin,jj,&sz,&dsp_unit,&optr[jj]);
2305:         for (j=1; j<=optr[jj][0]; j++) { /* optr[jj][0] = nx */
2306:           if (PetscBTLookupSet(table,optr[jj][j])) {
2307:             ctx->is_duplicate = PETSC_TRUE; /* only mrank=0 has this information; it is checked in VecScatterEndMPI3Node() */
2308:             break;
2309:           }
2310:         }
2311:         jj++;
2312:       }
2313:       PetscBTDestroy(&table);
2314:     }

2316:     if (swin != MPI_WIN_NULL) {MPI_Win_free(&swin);}
2317:     PetscFree(optr);
2318:   }
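
  /* Editor's note: the duplicate test above publishes every node rank's
     inidx[] through a node-local shared window, then node rank 0 streams all
     of them through a single PetscBT bitmap; PetscBTLookupSet() returns the
     bit's previous value, so the first repeated global index sets
     ctx->is_duplicate. */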

2320:   owners = xin->map->range;
2321:   VecGetSize(yin,&lengthy);
2322:   VecGetSize(xin,&lengthx);

2324:   /*  first count number of contributors to each processor */
2325:   PetscMalloc2(size,&nprocs,nx,&owner);
2326:   PetscMemzero(nprocs,size*sizeof(PetscMPIInt));

2328:   j      = 0;
2329:   nsends = 0;
2330:   for (i=0; i<nx; i++) {
2331:     PetscIntMultError(bs,inidx[i],&idx);
2332:     if (idx < owners[j]) j = 0;
2333:     for (; j<size; j++) {
2334:       if (idx < owners[j+1]) {
2335:         if (!nprocs[j]++) nsends++;
2336:         owner[i] = j;
2337:         break;
2338:       }
2339:     }
2340:     if (j == size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ith %D block entry %D not owned by any process, upper bound %D",i,idx,owners[size]);
2341:   }

2343:   nprocslocal  = nprocs[rank];
2344:   nprocs[rank] = 0;
2345:   if (nprocslocal) nsends--;
2346:   /* inform other processors of number of messages and max length*/
2347:   PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2348:   PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2349:   PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2350:   recvtotal = 0;
2351:   for (i=0; i<nrecvs; i++) {
2352:     PetscIntSumError(recvtotal,olengths1[i],&recvtotal);
2353:   }

2355:   /* post receives:   */
2356:   PetscMalloc3(recvtotal,&rvalues,nrecvs,&source,nrecvs,&recv_waits);
2357:   count = 0;
2358:   for (i=0; i<nrecvs; i++) {
2359:     MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2360:     count += olengths1[i];
2361:   }

2363:   /* do sends:
2364:      1) starts[i] gives the starting index in svalues for stuff going to
2365:      the ith processor
2366:   */
2367:   nxr = 0;
2368:   for (i=0; i<nx; i++) {
2369:     if (owner[i] != rank) nxr++;
2370:   }
2371:   PetscMalloc3(nxr,&svalues,nsends,&send_waits,size+1,&starts);

2373:   starts[0]  = 0;
2374:   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2375:   for (i=0; i<nx; i++) {
2376:     if (owner[i] != rank) svalues[starts[owner[i]]++] = bs*inidx[i];
2377:   }
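
  /* Editor's note: the loop above post-incremented starts[owner[i]] past each
     value written, so starts[] now holds end offsets; the prefix sum is
     therefore rebuilt below (this time over size+1 entries) before posting
     the sends. */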
2378:   starts[0] = 0;
2379:   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2380:   count = 0;
2381:   for (i=0; i<size; i++) {
2382:     if (nprocs[i]) {
2383:       MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);
2384:     }
2385:   }

2387:   /*  wait on receives; this is everyone to whom we need to deliver data */
2388:   count = nrecvs;
2389:   slen  = 0;
2390:   nrecvshared = 0;
2391:   slenshared  = 0;
2392:   while (count) {
2393:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2394:     /* unpack receives into our local space */
2395:     MPI_Get_count(&recv_status,MPIU_INT,&n);
2396:     PetscCommSharedGlobalToLocal(scomm,onodes1[imdex],&jj);
2397:     if (jj > -1) {
2398:       PetscInfo3(NULL,"[%d] Sending values to shared memory partner %d, global rank %d\n",rank,jj,onodes1[imdex]);
2399:       nrecvshared++;
2400:       slenshared += n;
2401:     }
2402:     slen += n;
2403:     count--;
2404:   }

2406:   if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);

2408:   /* allocate entire send scatter context */
2409:   PetscNewLog(ctx,&to);
2410:   to->sharedwin = MPI_WIN_NULL;
2411:   to->n         = nrecvs-nrecvshared;

2413:   PetscMalloc1(nrecvs-nrecvshared,&to->requests);
2414:   PetscMalloc4(bs*(slen-slenshared),&to->values,slen-slenshared,&to->indices,nrecvs-nrecvshared+1,&to->starts,nrecvs-nrecvshared,&to->procs);
2415:   PetscMalloc2(PetscMax(to->n,nsends),&to->sstatus,PetscMax(to->n,nsends),&to->rstatus);

2417:   MPI_Comm_size(mscomm,&to->msize);
2418:   PetscMalloc1(slenshared,&to->sharedspaceindices);
2419:   PetscCalloc1(to->msize+1,&to->sharedspacestarts);

2421:   ctx->todata              = (void*)to;
2422:   to->starts[0]            = 0;
2423:   to->sharedspacestarts[0] = 0;

2425:   /* Allocate shared memory space for shared memory partner communication */
2426:   MPIU_Win_allocate_shared(to->msize*sizeof(PetscInt),sizeof(PetscInt),info,mscomm,&sharedspaceoffset,&sharedoffsetwin);
2427:   for (i=0; i<to->msize; i++) sharedspaceoffset[i] = -1; /* mark with -1 for later error checking */
2428:   if (nrecvs) {
2429:     /* move the data into the send scatter */
2430:     base     = owners[rank];
2431:     rsvalues = rvalues;
2432:     to->n    = 0;
2433:     for (i=0; i<nrecvs; i++) {
2434:       values = rsvalues;
2435:       PetscCommSharedGlobalToLocal(scomm,(PetscMPIInt)onodes1[i],&jj);
2436:       if (jj > -1) {
2437:         to->sharedspacestarts[jj]   = to->sharedcnt;
2438:         to->sharedspacestarts[jj+1] = to->sharedcnt + olengths1[i];
2439:         for (j=0; j<olengths1[i]; j++) to->sharedspaceindices[to->sharedcnt + j] = values[j] - base;
2440:         sharedspaceoffset[jj]      = to->sharedcnt;
2441:         to->sharedcnt             += olengths1[i];
2442:         PetscInfo4(NULL,"[%d] Sending %d values to shared memory partner %d, global rank %d\n",rank,olengths1[i],jj,onodes1[i]);
2443:       } else {
2444:         to->starts[to->n+1] = to->starts[to->n] + olengths1[i];
2445:         to->procs[to->n]    = onodes1[i];
2446:         for (j=0; j<olengths1[i]; j++) to->indices[to->starts[to->n] + j] = values[j] - base;
2447:         to->n++;
2448:       }
2449:       rsvalues += olengths1[i];
2450:     }
2451:   }
2452:   PetscFree(olengths1);
2453:   PetscFree(onodes1);
2454:   PetscFree3(rvalues,source,recv_waits);

2456:   if (mpi3node) {
2457:     /* to->sharedspace is used only as a flag */
2458:     i = 0;
2459:     if (to->sharedcnt) i = 1;
2460:     MPIU_Win_allocate_shared(i*sizeof(PetscScalar),sizeof(PetscScalar),info,mscomm,&to->sharedspace,&to->sharedwin);
2461:   } else { /* mpi3 */
2462:     MPIU_Win_allocate_shared(bs*to->sharedcnt*sizeof(PetscScalar),sizeof(PetscScalar),info,mscomm,&to->sharedspace,&to->sharedwin);
2463:   }
2464:   if (to->sharedwin == MPI_WIN_NULL) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPIU_Win_allocate_shared() returned a null window");
2465:   MPI_Info_free(&info);

2467:   /* allocate entire receive scatter context */
2468:   PetscNewLog(ctx,&from);
2469:   from->sharedwin       = MPI_WIN_NULL;
2470:   from->msize           = to->msize;
2471:   /* we don't actually know the correct value for from->n at this point so we use the upper bound */
2472:   from->n = nsends;

2474:   PetscMalloc1(nsends,&from->requests);
2475:   PetscMalloc4((ny-nprocslocal)*bs,&from->values,ny-nprocslocal,&from->indices,nsends+1,&from->starts,from->n,&from->procs);
2476:   ctx->fromdata = (void*)from;

2478:   PetscCalloc1(to->msize+1,&from->sharedspacestarts);
2479:   /* move data into receive scatter */
2480:   PetscMalloc2(size,&lowner,nsends+1,&start);
2481:   PetscMalloc2(size,&lsharedowner,to->msize+1,&sharedstart);
2482:   count = 0; from->starts[0] = start[0] = 0;
2483:   from->sharedcnt = 0;
2484:   for (i=0; i<size; i++) {
2485:     lsharedowner[i] = -1;
2486:     lowner[i]       = -1;
2487:     if (nprocs[i]) {
2488:       PetscCommSharedGlobalToLocal(scomm,i,&jj);
2489:       if (jj > -1) {
2490:         from->sharedspacestarts[jj] = sharedstart[jj] = from->sharedcnt;
2491:         from->sharedspacestarts[jj+1] = sharedstart[jj+1] = from->sharedspacestarts[jj] + nprocs[i];
2492:         from->sharedcnt += nprocs[i];
2493:         lsharedowner[i] = jj;
2494:       } else {
2495:         lowner[i]            = count++;
2496:         from->procs[count-1]  = i;
2497:         from->starts[count] = start[count] = start[count-1] + nprocs[i];
2498:       }
2499:     }
2500:   }
2501:   from->n = count;

2503:   for (i=0; i<nx; i++) {
2504:     if (owner[i] != rank && lowner[owner[i]] > -1) {
2505:       from->indices[start[lowner[owner[i]]]++] = bs*inidy[i];
2506:       if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2507:     }
2508:   }

2510:   /* copy over appropriate parts of inidy[] into from->sharedspaceindices */
2511:   if (mpi3node) {
2512:     /* in addition, copy over appropriate parts of inidx[] into 2nd part of from->sharedspaceindices */
2513:     PetscInt *sharedspaceindicesx;
2514:     PetscMalloc1(2*from->sharedcnt,&from->sharedspaceindices);
2515:     sharedspaceindicesx = from->sharedspaceindices + from->sharedcnt;
2516:     for (i=0; i<nx; i++) {
2517:       if (owner[i] != rank && lsharedowner[owner[i]] > -1) {
2518:         if (sharedstart[lsharedowner[owner[i]]] > from->sharedcnt) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MEM,"Shared space index exceeds from->sharedcnt");

2520:         from->sharedspaceindices[sharedstart[lsharedowner[owner[i]]]] = bs*inidy[i];
2521:         sharedspaceindicesx[sharedstart[lsharedowner[owner[i]]]] = bs*inidx[i]-owners[owner[i]]; /* local inidx */
2522:         sharedstart[lsharedowner[owner[i]]]++;
2523:       }
2524:     }
2525:   } else { /* mpi3 */
2526:     PetscMalloc1(from->sharedcnt,&from->sharedspaceindices);
2527:     for (i=0; i<nx; i++) {
2528:       if (owner[i] != rank && lsharedowner[owner[i]] > -1) {
2529:         if (sharedstart[lsharedowner[owner[i]]] > from->sharedcnt) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MEM,"Shared space index exceeds from->sharedcnt");
2530:         from->sharedspaceindices[sharedstart[lsharedowner[owner[i]]]++] = bs*inidy[i];
2531:       }
2532:     }
2533:   }

2535:   PetscFree2(lowner,start);
2536:   PetscFree2(lsharedowner,sharedstart);
2537:   PetscFree2(nprocs,owner);

2539:   /* wait on sends */
2540:   if (nsends) {
2541:     PetscMalloc1(nsends,&send_status);
2542:     MPI_Waitall(nsends,send_waits,send_status);
2543:     PetscFree(send_status);
2544:   }
2545:   PetscFree3(svalues,send_waits,starts);

2547:   if (nprocslocal) {
2548:     PetscInt nt = from->local.n = to->local.n = nprocslocal;
2549:     /* we have a scatter to ourselves */
2550:     PetscMalloc1(nt,&to->local.vslots);
2551:     PetscMalloc1(nt,&from->local.vslots);
2552:     nt   = 0;
2553:     for (i=0; i<nx; i++) {
2554:       idx = bs*inidx[i];
2555:       if (idx >= owners[rank] && idx < owners[rank+1]) {
2556:         to->local.vslots[nt]     = idx - owners[rank];
2557:         from->local.vslots[nt++] = bs*inidy[i];
2558:         if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2559:       }
2560:     }
2561:     PetscLogObjectMemory((PetscObject)ctx,2*nt*sizeof(PetscInt));
2562:   } else {
2563:     from->local.n      = 0;
2564:     from->local.vslots = 0;
2565:     to->local.n        = 0;
2566:     to->local.vslots   = 0;
2567:   }

2569:   /* Get the shared memory address for all processes we will be copying data from */
2570:   PetscCalloc1(to->msize,&from->sharedspaces);
2571:   PetscCalloc1(to->msize,&from->sharedspacesoffset);
2572:   MPI_Comm_rank(mscomm,&mrank);
2573:   for (jj=0; jj<to->msize; jj++) {
2574:     MPI_Aint    isize;
2575:     PetscMPIInt disp_unit;
2576:     PetscInt    *ptr;
2577:     MPIU_Win_shared_query(to->sharedwin,jj,&isize,&disp_unit,&from->sharedspaces[jj]);
2578:     MPIU_Win_shared_query(sharedoffsetwin,jj,&isize,&disp_unit,&ptr);
2579:     from->sharedspacesoffset[jj] = ptr[mrank];
2580:   }
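
  /* Editor's note: MPIU_Win_shared_query() hands back pointers through which
     this process can load directly from partner jj's segment of the shared
     window, so the node-local part of the scatter is done with plain memory
     accesses instead of MPI messages; ptr[mrank] is the offset partner jj
     recorded for this rank inside its segment. */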
2581:   MPI_Win_free(&sharedoffsetwin);

2583:   if (mpi3node && !from->sharedspace) {
2584:     /* compute from->notdone to be used by VecScatterEndMPI3Node() */
2585:     PetscInt notdone = 0;
2586:     for (i=0; i<from->msize; i++) {
2587:       if (from->sharedspacesoffset && from->sharedspacesoffset[i] > -1) {
2588:         notdone++;
2589:       }
2590:     }
2591:     from->notdone = notdone;
2592:   }

2594:   from->local.nonmatching_computed = PETSC_FALSE;
2595:   from->local.n_nonmatching        = 0;
2596:   from->local.slots_nonmatching    = 0;
2597:   to->local.nonmatching_computed   = PETSC_FALSE;
2598:   to->local.n_nonmatching          = 0;
2599:   to->local.slots_nonmatching      = 0;

2601:   from->format = VEC_SCATTER_MPI_GENERAL;
2602:   to->format   = VEC_SCATTER_MPI_GENERAL;
2603:   from->bs     = bs;
2604:   to->bs       = bs;

2606:   VecScatterCreateCommon_PtoS_MPI3(from,to,ctx);
2607:   return(0);
2608: }

2610: /*
2611:    bs indicates how many elements there are in each block. Normally this would be 1.
2612: */
2613: PetscErrorCode VecScatterCreateCommon_PtoS_MPI3(VecScatter_MPI_General *from,VecScatter_MPI_General *to,VecScatter ctx)
2614: {
2615:   MPI_Comm       comm;
2616:   PetscMPIInt    tag  = ((PetscObject)ctx)->tag, tagr;
2617:   PetscInt       bs   = to->bs;
2618:   PetscMPIInt    size;
2619:   PetscInt       i, n;
2621:   VecScatterType type;
2622:   PetscBool      mpi3node;

2625:   PetscObjectGetComm((PetscObject)ctx,&comm);
2626:   PetscObjectGetNewTag((PetscObject)ctx,&tagr);
2627:   ctx->ops->destroy = VecScatterDestroy_PtoP_MPI3;

2629:   ctx->reproduce = PETSC_FALSE;
2630:   to->sendfirst  = PETSC_FALSE;
2631:   PetscOptionsGetBool(NULL,NULL,"-vecscatter_reproduce",&ctx->reproduce,NULL);
2632:   PetscOptionsGetBool(NULL,NULL,"-vecscatter_sendfirst",&to->sendfirst,NULL);
2633:   from->sendfirst = to->sendfirst;

2635:   MPI_Comm_size(comm,&size);
2636:   /* check if the receives are ALL going into contiguous locations; if so, we can skip indexing */
2637:   to->contiq = PETSC_FALSE;
2638:   n = from->starts[from->n];
2639:   from->contiq = PETSC_TRUE;
2640:   for (i=1; i<n; i++) {
2641:     if (from->indices[i] != from->indices[i-1] + bs) {
2642:       from->contiq = PETSC_FALSE;
2643:       break;
2644:     }
2645:   }

2647:   to->use_alltoallv = PETSC_FALSE;
2648:   PetscOptionsGetBool(NULL,NULL,"-vecscatter_alltoall",&to->use_alltoallv,NULL);
2649:   from->use_alltoallv = to->use_alltoallv;
2650:   if (from->use_alltoallv) PetscInfo(ctx,"Using MPI_Alltoallv() for scatter\n");
2651: #if defined(PETSC_HAVE_MPI_ALLTOALLW)  && !defined(PETSC_USE_64BIT_INDICES)
2652:   if (to->use_alltoallv) {
2653:     to->use_alltoallw = PETSC_FALSE;
2654:     PetscOptionsGetBool(NULL,NULL,"-vecscatter_nopack",&to->use_alltoallw,NULL);
2655:   }
2656:   from->use_alltoallw = to->use_alltoallw;
2657:   if (from->use_alltoallw) PetscInfo(ctx,"Using MPI_Alltoallw() for scatter\n");
2658: #endif

2660:   to->use_window = PETSC_FALSE;
2661:   PetscOptionsGetBool(NULL,NULL,"-vecscatter_window",&to->use_window,NULL);
2662:   from->use_window = to->use_window;

2664:   if (to->use_alltoallv) {
2665:     PetscMalloc2(size,&to->counts,size,&to->displs);
2666:     PetscMemzero(to->counts,size*sizeof(PetscMPIInt));
2667:     for (i=0; i<to->n; i++) to->counts[to->procs[i]] = bs*(to->starts[i+1] - to->starts[i]);

2669:     to->displs[0] = 0;
2670:     for (i=1; i<size; i++) to->displs[i] = to->displs[i-1] + to->counts[i-1];

2672:     PetscMalloc2(size,&from->counts,size,&from->displs);
2673:     PetscMemzero(from->counts,size*sizeof(PetscMPIInt));
2674:     for (i=0; i<from->n; i++) from->counts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
2675:     from->displs[0] = 0;
2676:     for (i=1; i<size; i++) from->displs[i] = from->displs[i-1] + from->counts[i-1];

2678: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
2679:     if (to->use_alltoallw) {
2680:       PetscMPIInt mpibs, mpilen;

2682:       ctx->packtogether = PETSC_FALSE;
2683:       PetscMPIIntCast(bs,&mpibs);
2684:       PetscMalloc3(size,&to->wcounts,size,&to->wdispls,size,&to->types);
2685:       PetscMemzero(to->wcounts,size*sizeof(PetscMPIInt));
2686:       PetscMemzero(to->wdispls,size*sizeof(PetscMPIInt));
2687:       for (i=0; i<size; i++) to->types[i] = MPIU_SCALAR;

2689:       for (i=0; i<to->n; i++) {
2690:         to->wcounts[to->procs[i]] = 1;
2691:         PetscMPIIntCast(to->starts[i+1]-to->starts[i],&mpilen);
2692:         MPI_Type_create_indexed_block(mpilen,mpibs,to->indices+to->starts[i],MPIU_SCALAR,to->types+to->procs[i]);
2693:         MPI_Type_commit(to->types+to->procs[i]);
2694:       }
2695:       PetscMalloc3(size,&from->wcounts,size,&from->wdispls,size,&from->types);
2696:       PetscMemzero(from->wcounts,size*sizeof(PetscMPIInt));
2697:       PetscMemzero(from->wdispls,size*sizeof(PetscMPIInt));
2698:       for (i=0; i<size; i++) from->types[i] = MPIU_SCALAR;

2700:       if (from->contiq) {
2701:         PetscInfo(ctx,"Scattered vector entries are stored contiguously, taking advantage of this with -vecscatter_alltoall\n");
2702:         for (i=0; i<from->n; i++) from->wcounts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);

2704:         if (from->n) from->wdispls[from->procs[0]] = sizeof(PetscScalar)*from->indices[0];
2705:         for (i=1; i<from->n; i++) from->wdispls[from->procs[i]] = from->wdispls[from->procs[i-1]] + sizeof(PetscScalar)*from->wcounts[from->procs[i-1]];

2707:       } else {
2708:         for (i=0; i<from->n; i++) {
2709:           from->wcounts[from->procs[i]] = 1;
2710:           PetscMPIIntCast(from->starts[i+1]-from->starts[i],&mpilen);
2711:           MPI_Type_create_indexed_block(mpilen,mpibs,from->indices+from->starts[i],MPIU_SCALAR,from->types+from->procs[i]);
2712:           MPI_Type_commit(from->types+from->procs[i]);
2713:         }
2714:       }
2715:     } else ctx->ops->copy = VecScatterCopy_PtoP_AllToAll;

2717: #else
2718:     to->use_alltoallw   = PETSC_FALSE;
2719:     from->use_alltoallw = PETSC_FALSE;
2720:     ctx->ops->copy      = VecScatterCopy_PtoP_AllToAll;
2721: #endif
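    /* Editor's note: in the Alltoallw path above (-vecscatter_nopack), each
       partner gets an MPI_Type_create_indexed_block() datatype describing its
       blocks in place, so MPI_Alltoallw() can move data directly between the
       vector arrays without the Pack_/UnPack_ staging buffers. */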
2722:   } else if (to->use_window) {
2723:     PetscMPIInt temptag,winsize;
2724:     PetscInt    iwinsize = 0;
2725:     MPI_Request *request;
2726:     MPI_Status  *status;

2728:     PetscObjectGetNewTag((PetscObject)ctx,&temptag);
2729:     PetscIntMultError((to->n ? to->starts[to->n] : 0),bs*sizeof(PetscScalar),&iwinsize);
2730:     PetscMPIIntCast(iwinsize,&winsize);
2731:     MPI_Win_create(to->values ? to->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&to->window);
2732:     PetscMalloc1(to->n,&to->winstarts);
2733:     PetscMalloc2(to->n,&request,to->n,&status);
2734:     for (i=0; i<to->n; i++) {
2735:       MPI_Irecv(to->winstarts+i,1,MPIU_INT,to->procs[i],temptag,comm,request+i);
2736:     }
2737:     for (i=0; i<from->n; i++) {
2738:       MPI_Send(from->starts+i,1,MPIU_INT,from->procs[i],temptag,comm);
2739:     }
2740:     MPI_Waitall(to->n,request,status);
2741:     PetscFree2(request,status);

2743:     PetscIntMultError((from->n ? from->starts[from->n] : 0),bs*sizeof(PetscScalar),&iwinsize);
2744:     PetscMPIIntCast(iwinsize,&winsize);
2745:     MPI_Win_create(from->values ? from->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&from->window);
2746:     PetscMalloc1(from->n,&from->winstarts);
2747:     PetscMalloc2(from->n,&request,from->n,&status);
2748:     for (i=0; i<from->n; i++) {
2749:       MPI_Irecv(from->winstarts+i,1,MPIU_INT,from->procs[i],temptag,comm,request+i);
2750:     }
2751:     for (i=0; i<to->n; i++) {
2752:       MPI_Send(to->starts+i,1,MPIU_INT,to->procs[i],temptag,comm);
2753:     }
2754:     MPI_Waitall(from->n,request,status);
2755:     PetscFree2(request,status);
2756:   } else {
2757:     PetscBool   use_rsend = PETSC_FALSE, use_ssend = PETSC_FALSE;
2758:     PetscInt    *sstarts  = to->starts,  *rstarts = from->starts;
2759:     PetscMPIInt *sprocs   = to->procs,   *rprocs  = from->procs;
2760:     MPI_Request *swaits   = to->requests,*rwaits  = from->requests;
2761:     MPI_Request *rev_swaits,*rev_rwaits;
2762:     PetscScalar *Ssvalues = to->values, *Srvalues = from->values;

2764:     /* allocate additional wait variables for the "reverse" scatter */
2765:     PetscMalloc1(to->n,&rev_rwaits);
2766:     PetscMalloc1(from->n,&rev_swaits);
2767:     to->rev_requests   = rev_rwaits;
2768:     from->rev_requests = rev_swaits;

2770:     /* Register the receives that you will use later (sends for scatter reverse) */
2771:     PetscOptionsGetBool(NULL,NULL,"-vecscatter_rsend",&use_rsend,NULL);
2772:     PetscOptionsGetBool(NULL,NULL,"-vecscatter_ssend",&use_ssend,NULL);
2773:     if (use_rsend) {
2774:       PetscInfo(ctx,"Using VecScatter ready receiver mode\n");
2775:       to->use_readyreceiver   = PETSC_TRUE;
2776:       from->use_readyreceiver = PETSC_TRUE;
2777:     } else {
2778:       to->use_readyreceiver   = PETSC_FALSE;
2779:       from->use_readyreceiver = PETSC_FALSE;
2780:     }
2781:     if (use_ssend) {
2782:       PetscInfo(ctx,"Using VecScatter Ssend mode\n");
2783:     }

2785:     for (i=0; i<from->n; i++) {
2786:       if (use_rsend) {
2787:         MPI_Rsend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2788:       } else if (use_ssend) {
2789:         MPI_Ssend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2790:       } else {
2791:         MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2792:       }
2793:     }

2795:     for (i=0; i<to->n; i++) {
2796:       if (use_rsend) {
2797:         MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2798:       } else if (use_ssend) {
2799:         MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2800:       } else {
2801:         MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2802:       }
2803:     }
2804:     /* Register receives for scatter and reverse */
2805:     for (i=0; i<from->n; i++) {
2806:       MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
2807:     }
2808:     for (i=0; i<to->n; i++) {
2809:       MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tagr,comm,rev_rwaits+i);
2810:     }
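
    /* Editor's note: the four loops above create persistent MPI requests
       (MPI_Send_init/MPI_Recv_init); VecScatterBegin/End later fire them with
       MPI_Startall() and complete them with MPI_Waitall(), avoiding per-call
       request construction.  Ready-send mode requires the matching receives
       to be started first, hence the MPI_Startall_irecv() calls and barrier
       just below. */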
2811:     if (use_rsend) {
2812:       if (to->n)   {MPI_Startall_irecv(to->starts[to->n]*to->bs,to->n,to->rev_requests);}
2813:       if (from->n) {MPI_Startall_irecv(from->starts[from->n]*from->bs,from->n,from->requests);}
2814:       MPI_Barrier(comm);
2815:     }
2816:     ctx->ops->copy = VecScatterCopy_PtoP_X;
2817:   }
2818:   PetscInfo1(ctx,"Using blocksize %D scatter\n",bs);

2820: #if defined(PETSC_USE_DEBUG)
2821:   MPIU_Allreduce(&bs,&i,1,MPIU_INT,MPI_MIN,PetscObjectComm((PetscObject)ctx));
2822:   MPIU_Allreduce(&bs,&n,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)ctx));
2823:   if (bs!=i || bs!=n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Block size %D != %D or %D",bs,i,n);
2824: #endif

2826:   VecScatterGetType(ctx,&type);
2827:   PetscStrcmp(type,"mpi3node",&mpi3node);

2829:   if (mpi3node) {
2830:     switch (bs) {
2831:     case 12:
2832:       ctx->ops->begin = VecScatterBeginMPI3Node_12;
2833:       ctx->ops->end   = VecScatterEndMPI3Node_12;
2834:       break;
2835:     case 11:
2836:       ctx->ops->begin = VecScatterBeginMPI3Node_11;
2837:       ctx->ops->end   = VecScatterEndMPI3Node_11;
2838:       break;
2839:     case 10:
2840:       ctx->ops->begin = VecScatterBeginMPI3Node_10;
2841:       ctx->ops->end   = VecScatterEndMPI3Node_10;
2842:       break;
2843:     case 9:
2844:       ctx->ops->begin = VecScatterBeginMPI3Node_9;
2845:       ctx->ops->end   = VecScatterEndMPI3Node_9;
2846:       break;
2847:     case 8:
2848:       ctx->ops->begin = VecScatterBeginMPI3Node_8;
2849:       ctx->ops->end   = VecScatterEndMPI3Node_8;
2850:       break;
2851:     case 7:
2852:       ctx->ops->begin = VecScatterBeginMPI3Node_7;
2853:       ctx->ops->end   = VecScatterEndMPI3Node_7;
2854:       break;
2855:     case 6:
2856:       ctx->ops->begin = VecScatterBeginMPI3Node_6;
2857:       ctx->ops->end   = VecScatterEndMPI3Node_6;
2858:       break;
2859:     case 5:
2860:       ctx->ops->begin = VecScatterBeginMPI3Node_5;
2861:       ctx->ops->end   = VecScatterEndMPI3Node_5;
2862:       break;
2863:     case 4:
2864:       ctx->ops->begin = VecScatterBeginMPI3Node_4;
2865:       ctx->ops->end   = VecScatterEndMPI3Node_4;
2866:       break;
2867:     case 3:
2868:       ctx->ops->begin = VecScatterBeginMPI3Node_3;
2869:       ctx->ops->end   = VecScatterEndMPI3Node_3;
2870:       break;
2871:     case 2:
2872:       ctx->ops->begin = VecScatterBeginMPI3Node_2;
2873:       ctx->ops->end   = VecScatterEndMPI3Node_2;
2874:       break;
2875:     case 1:
2876:       ctx->ops->begin = VecScatterBeginMPI3Node_1;
2877:       ctx->ops->end   = VecScatterEndMPI3Node_1;
2878:       break;
2879:     default:
2880:       ctx->ops->begin = VecScatterBegin_bs;
2881:       ctx->ops->end   = VecScatterEnd_bs;
2882:     }
2883:   } else { /* !mpi3node */

2885:     switch (bs) {
2886:     case 12:
2887:       ctx->ops->begin = VecScatterBegin_12;
2888:       ctx->ops->end   = VecScatterEnd_12;
2889:       break;
2890:     case 11:
2891:       ctx->ops->begin = VecScatterBegin_11;
2892:       ctx->ops->end   = VecScatterEnd_11;
2893:       break;
2894:     case 10:
2895:       ctx->ops->begin = VecScatterBegin_10;
2896:       ctx->ops->end   = VecScatterEnd_10;
2897:       break;
2898:     case 9:
2899:       ctx->ops->begin = VecScatterBegin_9;
2900:       ctx->ops->end   = VecScatterEnd_9;
2901:       break;
2902:     case 8:
2903:       ctx->ops->begin = VecScatterBegin_8;
2904:       ctx->ops->end   = VecScatterEnd_8;
2905:       break;
2906:     case 7:
2907:       ctx->ops->begin = VecScatterBegin_7;
2908:       ctx->ops->end   = VecScatterEnd_7;
2909:       break;
2910:     case 6:
2911:       ctx->ops->begin = VecScatterBegin_6;
2912:       ctx->ops->end   = VecScatterEnd_6;
2913:       break;
2914:     case 5:
2915:       ctx->ops->begin = VecScatterBegin_5;
2916:       ctx->ops->end   = VecScatterEnd_5;
2917:       break;
2918:     case 4:
2919:       ctx->ops->begin = VecScatterBegin_4;
2920:       ctx->ops->end   = VecScatterEnd_4;
2921:       break;
2922:     case 3:
2923:       ctx->ops->begin = VecScatterBegin_3;
2924:       ctx->ops->end   = VecScatterEnd_3;
2925:       break;
2926:     case 2:
2927:       ctx->ops->begin = VecScatterBegin_2;
2928:       ctx->ops->end   = VecScatterEnd_2;
2929:       break;
2930:     case 1:
2931:       if (mpi3node) { /* cannot be true here: this whole switch is in the !mpi3node branch */
2932:         ctx->ops->begin = VecScatterBeginMPI3Node_1;
2933:         ctx->ops->end   = VecScatterEndMPI3Node_1;
2934:       } else {
2935:         ctx->ops->begin = VecScatterBegin_1;
2936:         ctx->ops->end   = VecScatterEnd_1;
2937:       }
2938:       break;
2939:     default:
2940:       ctx->ops->begin = VecScatterBegin_bs;
2941:       ctx->ops->end   = VecScatterEnd_bs;
2942:     }
2943:   }

2945:   ctx->ops->view = VecScatterView_MPI;

2947:   /* Check if the local scatter is actually a copy; important special case */
2948:   if (to->local.n) {
2949:     VecScatterLocalOptimizeCopy_Private(ctx,&to->local,&from->local,bs);
2950:   }
2951:   return(0);
2952: }

2954: /* ------------------------------------------------------------------------------------*/
2955: /*
2956:          Scatter from local Seq vectors to a parallel vector.
2957:          Reverses the order of the arguments, calls VecScatterCreateLocal_PtoS_MPI3(),
2958:          and then reverses the result.
2959: */
2960: PetscErrorCode VecScatterCreateLocal_StoP_MPI3(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2961: {
2962:   PetscErrorCode         ierr;
2963:   MPI_Request            *waits;
2964:   VecScatter_MPI_General *to,*from;

2967:   VecScatterCreateLocal_PtoS_MPI3(ny,inidy,nx,inidx,yin,xin,bs,ctx);
2968:   to            = (VecScatter_MPI_General*)ctx->fromdata;
2969:   from          = (VecScatter_MPI_General*)ctx->todata;
2970:   ctx->todata   = (void*)to;
2971:   ctx->fromdata = (void*)from;
2972:   /* these two are special, they are ALWAYS stored in to struct */
2973:   to->sstatus   = from->sstatus;
2974:   to->rstatus   = from->rstatus;

2976:   from->sstatus = 0;
2977:   from->rstatus = 0;
2978:   waits              = from->rev_requests;
2979:   from->rev_requests = from->requests;
2980:   from->requests     = waits;
2981:   waits              = to->rev_requests;
2982:   to->rev_requests   = to->requests;
2983:   to->requests       = waits;
2984:   return(0);
2985: }
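
/*
   Editor's note: building the StoP scatter as a PtoS scatter with the x/y and
   inidx/inidy arguments exchanged, then swapping todata/fromdata and each
   side's requests with its rev_requests, reuses the whole PtoS setup: the
   "forward" persistent requests of the reversed context are exactly the
   "reverse" requests of the original one.
*/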

2987: /* ---------------------------------------------------------------------------------*/
2988: PetscErrorCode VecScatterCreateLocal_PtoP_MPI3(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2989: {
2991:   PetscMPIInt    size,rank,tag,imdex,n;
2992:   PetscInt       *owners = xin->map->range;
2993:   PetscMPIInt    *nprocs = NULL;
2994:   PetscInt       i,j,idx,nsends,*local_inidx = NULL,*local_inidy = NULL;
2995:   PetscMPIInt    *owner   = NULL;
2996:   PetscInt       *starts  = NULL,count,slen;
2997:   PetscInt       *rvalues = NULL,*svalues = NULL,base,*values = NULL,*rsvalues,recvtotal,lastidx;
2998:   PetscMPIInt    *onodes1,*olengths1,nrecvs;
2999:   MPI_Comm       comm;
3000:   MPI_Request    *send_waits = NULL,*recv_waits = NULL;
3001:   MPI_Status     recv_status,*send_status = NULL;
3002:   PetscBool      duplicate = PETSC_FALSE;
3003: #if defined(PETSC_USE_DEBUG)
3004:   PetscBool      found = PETSC_FALSE;
3005: #endif

3008:   PetscObjectGetNewTag((PetscObject)ctx,&tag);
3009:   PetscObjectGetComm((PetscObject)xin,&comm);
3010:   MPI_Comm_size(comm,&size);
3011:   MPI_Comm_rank(comm,&rank);
3012:   if (size == 1) {
3013:     VecScatterCreateLocal_StoP_MPI3(nx,inidx,ny,inidy,xin,yin,bs,ctx);
3014:     return(0);
3015:   }

3017:   /*
3018:      Each processor ships off its inidx[j] and inidy[j] to the appropriate processor.
3019:      Each processor then builds the StoP scatter from the redistributed indices.
3020:   */
3021:   /*  first count number of contributors to each processor */
3022:   PetscMalloc3(size,&nprocs,nx,&owner,(size+1),&starts);
3023:   PetscMemzero(nprocs,size*sizeof(PetscMPIInt));

3025:   lastidx = -1;
3026:   j       = 0;
3027:   for (i=0; i<nx; i++) {
3028:     /* if indices are NOT locally sorted, need to start search at the beginning */
3029:     if (lastidx > (idx = bs*inidx[i])) j = 0;
3030:     lastidx = idx;
3031:     for (; j<size; j++) {
3032:       if (idx >= owners[j] && idx < owners[j+1]) {
3033:         nprocs[j]++;
3034:         owner[i] = j;
3035: #if defined(PETSC_USE_DEBUG)
3036:         found = PETSC_TRUE;
3037: #endif
3038:         break;
3039:       }
3040:     }
3041: #if defined(PETSC_USE_DEBUG)
3042:     if (!found) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
3043:     found = PETSC_FALSE;
3044: #endif
3045:   }
3046:   nsends = 0;
3047:   for (i=0; i<size; i++) nsends += (nprocs[i] > 0);

3049:   /* inform other processors of number of messages and max length*/
3050:   PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
3051:   PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
3052:   PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
3053:   recvtotal = 0;
3054:   for (i=0; i<nrecvs; i++) {
3055:     PetscIntSumError(recvtotal,olengths1[i],&recvtotal);
3056:   }

3058:   /* post receives:   */
3059:   PetscMalloc5(2*recvtotal,&rvalues,2*nx,&svalues,nrecvs,&recv_waits,nsends,&send_waits,nsends,&send_status);

3061:   count = 0;
3062:   for (i=0; i<nrecvs; i++) {
3063:     MPI_Irecv((rvalues+2*count),2*olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
3064:     PetscIntSumError(count,olengths1[i],&count);
3065:   }
3066:   PetscFree(onodes1);

3068:   /* do sends:
3069:       1) starts[i] gives the starting index in svalues for stuff going to
3070:          the ith processor
3071:   */
3072:   starts[0]= 0;
3073:   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
3074:   for (i=0; i<nx; i++) {
3075:     svalues[2*starts[owner[i]]]       = bs*inidx[i];
3076:     svalues[1 + 2*starts[owner[i]]++] = bs*inidy[i];
3077:   }

3079:   starts[0] = 0;
3080:   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
3081:   count = 0;
3082:   for (i=0; i<size; i++) {
3083:     if (nprocs[i]) {
3084:       MPI_Isend(svalues+2*starts[i],2*nprocs[i],MPIU_INT,i,tag,comm,send_waits+count);
3085:       count++;
3086:     }
3087:   }
3088:   PetscFree3(nprocs,owner,starts);

3090:   /*  wait on receives */
3091:   count = nrecvs;
3092:   slen  = 0;
3093:   while (count) {
3094:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
3095:     /* unpack receives into our local space */
3096:     MPI_Get_count(&recv_status,MPIU_INT,&n);
3097:     slen += n/2;
3098:     count--;
3099:   }
3100:   if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);

3102:   PetscMalloc2(slen,&local_inidx,slen,&local_inidy);
3103:   base     = owners[rank];
3104:   count    = 0;
3105:   rsvalues = rvalues;
3106:   for (i=0; i<nrecvs; i++) {
3107:     values    = rsvalues;
3108:     rsvalues += 2*olengths1[i];
3109:     for (j=0; j<olengths1[i]; j++) {
3110:       local_inidx[count]   = values[2*j] - base;
3111:       local_inidy[count++] = values[2*j+1];
3112:     }
3113:   }
3114:   PetscFree(olengths1);

3116:   /* wait on sends */
3117:   if (nsends) {MPI_Waitall(nsends,send_waits,send_status);}
3118:   PetscFree5(rvalues,svalues,recv_waits,send_waits,send_status);

3120:   /*
3121:      should sort and remove duplicates from local_inidx,local_inidy
3122:   */
3123: #if defined(do_it_slow)
3124:   /* sort on the from index */
3125:   PetscSortIntWithArray(slen,local_inidx,local_inidy);
3126:   start = 0;
3127:   while (start < slen) {
3128:     count = start+1;
3129:     last  = local_inidx[start];
3130:     while (count < slen && last == local_inidx[count]) count++;
3131:     if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
3132:       /* sort on to index */
3133:       PetscSortInt(count-start,local_inidy+start);
3134:     }
3135:     /* remove duplicates; not most efficient way, but probably good enough */
3136:     i = start;
3137:     while (i < count-1) {
3138:       if (local_inidy[i] != local_inidy[i+1]) i++;
3139:       else { /* found a duplicate */
3140:         duplicate = PETSC_TRUE;
3141:         for (j=i; j<slen-1; j++) {
3142:           local_inidx[j] = local_inidx[j+1];
3143:           local_inidy[j] = local_inidy[j+1];
3144:         }
3145:         slen--;
3146:         count--;
3147:       }
3148:     }
3149:     start = count;
3150:   }
3151: #endif
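  /* Editor's note: do_it_slow is never defined, so the sort/deduplicate block
     above is compiled out and duplicate always stays PETSC_FALSE here (the
     block also references a scalar 'start' that this routine no longer
     declares); for the mpi3node type, duplicated source indices are instead
     flagged by the PetscBT check in VecScatterCreateLocal_PtoS_MPI3(). */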
3152:   if (duplicate) {
3153:     PetscInfo(ctx,"Duplicate from to indices passed in VecScatterCreate(), they are ignored\n");
3154:   }
3155:   VecScatterCreateLocal_StoP_MPI3(slen,local_inidx,slen,local_inidy,xin,yin,bs,ctx);
3156:   PetscFree2(local_inidx,local_inidy);
3157:   return(0);
3158: }

3160: #endif