Actual source code: vpscat.c

petsc-3.4.5 2014-06-29
  2: /*
  3:     Defines parallel vector scatters.
  4: */

  6: #include <../src/vec/vec/impls/dvecimpl.h>         /*I "petscvec.h" I*/
  7: #include <../src/vec/vec/impls/mpi/pvecimpl.h>

 11: PetscErrorCode VecScatterView_MPI(VecScatter ctx,PetscViewer viewer)
 12: {
 13:   VecScatter_MPI_General *to  =(VecScatter_MPI_General*)ctx->todata;
 14:   VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
 15:   PetscErrorCode         ierr;
 16:   PetscInt               i;
 17:   PetscMPIInt            rank;
 18:   PetscViewerFormat      format;
 19:   PetscBool              iascii;

 22:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
 23:   if (iascii) {
 24:     MPI_Comm_rank(PetscObjectComm((PetscObject)ctx),&rank);
 25:     PetscViewerGetFormat(viewer,&format);
 26:     if (format ==  PETSC_VIEWER_ASCII_INFO) {
 27:       PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;

 29:       MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 30:       MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 31:       itmp = to->starts[to->n];
 32:       MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 33:       itmp = from->starts[from->n];
 34:       MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 35:       MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)ctx));

 37:       PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
 38:       PetscViewerASCIIPrintf(viewer,"  Maximum number sends %D\n",nsend_max);
 39:       PetscViewerASCIIPrintf(viewer,"  Maximum number receives %D\n",nrecv_max);
 40:       PetscViewerASCIIPrintf(viewer,"  Maximum data sent %D\n",(PetscInt)(lensend_max*to->bs*sizeof(PetscScalar)));
 41:       PetscViewerASCIIPrintf(viewer,"  Maximum data received %D\n",(PetscInt)(lenrecv_max*to->bs*sizeof(PetscScalar)));
 42:       PetscViewerASCIIPrintf(viewer,"  Total data sent %D\n",(PetscInt)(alldata*to->bs*sizeof(PetscScalar)));

 44:     } else {
 45:       PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
 46:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);
 47:       if (to->n) {
 48:         for (i=0; i<to->n; i++) {
 49:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d]   %D length = %D to whom %D\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
 50:         }
 51:         PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote sends (in order by process sent to)\n");
 52:         for (i=0; i<to->starts[to->n]; i++) {
 53:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,to->indices[i]);
 54:         }
 55:       }

 57:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);
 58:       if (from->n) {
 59:         for (i=0; i<from->n; i++) {
 60:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %D\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
 61:         }

 63:         PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote receives (in order by process received from)\n");
 64:         for (i=0; i<from->starts[from->n]; i++) {
 65:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,from->indices[i]);
 66:         }
 67:       }
 68:       if (to->local.n) {
 69:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Indices for local part of scatter\n",rank);
 70:         for (i=0; i<to->local.n; i++) {
 71:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] From %D to %D \n",rank,from->local.vslots[i],to->local.vslots[i]);
 72:         }
 73:       }

 75:       PetscViewerFlush(viewer);
 76:       PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
 77:     }
 78:   }
 79:   return(0);
 80: }
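/*
   Editorial usage sketch (not part of the original file; assumes the standard PETSc 3.4 viewer API):
   the routine above is normally reached through VecScatterView().  To obtain the summary statistics
   printed by the PETSC_VIEWER_ASCII_INFO branch one might write

     ierr = PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
     ierr = VecScatterView(ctx,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);

   whereas viewing with the default format prints the full per-process send/receive index lists.
*/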

 82: /* -----------------------------------------------------------------------------------*/
 83: /*
 84:       The next routine determines what part of the local part of the scatter is an
 85:   exact copy of values into their current location. We check this here and
 86:   then know that we need not perform that portion of the scatter when the vector is
 87:   scattering to itself with INSERT_VALUES.

 89:      This is currently not used, but it would speed up, for example, DMDALocalToLocalBegin/End().

 91: */
 94: PetscErrorCode VecScatterLocalOptimize_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from)
 95: {
 96:   PetscInt       n = to->n,n_nonmatching = 0,i,*to_slots = to->vslots,*from_slots = from->vslots;
 98:   PetscInt       *nto_slots,*nfrom_slots,j = 0;

101:   for (i=0; i<n; i++) {
102:     if (to_slots[i] != from_slots[i]) n_nonmatching++;
103:   }

105:   if (!n_nonmatching) {
106:     to->nonmatching_computed = PETSC_TRUE;
107:     to->n_nonmatching        = from->n_nonmatching = 0;

109:     PetscInfo1(scatter,"Reduced %D to 0\n", n);
110:   } else if (n_nonmatching == n) {
111:     to->nonmatching_computed = PETSC_FALSE;

113:     PetscInfo(scatter,"All values non-matching\n");
114:   } else {
115:     to->nonmatching_computed= PETSC_TRUE;
116:     to->n_nonmatching       = from->n_nonmatching = n_nonmatching;

118:     PetscMalloc(n_nonmatching*sizeof(PetscInt),&nto_slots);
119:     PetscMalloc(n_nonmatching*sizeof(PetscInt),&nfrom_slots);

121:     to->slots_nonmatching   = nto_slots;
122:     from->slots_nonmatching = nfrom_slots;
123:     for (i=0; i<n; i++) {
124:       if (to_slots[i] != from_slots[i]) {
125:         nto_slots[j]   = to_slots[i];
126:         nfrom_slots[j] = from_slots[i];
127:         j++;
128:       }
129:     }
130:     PetscInfo2(scatter,"Reduced %D to %D\n",n,n_nonmatching);
131:   }
132:   return(0);
133: }

135: /* --------------------------------------------------------------------------------------*/

137: /* -------------------------------------------------------------------------------------*/
140: PetscErrorCode VecScatterDestroy_PtoP(VecScatter ctx)
141: {
142:   VecScatter_MPI_General *to   = (VecScatter_MPI_General*)ctx->todata;
143:   VecScatter_MPI_General *from = (VecScatter_MPI_General*)ctx->fromdata;
144:   PetscErrorCode         ierr;
145:   PetscInt               i;

148:   if (to->use_readyreceiver) {
149:     /*
 150:        Since we have already posted the receives we must cancel them before freeing
 151:        the requests
152:     */
153:     for (i=0; i<from->n; i++) {
154:       MPI_Cancel(from->requests+i);
155:     }
156:     for (i=0; i<to->n; i++) {
157:       MPI_Cancel(to->rev_requests+i);
158:     }
159:     MPI_Waitall(from->n,from->requests,to->rstatus);
160:     MPI_Waitall(to->n,to->rev_requests,to->rstatus);
161:   }

163: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
164:   if (to->use_alltoallw) {
165:     PetscFree3(to->wcounts,to->wdispls,to->types);
166:     PetscFree3(from->wcounts,from->wdispls,from->types);
167:   }
168: #endif

170: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
171:   if (to->use_window) {
172:     MPI_Win_free(&from->window);
173:     MPI_Win_free(&to->window);
174:     PetscFree(from->winstarts);
175:     PetscFree(to->winstarts);
176:   }
177: #endif

179:   if (to->use_alltoallv) {
180:     PetscFree2(to->counts,to->displs);
181:     PetscFree2(from->counts,from->displs);
182:   }

184:   /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
185:   /*
 186:      IBM's PE version of MPI has a bug where freeing these requests corrupts later
 187:      message passing.
188:   */
189: #if !defined(PETSC_HAVE_BROKEN_REQUEST_FREE)
190:   if (!to->use_alltoallv && !to->use_window) {   /* currently the to->requests etc are ALWAYS allocated even if not used */
191:     if (to->requests) {
192:       for (i=0; i<to->n; i++) {
193:         MPI_Request_free(to->requests + i);
194:       }
195:     }
196:     if (to->rev_requests) {
197:       for (i=0; i<to->n; i++) {
198:         MPI_Request_free(to->rev_requests + i);
199:       }
200:     }
201:   }
202:   /*
 203:       MPICH could not properly cancel requests, so in ready-receiver mode we
 204:     cannot free the requests. This may be fixed now; if not, put the following
 205:     code inside an if (!to->use_readyreceiver) { block.
206:   */
207:   if (!to->use_alltoallv && !to->use_window) {    /* currently the from->requests etc are ALWAYS allocated even if not used */
208:     if (from->requests) {
209:       for (i=0; i<from->n; i++) {
210:         MPI_Request_free(from->requests + i);
211:       }
212:     }

214:     if (from->rev_requests) {
215:       for (i=0; i<from->n; i++) {
216:         MPI_Request_free(from->rev_requests + i);
217:       }
218:     }
219:   }
220: #endif

222:   PetscFree(to->local.vslots);
223:   PetscFree(from->local.vslots);
224:   PetscFree2(to->counts,to->displs);
225:   PetscFree2(from->counts,from->displs);
226:   PetscFree(to->local.slots_nonmatching);
227:   PetscFree(from->local.slots_nonmatching);
228:   PetscFree(to->rev_requests);
229:   PetscFree(from->rev_requests);
230:   PetscFree(to->requests);
231:   PetscFree(from->requests);
232:   PetscFree4(to->values,to->indices,to->starts,to->procs);
233:   PetscFree2(to->sstatus,to->rstatus);
234:   PetscFree4(from->values,from->indices,from->starts,from->procs);
235:   PetscFree(from);
236:   PetscFree(to);
237: #if defined(PETSC_HAVE_CUSP)
238:   PetscCUSPIndicesDestroy((PetscCUSPIndices*)&ctx->spptr);
239: #endif
240:   return(0);
241: }



245: /* --------------------------------------------------------------------------------------*/
246: /*
 247:     Special optimization to see if the local part of the scatter is actually
 248:     a contiguous copy; if so, the scatter routines use PetscMemcpy() instead.

250: */
253: PetscErrorCode VecScatterLocalOptimizeCopy_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from,PetscInt bs)
254: {
255:   PetscInt       n = to->n,i,*to_slots = to->vslots,*from_slots = from->vslots;
256:   PetscInt       to_start,from_start;

260:   to_start   = to_slots[0];
261:   from_start = from_slots[0];

263:   for (i=1; i<n; i++) {
264:     to_start   += bs;
265:     from_start += bs;
266:     if (to_slots[i]   != to_start)   return(0);
267:     if (from_slots[i] != from_start) return(0);
268:   }
269:   to->is_copy       = PETSC_TRUE;
270:   to->copy_start    = to_slots[0];
271:   to->copy_length   = bs*sizeof(PetscScalar)*n;
272:   from->is_copy     = PETSC_TRUE;
273:   from->copy_start  = from_slots[0];
274:   from->copy_length = bs*sizeof(PetscScalar)*n;
275:   PetscInfo(scatter,"Local scatter is a copy, optimizing for it\n");
276:   return(0);
277: }

279: /* --------------------------------------------------------------------------------------*/

283: PetscErrorCode VecScatterCopy_PtoP_X(VecScatter in,VecScatter out)
284: {
285:   VecScatter_MPI_General *in_to   = (VecScatter_MPI_General*)in->todata;
286:   VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
287:   PetscErrorCode         ierr;
288:   PetscInt               ny,bs = in_from->bs;

291:   out->begin   = in->begin;
292:   out->end     = in->end;
293:   out->copy    = in->copy;
294:   out->destroy = in->destroy;
295:   out->view    = in->view;

297:   /* allocate entire send scatter context */
298:   PetscNewLog(out,VecScatter_MPI_General,&out_to);
299:   PetscNewLog(out,VecScatter_MPI_General,&out_from);

301:   ny                = in_to->starts[in_to->n];
302:   out_to->n         = in_to->n;
303:   out_to->type      = in_to->type;
304:   out_to->sendfirst = in_to->sendfirst;

306:   PetscMalloc(out_to->n*sizeof(MPI_Request),&out_to->requests);
307:   PetscMalloc4(bs*ny,PetscScalar,&out_to->values,ny,PetscInt,&out_to->indices,out_to->n+1,PetscInt,&out_to->starts,out_to->n,PetscMPIInt,&out_to->procs);
308:   PetscMalloc2(PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->sstatus,PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->rstatus);
309:   PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
310:   PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
311:   PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));

313:   out->todata                        = (void*)out_to;
314:   out_to->local.n                    = in_to->local.n;
315:   out_to->local.nonmatching_computed = PETSC_FALSE;
316:   out_to->local.n_nonmatching        = 0;
317:   out_to->local.slots_nonmatching    = 0;
318:   if (in_to->local.n) {
319:     PetscMalloc(in_to->local.n*sizeof(PetscInt),&out_to->local.vslots);
320:     PetscMalloc(in_from->local.n*sizeof(PetscInt),&out_from->local.vslots);
321:     PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
322:     PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
323:   } else {
324:     out_to->local.vslots   = 0;
325:     out_from->local.vslots = 0;
326:   }

328:   /* allocate entire receive context */
329:   out_from->type      = in_from->type;
330:   ny                  = in_from->starts[in_from->n];
331:   out_from->n         = in_from->n;
332:   out_from->sendfirst = in_from->sendfirst;

334:   PetscMalloc(out_from->n*sizeof(MPI_Request),&out_from->requests);
335:   PetscMalloc4(ny*bs,PetscScalar,&out_from->values,ny,PetscInt,&out_from->indices,out_from->n+1,PetscInt,&out_from->starts,out_from->n,PetscMPIInt,&out_from->procs);
336:   PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
337:   PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
338:   PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));

340:   out->fromdata                        = (void*)out_from;
341:   out_from->local.n                    = in_from->local.n;
342:   out_from->local.nonmatching_computed = PETSC_FALSE;
343:   out_from->local.n_nonmatching        = 0;
344:   out_from->local.slots_nonmatching    = 0;

346:   /*
 347:       set up the request arrays for use with MPI_Send_init() and MPI_Recv_init()
348:   */
349:   {
350:     PetscMPIInt tag;
351:     MPI_Comm    comm;
352:     PetscInt    *sstarts = out_to->starts,  *rstarts = out_from->starts;
353:     PetscMPIInt *sprocs  = out_to->procs,   *rprocs  = out_from->procs;
354:     PetscInt    i;
355:     PetscBool   flg;
356:     MPI_Request *swaits   = out_to->requests,*rwaits  = out_from->requests;
357:     MPI_Request *rev_swaits,*rev_rwaits;
358:     PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;

360:     PetscMalloc(in_to->n*sizeof(MPI_Request),&out_to->rev_requests);
361:     PetscMalloc(in_from->n*sizeof(MPI_Request),&out_from->rev_requests);

363:     rev_rwaits = out_to->rev_requests;
364:     rev_swaits = out_from->rev_requests;

366:     out_from->bs = out_to->bs = bs;
367:     tag          = ((PetscObject)out)->tag;
368:     PetscObjectGetComm((PetscObject)out,&comm);

370:     /* Register the receives that you will use later (sends for scatter reverse) */
371:     for (i=0; i<out_from->n; i++) {
372:       MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
373:       MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
374:     }

376:     flg  = PETSC_FALSE;
377:     PetscOptionsGetBool(NULL,"-vecscatter_rsend",&flg,NULL);
378:     if (flg) {
379:       out_to->use_readyreceiver   = PETSC_TRUE;
380:       out_from->use_readyreceiver = PETSC_TRUE;
381:       for (i=0; i<out_to->n; i++) {
382:         MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
383:       }
384:       if (out_from->n) {MPI_Startall_irecv(out_from->starts[out_from->n]*out_from->bs,out_from->n,out_from->requests);}
385:       MPI_Barrier(comm);
386:       PetscInfo(in,"Using VecScatter ready receiver mode\n");
387:     } else {
388:       out_to->use_readyreceiver   = PETSC_FALSE;
389:       out_from->use_readyreceiver = PETSC_FALSE;
390:       flg                         = PETSC_FALSE;
391:       PetscOptionsGetBool(NULL,"-vecscatter_ssend",&flg,NULL);
392:       if (flg) {
393:         PetscInfo(in,"Using VecScatter Ssend mode\n");
394:       }
395:       for (i=0; i<out_to->n; i++) {
396:         if (!flg) {
397:           MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
398:         } else {
399:           MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
400:         }
401:       }
402:     }
403:     /* Register receives for scatter reverse */
404:     for (i=0; i<out_to->n; i++) {
405:       MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
406:     }
407:   }
408:   return(0);
409: }
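/*
   Editorial sketch (illustration only, not part of the original source): the requests built above
   with MPI_Send_init()/MPI_Recv_init()/MPI_Rsend_init() are persistent MPI requests.  A minimal
   standalone form of the pattern they rely on, with hypothetical buffer and argument names, is:
*/
#if 0
static void PersistentRequestPattern(PetscScalar *buf,PetscMPIInt count,PetscMPIInt src,PetscMPIInt tag,MPI_Comm comm)
{
  MPI_Request req;
  MPI_Recv_init(buf,count,MPIU_SCALAR,src,tag,comm,&req); /* create the persistent request once       */
  MPI_Start(&req);                                        /* start one communication round            */
  MPI_Wait(&req,MPI_STATUS_IGNORE);                       /* complete that round; req stays reusable  */
  MPI_Request_free(&req);                                 /* release it when the scatter is destroyed */
}
#endif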

413: PetscErrorCode VecScatterCopy_PtoP_AllToAll(VecScatter in,VecScatter out)
414: {
415:   VecScatter_MPI_General *in_to   = (VecScatter_MPI_General*)in->todata;
416:   VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
417:   PetscErrorCode         ierr;
418:   PetscInt               ny,bs = in_from->bs;
419:   PetscMPIInt            size;

422:   MPI_Comm_size(PetscObjectComm((PetscObject)in),&size);

424:   out->begin     = in->begin;
425:   out->end       = in->end;
426:   out->copy      = in->copy;
427:   out->destroy   = in->destroy;
428:   out->view      = in->view;

430:   /* allocate entire send scatter context */
431:   PetscNewLog(out,VecScatter_MPI_General,&out_to);
432:   PetscNewLog(out,VecScatter_MPI_General,&out_from);

434:   ny                = in_to->starts[in_to->n];
435:   out_to->n         = in_to->n;
436:   out_to->type      = in_to->type;
437:   out_to->sendfirst = in_to->sendfirst;

439:   PetscMalloc(out_to->n*sizeof(MPI_Request),&out_to->requests);
440:   PetscMalloc4(bs*ny,PetscScalar,&out_to->values,ny,PetscInt,&out_to->indices,out_to->n+1,PetscInt,&out_to->starts,out_to->n,PetscMPIInt,&out_to->procs);
441:   PetscMalloc2(PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->sstatus,PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->rstatus);
442:   PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
443:   PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
444:   PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));

446:   out->todata                        = (void*)out_to;
447:   out_to->local.n                    = in_to->local.n;
448:   out_to->local.nonmatching_computed = PETSC_FALSE;
449:   out_to->local.n_nonmatching        = 0;
450:   out_to->local.slots_nonmatching    = 0;
451:   if (in_to->local.n) {
452:     PetscMalloc(in_to->local.n*sizeof(PetscInt),&out_to->local.vslots);
453:     PetscMalloc(in_from->local.n*sizeof(PetscInt),&out_from->local.vslots);
454:     PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
455:     PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
456:   } else {
457:     out_to->local.vslots   = 0;
458:     out_from->local.vslots = 0;
459:   }

461:   /* allocate entire receive context */
462:   out_from->type      = in_from->type;
463:   ny                  = in_from->starts[in_from->n];
464:   out_from->n         = in_from->n;
465:   out_from->sendfirst = in_from->sendfirst;

467:   PetscMalloc(out_from->n*sizeof(MPI_Request),&out_from->requests);
468:   PetscMalloc4(ny*bs,PetscScalar,&out_from->values,ny,PetscInt,&out_from->indices,out_from->n+1,PetscInt,&out_from->starts,out_from->n,PetscMPIInt,&out_from->procs);
469:   PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
470:   PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
471:   PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));

473:   out->fromdata                        = (void*)out_from;
474:   out_from->local.n                    = in_from->local.n;
475:   out_from->local.nonmatching_computed = PETSC_FALSE;
476:   out_from->local.n_nonmatching        = 0;
477:   out_from->local.slots_nonmatching    = 0;

479:   out_to->use_alltoallv = out_from->use_alltoallv = PETSC_TRUE;

481:   PetscMalloc2(size,PetscMPIInt,&out_to->counts,size,PetscMPIInt,&out_to->displs);
482:   PetscMemcpy(out_to->counts,in_to->counts,size*sizeof(PetscMPIInt));
483:   PetscMemcpy(out_to->displs,in_to->displs,size*sizeof(PetscMPIInt));

485:   PetscMalloc2(size,PetscMPIInt,&out_from->counts,size,PetscMPIInt,&out_from->displs);
486:   PetscMemcpy(out_from->counts,in_from->counts,size*sizeof(PetscMPIInt));
487:   PetscMemcpy(out_from->displs,in_from->displs,size*sizeof(PetscMPIInt));
488:   return(0);
489: }
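/*
   Editorial sketch (illustration only): the counts/displs arrays copied above are in the form
   expected by MPI_Alltoallv(); the actual call happens elsewhere, but it would have roughly this
   shape (the buffer names are hypothetical):

     MPI_Alltoallv(sendbuf,to->counts,to->displs,MPIU_SCALAR,
                   recvbuf,from->counts,from->displs,MPIU_SCALAR,comm);
*/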
490: /* --------------------------------------------------------------------------------------------------
 491:     Pack and unpack message data into send buffers and out of receive buffers.

493:     These could be generated automatically.

495:     Fortran kernels etc. could be used.
496: */
497: PETSC_STATIC_INLINE void Pack_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
498: {
499:   PetscInt i;
500:   for (i=0; i<n; i++) y[i] = x[indicesx[i]];
501: }

505: PETSC_STATIC_INLINE PetscErrorCode UnPack_1(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
506: {
507:   PetscInt i;

510:   switch (addv) {
511:   case INSERT_VALUES:
512:   case INSERT_ALL_VALUES:
513:     for (i=0; i<n; i++) y[indicesy[i]] = x[i];
514:     break;
515:   case ADD_VALUES:
516:   case ADD_ALL_VALUES:
517:     for (i=0; i<n; i++) y[indicesy[i]] += x[i];
518:     break;
519: #if !defined(PETSC_USE_COMPLEX)
520:   case MAX_VALUES:
521:     for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[i]);
522: #else
523:   case MAX_VALUES:
524: #endif
525:   case NOT_SET_VALUES:
526:     break;
527:   default:
528:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
529:   }
530:   return(0);
531: }

535: PETSC_STATIC_INLINE PetscErrorCode Scatter_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
536: {
537:   PetscInt i;

540:   switch (addv) {
541:   case INSERT_VALUES:
542:   case INSERT_ALL_VALUES:
543:     for (i=0; i<n; i++) y[indicesy[i]] = x[indicesx[i]];
544:     break;
545:   case ADD_VALUES:
546:   case ADD_ALL_VALUES:
547:     for (i=0; i<n; i++) y[indicesy[i]] += x[indicesx[i]];
548:     break;
549: #if !defined(PETSC_USE_COMPLEX)
550:   case MAX_VALUES:
551:     for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[indicesx[i]]);
552: #else
553:   case MAX_VALUES:
554: #endif
555:   case NOT_SET_VALUES:
556:     break;
557:   default:
558:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
559:   }
560:   return(0);
561: }

563: /* ----------------------------------------------------------------------------------------------- */
564: PETSC_STATIC_INLINE void Pack_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
565: {
566:   PetscInt i,idx;

568:   for (i=0; i<n; i++) {
569:     idx  = *indicesx++;
570:     y[0] = x[idx];
571:     y[1] = x[idx+1];
572:     y   += 2;
573:   }
574: }

578: PETSC_STATIC_INLINE PetscErrorCode UnPack_2(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
579: {
580:   PetscInt i,idy;

583:   switch (addv) {
584:   case INSERT_VALUES:
585:   case INSERT_ALL_VALUES:
586:     for (i=0; i<n; i++) {
587:       idy      = *indicesy++;
588:       y[idy]   = x[0];
589:       y[idy+1] = x[1];
590:       x       += 2;
591:     }
592:     break;
593:   case ADD_VALUES:
594:   case ADD_ALL_VALUES:
595:     for (i=0; i<n; i++) {
596:       idy       = *indicesy++;
597:       y[idy]   += x[0];
598:       y[idy+1] += x[1];
599:       x        += 2;
600:     }
601:     break;
602: #if !defined(PETSC_USE_COMPLEX)
603:   case MAX_VALUES:
604:     for (i=0; i<n; i++) {
605:       idy      = *indicesy++;
606:       y[idy]   = PetscMax(y[idy],x[0]);
607:       y[idy+1] = PetscMax(y[idy+1],x[1]);
608:       x       += 2;
609:     }
610: #else
611:   case MAX_VALUES:
612: #endif
613:   case NOT_SET_VALUES:
614:     break;
615:   default:
616:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
617:   }
618:   return(0);
619: }

623: PETSC_STATIC_INLINE PetscErrorCode Scatter_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
624: {
625:   PetscInt i,idx,idy;

628:   switch (addv) {
629:   case INSERT_VALUES:
630:   case INSERT_ALL_VALUES:
631:     for (i=0; i<n; i++) {
632:       idx      = *indicesx++;
633:       idy      = *indicesy++;
634:       y[idy]   = x[idx];
635:       y[idy+1] = x[idx+1];
636:     }
637:     break;
638:   case ADD_VALUES:
639:   case ADD_ALL_VALUES:
640:     for (i=0; i<n; i++) {
641:       idx       = *indicesx++;
642:       idy       = *indicesy++;
643:       y[idy]   += x[idx];
644:       y[idy+1] += x[idx+1];
645:     }
646:     break;
647: #if !defined(PETSC_USE_COMPLEX)
648:   case MAX_VALUES:
649:     for (i=0; i<n; i++) {
650:       idx      = *indicesx++;
651:       idy      = *indicesy++;
652:       y[idy]   = PetscMax(y[idy],x[idx]);
653:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
654:     }
655: #else
656:   case MAX_VALUES:
657: #endif
658:   case NOT_SET_VALUES:
659:     break;
660:   default:
661:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
662:   }
663:   return(0);
664: }
665: /* ----------------------------------------------------------------------------------------------- */
666: PETSC_STATIC_INLINE void Pack_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
667: {
668:   PetscInt i,idx;

670:   for (i=0; i<n; i++) {
671:     idx  = *indicesx++;
672:     y[0] = x[idx];
673:     y[1] = x[idx+1];
674:     y[2] = x[idx+2];
675:     y   += 3;
676:   }
677: }
680: PETSC_STATIC_INLINE PetscErrorCode UnPack_3(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
681: {
682:   PetscInt i,idy;

685:   switch (addv) {
686:   case INSERT_VALUES:
687:   case INSERT_ALL_VALUES:
688:     for (i=0; i<n; i++) {
689:       idy      = *indicesy++;
690:       y[idy]   = x[0];
691:       y[idy+1] = x[1];
692:       y[idy+2] = x[2];
693:       x       += 3;
694:     }
695:     break;
696:   case ADD_VALUES:
697:   case ADD_ALL_VALUES:
698:     for (i=0; i<n; i++) {
699:       idy       = *indicesy++;
700:       y[idy]   += x[0];
701:       y[idy+1] += x[1];
702:       y[idy+2] += x[2];
703:       x        += 3;
704:     }
705:     break;
706: #if !defined(PETSC_USE_COMPLEX)
707:   case MAX_VALUES:
708:     for (i=0; i<n; i++) {
709:       idy      = *indicesy++;
710:       y[idy]   = PetscMax(y[idy],x[0]);
711:       y[idy+1] = PetscMax(y[idy+1],x[1]);
712:       y[idy+2] = PetscMax(y[idy+2],x[2]);
713:       x       += 3;
714:     }
715: #else
716:   case MAX_VALUES:
717: #endif
718:   case NOT_SET_VALUES:
719:     break;
720:   default:
721:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
722:   }
723:   return(0);
724: }

728: PETSC_STATIC_INLINE PetscErrorCode Scatter_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
729: {
730:   PetscInt i,idx,idy;

733:   switch (addv) {
734:   case INSERT_VALUES:
735:   case INSERT_ALL_VALUES:
736:     for (i=0; i<n; i++) {
737:       idx      = *indicesx++;
738:       idy      = *indicesy++;
739:       y[idy]   = x[idx];
740:       y[idy+1] = x[idx+1];
741:       y[idy+2] = x[idx+2];
742:     }
743:     break;
744:   case ADD_VALUES:
745:   case ADD_ALL_VALUES:
746:     for (i=0; i<n; i++) {
747:       idx       = *indicesx++;
748:       idy       = *indicesy++;
749:       y[idy]   += x[idx];
750:       y[idy+1] += x[idx+1];
751:       y[idy+2] += x[idx+2];
752:     }
753:     break;
754: #if !defined(PETSC_USE_COMPLEX)
755:   case MAX_VALUES:
756:     for (i=0; i<n; i++) {
757:       idx      = *indicesx++;
758:       idy      = *indicesy++;
759:       y[idy]   = PetscMax(y[idy],x[idx]);
760:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
761:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
762:     }
763: #else
764:   case MAX_VALUES:
765: #endif
766:   case NOT_SET_VALUES:
767:     break;
768:   default:
769:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
770:   }
771:   return(0);
772: }
773: /* ----------------------------------------------------------------------------------------------- */
774: PETSC_STATIC_INLINE void Pack_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
775: {
776:   PetscInt i,idx;

778:   for (i=0; i<n; i++) {
779:     idx  = *indicesx++;
780:     y[0] = x[idx];
781:     y[1] = x[idx+1];
782:     y[2] = x[idx+2];
783:     y[3] = x[idx+3];
784:     y   += 4;
785:   }
786: }
789: PETSC_STATIC_INLINE PetscErrorCode UnPack_4(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
790: {
791:   PetscInt i,idy;

794:   switch (addv) {
795:   case INSERT_VALUES:
796:   case INSERT_ALL_VALUES:
797:     for (i=0; i<n; i++) {
798:       idy      = *indicesy++;
799:       y[idy]   = x[0];
800:       y[idy+1] = x[1];
801:       y[idy+2] = x[2];
802:       y[idy+3] = x[3];
803:       x       += 4;
804:     }
805:     break;
806:   case ADD_VALUES:
807:   case ADD_ALL_VALUES:
808:     for (i=0; i<n; i++) {
809:       idy       = *indicesy++;
810:       y[idy]   += x[0];
811:       y[idy+1] += x[1];
812:       y[idy+2] += x[2];
813:       y[idy+3] += x[3];
814:       x        += 4;
815:     }
816:     break;
817: #if !defined(PETSC_USE_COMPLEX)
818:   case MAX_VALUES:
819:     for (i=0; i<n; i++) {
820:       idy      = *indicesy++;
821:       y[idy]   = PetscMax(y[idy],x[0]);
822:       y[idy+1] = PetscMax(y[idy+1],x[1]);
823:       y[idy+2] = PetscMax(y[idy+2],x[2]);
824:       y[idy+3] = PetscMax(y[idy+3],x[3]);
825:       x       += 4;
826:     }
827: #else
828:   case MAX_VALUES:
829: #endif
830:   case NOT_SET_VALUES:
831:     break;
832:   default:
833:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
834:   }
835:   return(0);
836: }

840: PETSC_STATIC_INLINE PetscErrorCode Scatter_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
841: {
842:   PetscInt i,idx,idy;

845:   switch (addv) {
846:   case INSERT_VALUES:
847:   case INSERT_ALL_VALUES:
848:     for (i=0; i<n; i++) {
849:       idx      = *indicesx++;
850:       idy      = *indicesy++;
851:       y[idy]   = x[idx];
852:       y[idy+1] = x[idx+1];
853:       y[idy+2] = x[idx+2];
854:       y[idy+3] = x[idx+3];
855:     }
856:     break;
857:   case ADD_VALUES:
858:   case ADD_ALL_VALUES:
859:     for (i=0; i<n; i++) {
860:       idx       = *indicesx++;
861:       idy       = *indicesy++;
862:       y[idy]   += x[idx];
863:       y[idy+1] += x[idx+1];
864:       y[idy+2] += x[idx+2];
865:       y[idy+3] += x[idx+3];
866:     }
867:     break;
868: #if !defined(PETSC_USE_COMPLEX)
869:   case MAX_VALUES:
870:     for (i=0; i<n; i++) {
871:       idx      = *indicesx++;
872:       idy      = *indicesy++;
873:       y[idy]   = PetscMax(y[idy],x[idx]);
874:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
875:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
876:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
877:     }
878: #else
879:   case MAX_VALUES:
880: #endif
881:   case NOT_SET_VALUES:
882:     break;
883:   default:
884:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
885:   }
886:   return(0);
887: }
888: /* ----------------------------------------------------------------------------------------------- */
889: PETSC_STATIC_INLINE void Pack_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
890: {
891:   PetscInt i,idx;

893:   for (i=0; i<n; i++) {
894:     idx  = *indicesx++;
895:     y[0] = x[idx];
896:     y[1] = x[idx+1];
897:     y[2] = x[idx+2];
898:     y[3] = x[idx+3];
899:     y[4] = x[idx+4];
900:     y   += 5;
901:   }
902: }

906: PETSC_STATIC_INLINE PetscErrorCode UnPack_5(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
907: {
908:   PetscInt i,idy;

911:   switch (addv) {
912:   case INSERT_VALUES:
913:   case INSERT_ALL_VALUES:
914:     for (i=0; i<n; i++) {
915:       idy      = *indicesy++;
916:       y[idy]   = x[0];
917:       y[idy+1] = x[1];
918:       y[idy+2] = x[2];
919:       y[idy+3] = x[3];
920:       y[idy+4] = x[4];
921:       x       += 5;
922:     }
923:     break;
924:   case ADD_VALUES:
925:   case ADD_ALL_VALUES:
926:     for (i=0; i<n; i++) {
927:       idy       = *indicesy++;
928:       y[idy]   += x[0];
929:       y[idy+1] += x[1];
930:       y[idy+2] += x[2];
931:       y[idy+3] += x[3];
932:       y[idy+4] += x[4];
933:       x        += 5;
934:     }
935:     break;
936: #if !defined(PETSC_USE_COMPLEX)
937:   case MAX_VALUES:
938:     for (i=0; i<n; i++) {
939:       idy      = *indicesy++;
940:       y[idy]   = PetscMax(y[idy],x[0]);
941:       y[idy+1] = PetscMax(y[idy+1],x[1]);
942:       y[idy+2] = PetscMax(y[idy+2],x[2]);
943:       y[idy+3] = PetscMax(y[idy+3],x[3]);
944:       y[idy+4] = PetscMax(y[idy+4],x[4]);
945:       x       += 5;
946:     }
947: #else
948:   case MAX_VALUES:
949: #endif
950:   case NOT_SET_VALUES:
951:     break;
952:   default:
953:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
954:   }
955:   return(0);
956: }

960: PETSC_STATIC_INLINE PetscErrorCode Scatter_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
961: {
962:   PetscInt i,idx,idy;

965:   switch (addv) {
966:   case INSERT_VALUES:
967:   case INSERT_ALL_VALUES:
968:     for (i=0; i<n; i++) {
969:       idx      = *indicesx++;
970:       idy      = *indicesy++;
971:       y[idy]   = x[idx];
972:       y[idy+1] = x[idx+1];
973:       y[idy+2] = x[idx+2];
974:       y[idy+3] = x[idx+3];
975:       y[idy+4] = x[idx+4];
976:     }
977:     break;
978:   case ADD_VALUES:
979:   case ADD_ALL_VALUES:
980:     for (i=0; i<n; i++) {
981:       idx       = *indicesx++;
982:       idy       = *indicesy++;
983:       y[idy]   += x[idx];
984:       y[idy+1] += x[idx+1];
985:       y[idy+2] += x[idx+2];
986:       y[idy+3] += x[idx+3];
987:       y[idy+4] += x[idx+4];
988:     }
989:     break;
990: #if !defined(PETSC_USE_COMPLEX)
991:   case MAX_VALUES:
992:     for (i=0; i<n; i++) {
993:       idx      = *indicesx++;
994:       idy      = *indicesy++;
995:       y[idy]   = PetscMax(y[idy],x[idx]);
996:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
997:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
998:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
999:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1000:     }
1001: #else
1002:   case MAX_VALUES:
1003: #endif
1004:   case NOT_SET_VALUES:
1005:     break;
1006:   default:
1007:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1008:   }
1009:   return(0);
1010: }
1011: /* ----------------------------------------------------------------------------------------------- */
1012: PETSC_STATIC_INLINE void Pack_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
1013: {
1014:   PetscInt i,idx;

1016:   for (i=0; i<n; i++) {
1017:     idx  = *indicesx++;
1018:     y[0] = x[idx];
1019:     y[1] = x[idx+1];
1020:     y[2] = x[idx+2];
1021:     y[3] = x[idx+3];
1022:     y[4] = x[idx+4];
1023:     y[5] = x[idx+5];
1024:     y   += 6;
1025:   }
1026: }

1030: PETSC_STATIC_INLINE PetscErrorCode UnPack_6(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1031: {
1032:   PetscInt i,idy;

1035:   switch (addv) {
1036:   case INSERT_VALUES:
1037:   case INSERT_ALL_VALUES:
1038:     for (i=0; i<n; i++) {
1039:       idy      = *indicesy++;
1040:       y[idy]   = x[0];
1041:       y[idy+1] = x[1];
1042:       y[idy+2] = x[2];
1043:       y[idy+3] = x[3];
1044:       y[idy+4] = x[4];
1045:       y[idy+5] = x[5];
1046:       x       += 6;
1047:     }
1048:     break;
1049:   case ADD_VALUES:
1050:   case ADD_ALL_VALUES:
1051:     for (i=0; i<n; i++) {
1052:       idy       = *indicesy++;
1053:       y[idy]   += x[0];
1054:       y[idy+1] += x[1];
1055:       y[idy+2] += x[2];
1056:       y[idy+3] += x[3];
1057:       y[idy+4] += x[4];
1058:       y[idy+5] += x[5];
1059:       x        += 6;
1060:     }
1061:     break;
1062: #if !defined(PETSC_USE_COMPLEX)
1063:   case MAX_VALUES:
1064:     for (i=0; i<n; i++) {
1065:       idy      = *indicesy++;
1066:       y[idy]   = PetscMax(y[idy],x[0]);
1067:       y[idy+1] = PetscMax(y[idy+1],x[1]);
1068:       y[idy+2] = PetscMax(y[idy+2],x[2]);
1069:       y[idy+3] = PetscMax(y[idy+3],x[3]);
1070:       y[idy+4] = PetscMax(y[idy+4],x[4]);
1071:       y[idy+5] = PetscMax(y[idy+5],x[5]);
1072:       x       += 6;
1073:     }
1074: #else
1075:   case MAX_VALUES:
1076: #endif
1077:   case NOT_SET_VALUES:
1078:     break;
1079:   default:
1080:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1081:   }
1082:   return(0);
1083: }

1087: PETSC_STATIC_INLINE PetscErrorCode Scatter_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1088: {
1089:   PetscInt i,idx,idy;

1092:   switch (addv) {
1093:   case INSERT_VALUES:
1094:   case INSERT_ALL_VALUES:
1095:     for (i=0; i<n; i++) {
1096:       idx      = *indicesx++;
1097:       idy      = *indicesy++;
1098:       y[idy]   = x[idx];
1099:       y[idy+1] = x[idx+1];
1100:       y[idy+2] = x[idx+2];
1101:       y[idy+3] = x[idx+3];
1102:       y[idy+4] = x[idx+4];
1103:       y[idy+5] = x[idx+5];
1104:     }
1105:     break;
1106:   case ADD_VALUES:
1107:   case ADD_ALL_VALUES:
1108:     for (i=0; i<n; i++) {
1109:       idx       = *indicesx++;
1110:       idy       = *indicesy++;
1111:       y[idy]   += x[idx];
1112:       y[idy+1] += x[idx+1];
1113:       y[idy+2] += x[idx+2];
1114:       y[idy+3] += x[idx+3];
1115:       y[idy+4] += x[idx+4];
1116:       y[idy+5] += x[idx+5];
1117:     }
1118:     break;
1119: #if !defined(PETSC_USE_COMPLEX)
1120:   case MAX_VALUES:
1121:     for (i=0; i<n; i++) {
1122:       idx      = *indicesx++;
1123:       idy      = *indicesy++;
1124:       y[idy]   = PetscMax(y[idy],x[idx]);
1125:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1126:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1127:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1128:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1129:       y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1130:     }
1131: #else
1132:   case MAX_VALUES:
1133: #endif
1134:   case NOT_SET_VALUES:
1135:     break;
1136:   default:
1137:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1138:   }
1139:   return(0);
1140: }
1141: /* ----------------------------------------------------------------------------------------------- */
1142: PETSC_STATIC_INLINE void Pack_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
1143: {
1144:   PetscInt i,idx;

1146:   for (i=0; i<n; i++) {
1147:     idx  = *indicesx++;
1148:     y[0] = x[idx];
1149:     y[1] = x[idx+1];
1150:     y[2] = x[idx+2];
1151:     y[3] = x[idx+3];
1152:     y[4] = x[idx+4];
1153:     y[5] = x[idx+5];
1154:     y[6] = x[idx+6];
1155:     y   += 7;
1156:   }
1157: }

1161: PETSC_STATIC_INLINE PetscErrorCode UnPack_7(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1162: {
1163:   PetscInt i,idy;

1166:   switch (addv) {
1167:   case INSERT_VALUES:
1168:   case INSERT_ALL_VALUES:
1169:     for (i=0; i<n; i++) {
1170:       idy      = *indicesy++;
1171:       y[idy]   = x[0];
1172:       y[idy+1] = x[1];
1173:       y[idy+2] = x[2];
1174:       y[idy+3] = x[3];
1175:       y[idy+4] = x[4];
1176:       y[idy+5] = x[5];
1177:       y[idy+6] = x[6];
1178:       x       += 7;
1179:     }
1180:     break;
1181:   case ADD_VALUES:
1182:   case ADD_ALL_VALUES:
1183:     for (i=0; i<n; i++) {
1184:       idy       = *indicesy++;
1185:       y[idy]   += x[0];
1186:       y[idy+1] += x[1];
1187:       y[idy+2] += x[2];
1188:       y[idy+3] += x[3];
1189:       y[idy+4] += x[4];
1190:       y[idy+5] += x[5];
1191:       y[idy+6] += x[6];
1192:       x        += 7;
1193:     }
1194:     break;
1195: #if !defined(PETSC_USE_COMPLEX)
1196:   case MAX_VALUES:
1197:     for (i=0; i<n; i++) {
1198:       idy      = *indicesy++;
1199:       y[idy]   = PetscMax(y[idy],x[0]);
1200:       y[idy+1] = PetscMax(y[idy+1],x[1]);
1201:       y[idy+2] = PetscMax(y[idy+2],x[2]);
1202:       y[idy+3] = PetscMax(y[idy+3],x[3]);
1203:       y[idy+4] = PetscMax(y[idy+4],x[4]);
1204:       y[idy+5] = PetscMax(y[idy+5],x[5]);
1205:       y[idy+6] = PetscMax(y[idy+6],x[6]);
1206:       x       += 7;
1207:     }
1208: #else
1209:   case MAX_VALUES:
1210: #endif
1211:   case NOT_SET_VALUES:
1212:     break;
1213:   default:
1214:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1215:   }
1216:   return(0);
1217: }

1221: PETSC_STATIC_INLINE PetscErrorCode Scatter_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1222: {
1223:   PetscInt i,idx,idy;

1226:   switch (addv) {
1227:   case INSERT_VALUES:
1228:   case INSERT_ALL_VALUES:
1229:     for (i=0; i<n; i++) {
1230:       idx      = *indicesx++;
1231:       idy      = *indicesy++;
1232:       y[idy]   = x[idx];
1233:       y[idy+1] = x[idx+1];
1234:       y[idy+2] = x[idx+2];
1235:       y[idy+3] = x[idx+3];
1236:       y[idy+4] = x[idx+4];
1237:       y[idy+5] = x[idx+5];
1238:       y[idy+6] = x[idx+6];
1239:     }
1240:     break;
1241:   case ADD_VALUES:
1242:   case ADD_ALL_VALUES:
1243:     for (i=0; i<n; i++) {
1244:       idx       = *indicesx++;
1245:       idy       = *indicesy++;
1246:       y[idy]   += x[idx];
1247:       y[idy+1] += x[idx+1];
1248:       y[idy+2] += x[idx+2];
1249:       y[idy+3] += x[idx+3];
1250:       y[idy+4] += x[idx+4];
1251:       y[idy+5] += x[idx+5];
1252:       y[idy+6] += x[idx+6];
1253:     }
1254:     break;
1255: #if !defined(PETSC_USE_COMPLEX)
1256:   case MAX_VALUES:
1257:     for (i=0; i<n; i++) {
1258:       idx      = *indicesx++;
1259:       idy      = *indicesy++;
1260:       y[idy]   = PetscMax(y[idy],x[idx]);
1261:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1262:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1263:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1264:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1265:       y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1266:       y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1267:     }
1268: #else
1269:   case MAX_VALUES:
1270: #endif
1271:   case NOT_SET_VALUES:
1272:     break;
1273:   default:
1274:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1275:   }
1276:   return(0);
1277: }
1278: /* ----------------------------------------------------------------------------------------------- */
1279: PETSC_STATIC_INLINE void Pack_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
1280: {
1281:   PetscInt i,idx;

1283:   for (i=0; i<n; i++) {
1284:     idx  = *indicesx++;
1285:     y[0] = x[idx];
1286:     y[1] = x[idx+1];
1287:     y[2] = x[idx+2];
1288:     y[3] = x[idx+3];
1289:     y[4] = x[idx+4];
1290:     y[5] = x[idx+5];
1291:     y[6] = x[idx+6];
1292:     y[7] = x[idx+7];
1293:     y   += 8;
1294:   }
1295: }

1299: PETSC_STATIC_INLINE PetscErrorCode UnPack_8(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1300: {
1301:   PetscInt i,idy;

1304:   switch (addv) {
1305:   case INSERT_VALUES:
1306:   case INSERT_ALL_VALUES:
1307:     for (i=0; i<n; i++) {
1308:       idy      = *indicesy++;
1309:       y[idy]   = x[0];
1310:       y[idy+1] = x[1];
1311:       y[idy+2] = x[2];
1312:       y[idy+3] = x[3];
1313:       y[idy+4] = x[4];
1314:       y[idy+5] = x[5];
1315:       y[idy+6] = x[6];
1316:       y[idy+7] = x[7];
1317:       x       += 8;
1318:     }
1319:     break;
1320:   case ADD_VALUES:
1321:   case ADD_ALL_VALUES:
1322:     for (i=0; i<n; i++) {
1323:       idy       = *indicesy++;
1324:       y[idy]   += x[0];
1325:       y[idy+1] += x[1];
1326:       y[idy+2] += x[2];
1327:       y[idy+3] += x[3];
1328:       y[idy+4] += x[4];
1329:       y[idy+5] += x[5];
1330:       y[idy+6] += x[6];
1331:       y[idy+7] += x[7];
1332:       x        += 8;
1333:     }
1334:     break;
1335: #if !defined(PETSC_USE_COMPLEX)
1336:   case MAX_VALUES:
1337:     for (i=0; i<n; i++) {
1338:       idy      = *indicesy++;
1339:       y[idy]   = PetscMax(y[idy],x[0]);
1340:       y[idy+1] = PetscMax(y[idy+1],x[1]);
1341:       y[idy+2] = PetscMax(y[idy+2],x[2]);
1342:       y[idy+3] = PetscMax(y[idy+3],x[3]);
1343:       y[idy+4] = PetscMax(y[idy+4],x[4]);
1344:       y[idy+5] = PetscMax(y[idy+5],x[5]);
1345:       y[idy+6] = PetscMax(y[idy+6],x[6]);
1346:       y[idy+7] = PetscMax(y[idy+7],x[7]);
1347:       x       += 8;
1348:     }
1349: #else
1350:   case MAX_VALUES:
1351: #endif
1352:   case NOT_SET_VALUES:
1353:     break;
1354:   default:
1355:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1356:   }
1357:   return(0);
1358: }

1362: PETSC_STATIC_INLINE PetscErrorCode Scatter_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1363: {
1364:   PetscInt i,idx,idy;

1367:   switch (addv) {
1368:   case INSERT_VALUES:
1369:   case INSERT_ALL_VALUES:
1370:     for (i=0; i<n; i++) {
1371:       idx      = *indicesx++;
1372:       idy      = *indicesy++;
1373:       y[idy]   = x[idx];
1374:       y[idy+1] = x[idx+1];
1375:       y[idy+2] = x[idx+2];
1376:       y[idy+3] = x[idx+3];
1377:       y[idy+4] = x[idx+4];
1378:       y[idy+5] = x[idx+5];
1379:       y[idy+6] = x[idx+6];
1380:       y[idy+7] = x[idx+7];
1381:     }
1382:     break;
1383:   case ADD_VALUES:
1384:   case ADD_ALL_VALUES:
1385:     for (i=0; i<n; i++) {
1386:       idx       = *indicesx++;
1387:       idy       = *indicesy++;
1388:       y[idy]   += x[idx];
1389:       y[idy+1] += x[idx+1];
1390:       y[idy+2] += x[idx+2];
1391:       y[idy+3] += x[idx+3];
1392:       y[idy+4] += x[idx+4];
1393:       y[idy+5] += x[idx+5];
1394:       y[idy+6] += x[idx+6];
1395:       y[idy+7] += x[idx+7];
1396:     }
1397:     break;
1398: #if !defined(PETSC_USE_COMPLEX)
1399:   case MAX_VALUES:
1400:     for (i=0; i<n; i++) {
1401:       idx      = *indicesx++;
1402:       idy      = *indicesy++;
1403:       y[idy]   = PetscMax(y[idy],x[idx]);
1404:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1405:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1406:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1407:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1408:       y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1409:       y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1410:       y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1411:     }
1412: #else
1413:   case MAX_VALUES:
1414: #endif
1415:   case NOT_SET_VALUES:
1416:     break;
1417:   default:
1418:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1419:   }
1420:   return(0);
1421: }

1423: /* ----------------------------------------------------------------------------------------------- */
1424: PETSC_STATIC_INLINE void Pack_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
1425: {
1426:   PetscInt i,idx;

1428:   for (i=0; i<n; i++) {
1429:     idx   = *indicesx++;
1430:     y[0]  = x[idx];
1431:     y[1]  = x[idx+1];
1432:     y[2]  = x[idx+2];
1433:     y[3]  = x[idx+3];
1434:     y[4]  = x[idx+4];
1435:     y[5]  = x[idx+5];
1436:     y[6]  = x[idx+6];
1437:     y[7]  = x[idx+7];
1438:     y[8]  = x[idx+8];
1439:     y[9]  = x[idx+9];
1440:     y[10] = x[idx+10];
1441:     y[11] = x[idx+11];
1442:     y    += 12;
1443:   }
1444: }

1448: PETSC_STATIC_INLINE PetscErrorCode UnPack_12(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1449: {
1450:   PetscInt i,idy;

1453:   switch (addv) {
1454:   case INSERT_VALUES:
1455:   case INSERT_ALL_VALUES:
1456:     for (i=0; i<n; i++) {
1457:       idy       = *indicesy++;
1458:       y[idy]    = x[0];
1459:       y[idy+1]  = x[1];
1460:       y[idy+2]  = x[2];
1461:       y[idy+3]  = x[3];
1462:       y[idy+4]  = x[4];
1463:       y[idy+5]  = x[5];
1464:       y[idy+6]  = x[6];
1465:       y[idy+7]  = x[7];
1466:       y[idy+8]  = x[8];
1467:       y[idy+9]  = x[9];
1468:       y[idy+10] = x[10];
1469:       y[idy+11] = x[11];
1470:       x        += 12;
1471:     }
1472:     break;
1473:   case ADD_VALUES:
1474:   case ADD_ALL_VALUES:
1475:     for (i=0; i<n; i++) {
1476:       idy        = *indicesy++;
1477:       y[idy]    += x[0];
1478:       y[idy+1]  += x[1];
1479:       y[idy+2]  += x[2];
1480:       y[idy+3]  += x[3];
1481:       y[idy+4]  += x[4];
1482:       y[idy+5]  += x[5];
1483:       y[idy+6]  += x[6];
1484:       y[idy+7]  += x[7];
1485:       y[idy+8]  += x[8];
1486:       y[idy+9]  += x[9];
1487:       y[idy+10] += x[10];
1488:       y[idy+11] += x[11];
1489:       x         += 12;
1490:     }
1491:     break;
1492: #if !defined(PETSC_USE_COMPLEX)
1493:   case MAX_VALUES:
1494:     for (i=0; i<n; i++) {
1495:       idy       = *indicesy++;
1496:       y[idy]    = PetscMax(y[idy],x[0]);
1497:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1498:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1499:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1500:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1501:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1502:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1503:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1504:       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1505:       y[idy+9]  = PetscMax(y[idy+9],x[9]);
1506:       y[idy+10] = PetscMax(y[idy+10],x[10]);
1507:       y[idy+11] = PetscMax(y[idy+11],x[11]);
1508:       x        += 12;
1509:     }
1510: #else
1511:   case MAX_VALUES:
1512: #endif
1513:   case NOT_SET_VALUES:
1514:     break;
1515:   default:
1516:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1517:   }
1518:   return(0);
1519: }

1523: PETSC_STATIC_INLINE PetscErrorCode Scatter_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1524: {
1525:   PetscInt i,idx,idy;

1528:   switch (addv) {
1529:   case INSERT_VALUES:
1530:   case INSERT_ALL_VALUES:
1531:     for (i=0; i<n; i++) {
1532:       idx       = *indicesx++;
1533:       idy       = *indicesy++;
1534:       y[idy]    = x[idx];
1535:       y[idy+1]  = x[idx+1];
1536:       y[idy+2]  = x[idx+2];
1537:       y[idy+3]  = x[idx+3];
1538:       y[idy+4]  = x[idx+4];
1539:       y[idy+5]  = x[idx+5];
1540:       y[idy+6]  = x[idx+6];
1541:       y[idy+7]  = x[idx+7];
1542:       y[idy+8]  = x[idx+8];
1543:       y[idy+9]  = x[idx+9];
1544:       y[idy+10] = x[idx+10];
1545:       y[idy+11] = x[idx+11];
1546:     }
1547:     break;
1548:   case ADD_VALUES:
1549:   case ADD_ALL_VALUES:
1550:     for (i=0; i<n; i++) {
1551:       idx        = *indicesx++;
1552:       idy        = *indicesy++;
1553:       y[idy]    += x[idx];
1554:       y[idy+1]  += x[idx+1];
1555:       y[idy+2]  += x[idx+2];
1556:       y[idy+3]  += x[idx+3];
1557:       y[idy+4]  += x[idx+4];
1558:       y[idy+5]  += x[idx+5];
1559:       y[idy+6]  += x[idx+6];
1560:       y[idy+7]  += x[idx+7];
1561:       y[idy+8]  += x[idx+8];
1562:       y[idy+9]  += x[idx+9];
1563:       y[idy+10] += x[idx+10];
1564:       y[idy+11] += x[idx+11];
1565:     }
1566:     break;
1567: #if !defined(PETSC_USE_COMPLEX)
1568:   case MAX_VALUES:
1569:     for (i=0; i<n; i++) {
1570:       idx       = *indicesx++;
1571:       idy       = *indicesy++;
1572:       y[idy]    = PetscMax(y[idy],x[idx]);
1573:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1574:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1575:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1576:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1577:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1578:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1579:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1580:       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1581:       y[idy+9]  = PetscMax(y[idy+9],x[idx+9]);
1582:       y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1583:       y[idy+11] = PetscMax(y[idy+11],x[idx+11]);
1584:     }
1585: #else
1586:   case MAX_VALUES:
1587: #endif
1588:   case NOT_SET_VALUES:
1589:     break;
1590:   default:
1591:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1592:   }
1593:   return(0);
1594: }

1596: /* Create the VecScatterBegin/End_P for our chosen block sizes */
1597: #define BS 1
1598: #include <../src/vec/vec/utils/vpscat.h>
1599: #define BS 2
1600: #include <../src/vec/vec/utils/vpscat.h>
1601: #define BS 3
1602: #include <../src/vec/vec/utils/vpscat.h>
1603: #define BS 4
1604: #include <../src/vec/vec/utils/vpscat.h>
1605: #define BS 5
1606: #include <../src/vec/vec/utils/vpscat.h>
1607: #define BS 6
1608: #include <../src/vec/vec/utils/vpscat.h>
1609: #define BS 7
1610: #include <../src/vec/vec/utils/vpscat.h>
1611: #define BS 8
1612: #include <../src/vec/vec/utils/vpscat.h>
1613: #define BS 12
1614: #include <../src/vec/vec/utils/vpscat.h>
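/*
   Editorial sketch (an assumption about vpscat.h, for illustration only): each inclusion above is
   expected to instantiate block-size-specific VecScatterBegin/End routines from a template keyed
   on the BS macro, roughly along the lines of the hypothetical fragment below; the real contents
   of vpscat.h may differ.
*/
#if 0
#define PETSCMAP1_a(a,b)  a ## _ ## b
#define PETSCMAP1_b(a,b)  PETSCMAP1_a(a,b)
#define PETSCMAP1(a)      PETSCMAP1_b(a,BS)      /* e.g. PETSCMAP1(VecScatterBegin) -> VecScatterBegin_12 */
PetscErrorCode PETSCMAP1(VecScatterBegin)(VecScatter,Vec,Vec,InsertMode,ScatterMode);
#undef BS                                        /* so the next "#define BS n / #include" pair is clean   */
#endif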

1616: /* ==========================================================================================*/

1618: /*              create parallel to sequential scatter context                           */

1620: PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General*,VecScatter_MPI_General*,VecScatter);

1624: /*@
1625:      VecScatterCreateLocal - Creates a VecScatter from a list of messages it must send and receive.

1627:      Collective on VecScatter

1629:    Input Parameters:
1630: +     VecScatter - obtained with VecScatterCreateEmpty()
1631: .     nsends - number of messages this process will send
1632: .     sendSizes - number of entries in each message to be sent
1633: .     sendProcs - ranks of the processes the messages are sent to
1634: .     sendIdx - indices where the sent entries are obtained from (in local, on-process numbering), this is one long array of size \sum_{i=0,i<nsends} sendSizes[i]
1635: .     nrecvs - number of receives to expect
1636: .     recvSizes - number of entries in each message to be received
1637: .     recvProcs - ranks of the processes that send to this process
1638: .     recvIdx - indices where received entries are to be put (in local, on-process numbering), this is one long array of size \sum_{i=0,i<nrecvs} recvSizes[i]
1639: -     bs - size of block

1641:      Notes:  sendSizes[] and recvSizes[] cannot contain any 0 entries. If you want to support 0 entries you need
1642:       to change the code below to "compress out" the sendProcs[] and recvProcs[] entries that have size 0.

1644:        Probably does not handle sends to self properly; those should be removed from the counts that are used
1645:       in allocating space inside the from struct.

1647:   Level: intermediate

1649: @*/
1650: PetscErrorCode VecScatterCreateLocal(VecScatter ctx,PetscInt nsends,const PetscInt sendSizes[],const PetscInt sendProcs[],const PetscInt sendIdx[],PetscInt nrecvs,const PetscInt recvSizes[],const PetscInt recvProcs[],const PetscInt recvIdx[],PetscInt bs)
1651: {
1652:   VecScatter_MPI_General *from, *to;
1653:   PetscInt               sendSize, recvSize;
1654:   PetscInt               n, i;
1655:   PetscErrorCode         ierr;

1657:   /* allocate entire send scatter context */
1658:   PetscNewLog(ctx,VecScatter_MPI_General,&to);
1659:   to->n = nsends;
1660:   for (n = 0, sendSize = 0; n < to->n; n++) sendSize += sendSizes[n];

1662:   PetscMalloc(to->n*sizeof(MPI_Request),&to->requests);
1663:   PetscMalloc4(bs*sendSize,PetscScalar,&to->values,sendSize,PetscInt,&to->indices,to->n+1,PetscInt,&to->starts,to->n,PetscMPIInt,&to->procs);
1664:   PetscMalloc2(PetscMax(to->n,nrecvs),MPI_Status,&to->sstatus,PetscMax(to->n,nrecvs),MPI_Status,&to->rstatus);

1666:   to->starts[0] = 0;
1667:   for (n = 0; n < to->n; n++) {
1668:     if (sendSizes[n] <=0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"sendSizes[n=%D] = %D cannot be less than 1",n,sendSizes[n]);
1669:     to->starts[n+1] = to->starts[n] + sendSizes[n];
1670:     to->procs[n]    = sendProcs[n];
1671:     for (i = to->starts[n]; i < to->starts[n]+sendSizes[n]; i++) to->indices[i] = sendIdx[i];
1672:   }
1673:   ctx->todata = (void*) to;

1675:   /* allocate entire receive scatter context */
1676:   PetscNewLog(ctx,VecScatter_MPI_General,&from);
1677:   from->n = nrecvs;
1678:   for (n = 0, recvSize = 0; n < from->n; n++) recvSize += recvSizes[n];

1680:   PetscMalloc(from->n*sizeof(MPI_Request),&from->requests);
1681:   PetscMalloc4(bs*recvSize,PetscScalar,&from->values,recvSize,PetscInt,&from->indices,from->n+1,PetscInt,&from->starts,from->n,PetscMPIInt,&from->procs);

1683:   from->starts[0] = 0;
1684:   for (n = 0; n < from->n; n++) {
1685:     if (recvSizes[n] <=0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"recvSizes[n=%D] = %D cannot be less than 1",n,recvSizes[n]);
1686:     from->starts[n+1] = from->starts[n] + recvSizes[n];
1687:     from->procs[n]    = recvProcs[n];
1688:     for (i = from->starts[n]; i < from->starts[n]+recvSizes[n]; i++) from->indices[i] = recvIdx[i];
1689:   }
1690:   ctx->fromdata = (void*)from;

1692:   /* No local scatter optimization */
1693:   from->local.n                    = 0;
1694:   from->local.vslots               = 0;
1695:   to->local.n                      = 0;
1696:   to->local.vslots                 = 0;
1697:   from->local.nonmatching_computed = PETSC_FALSE;
1698:   from->local.n_nonmatching        = 0;
1699:   from->local.slots_nonmatching    = 0;
1700:   to->local.nonmatching_computed   = PETSC_FALSE;
1701:   to->local.n_nonmatching          = 0;
1702:   to->local.slots_nonmatching      = 0;

1704:   from->type = VEC_SCATTER_MPI_GENERAL;
1705:   to->type   = VEC_SCATTER_MPI_GENERAL;
1706:   from->bs   = bs;
1707:   to->bs     = bs;
1708:   VecScatterCreateCommon_PtoS(from, to, ctx);

1710:   /* mark lengths as negative so it won't check local vector lengths */
1711:   ctx->from_n = ctx->to_n = -1;
1712:   return(0);
1713: }
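/*
   Editor's note: a minimal usage sketch for VecScatterCreateLocal(), assuming the
   VecScatterCreateEmpty() routine referenced in the manual page above takes a communicator
   and returns a blank scatter context; the message lists here are hypothetical and must
   describe a communication pattern that is consistent across all ranks.
*/
#if 0
static PetscErrorCode BuildScatterFromMessageLists(MPI_Comm comm,VecScatter *newctx)
{
  VecScatter     ctx;
  PetscErrorCode ierr;
  /* hypothetical pattern: this rank sends 2 entries to rank 1 and receives 2 entries from rank 1 */
  PetscInt       nsends       = 1, nrecvs       = 1;
  PetscInt       sendSizes[1] = {2}, recvSizes[1] = {2};
  PetscInt       sendProcs[1] = {1}, recvProcs[1] = {1};
  PetscInt       sendIdx[2]   = {0,3};   /* local indices gathered from the source vector */
  PetscInt       recvIdx[2]   = {5,7};   /* local indices filled in the destination vector */

  /* assumed signature: VecScatterCreateEmpty(comm,&ctx), as referenced in the manual page above */
  ierr = VecScatterCreateEmpty(comm,&ctx);CHKERRQ(ierr);
  ierr = VecScatterCreateLocal(ctx,nsends,sendSizes,sendProcs,sendIdx,nrecvs,recvSizes,recvProcs,recvIdx,1);CHKERRQ(ierr);
  *newctx = ctx;
  return 0;
}
#endif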

1715: /*
1716:    bs indicates how many elements there are in each block. Normally this would be 1.

1718:    Contains a check that PetscMPIInt can handle the sizes needed.
1719: */
1722: PetscErrorCode VecScatterCreate_PtoS(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
1723: {
1724:   VecScatter_MPI_General *from,*to;
1725:   PetscMPIInt            size,rank,imdex,tag,n;
1726:   PetscInt               *source = NULL,*owners = NULL;
1727:   PetscInt               *lowner = NULL,*start = NULL,lengthy,lengthx;
1728:   PetscMPIInt            *nprocs = NULL,nrecvs;
1729:   PetscInt               i,j,idx,nsends;
1730:   PetscInt               *owner = NULL,*starts = NULL,count,slen;
1731:   PetscInt               *rvalues,*svalues,base,*values,nprocslocal,recvtotal,*rsvalues;
1732:   PetscMPIInt            *onodes1,*olengths1;
1733:   MPI_Comm               comm;
1734:   MPI_Request            *send_waits = NULL,*recv_waits = NULL;
1735:   MPI_Status             recv_status,*send_status;
1736:   PetscErrorCode         ierr;

1739:   PetscObjectGetNewTag((PetscObject)ctx,&tag);
1740:   PetscObjectGetComm((PetscObject)xin,&comm);
1741:   MPI_Comm_rank(comm,&rank);
1742:   MPI_Comm_size(comm,&size);
1743:   owners = xin->map->range;
1744:   VecGetSize(yin,&lengthy);
1745:   VecGetSize(xin,&lengthx);

1747:   /*  first count number of contributors to each processor */
1748:   PetscMalloc2(size,PetscMPIInt,&nprocs,nx,PetscInt,&owner);
1749:   PetscMemzero(nprocs,size*sizeof(PetscMPIInt));

1751:   j      = 0;
1752:   nsends = 0;
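  /* find the owning process of each block index: the inner search resumes from the
     previous owner j and only restarts from 0 when the indices are not locally sorted */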
1753:   for (i=0; i<nx; i++) {
1754:     idx = bs*inidx[i];
1755:     if (idx < owners[j]) j = 0;
1756:     for (; j<size; j++) {
1757:       if (idx < owners[j+1]) {
1758:         if (!nprocs[j]++) nsends++;
1759:         owner[i] = j;
1760:         break;
1761:       }
1762:     }
1763:     if (j == size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ith %D block entry %D not owned by any process, upper bound %D",i,idx,owners[size]);
1764:   }

1766:   nprocslocal  = nprocs[rank];
1767:   nprocs[rank] = 0;
1768:   if (nprocslocal) nsends--;
1769:   /* inform other processors of number of messages and max length */
1770:   PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
1771:   PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
1772:   PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
1773:   recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];

1775:   /* post receives:   */
1776:   PetscMalloc3(recvtotal,PetscInt,&rvalues,nrecvs,PetscInt,&source,nrecvs,MPI_Request,&recv_waits);
1777:   count = 0;
1778:   for (i=0; i<nrecvs; i++) {
1779:     MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
1780:     count += olengths1[i];
1781:   }

1783:   /* do sends:
1784:      1) starts[i] gives the starting index in svalues for stuff going to
1785:      the ith processor
1786:   */
1787:   PetscMalloc3(nx,PetscInt,&svalues,nsends,MPI_Request,&send_waits,size+1,PetscInt,&starts);

1789:   starts[0]  = 0;
1790:   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
1791:   for (i=0; i<nx; i++) {
1792:     if (owner[i] != rank) svalues[starts[owner[i]]++] = bs*inidx[i];
1793:   }
1794:   starts[0] = 0;
1795:   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
1796:   count = 0;
1797:   for (i=0; i<size; i++) {
1798:     if (nprocs[i]) {
1799:       MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);
1800:     }
1801:   }

1803:   /*  wait on receives */
1804:   count = nrecvs;
1805:   slen  = 0;
1806:   while (count) {
1807:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1808:     /* data was received directly into rvalues[]; just accumulate the total length */
1809:     MPI_Get_count(&recv_status,MPIU_INT,&n);
1810:     slen += n;
1811:     count--;
1812:   }

1814:   if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);

1816:   /* allocate entire send scatter context */
1817:   PetscNewLog(ctx,VecScatter_MPI_General,&to);
1818:   to->n = nrecvs;

1820:   PetscMalloc(nrecvs*sizeof(MPI_Request),&to->requests);
1821:   PetscMalloc4(bs*slen,PetscScalar,&to->values,slen,PetscInt,&to->indices,nrecvs+1,PetscInt,&to->starts,nrecvs,PetscMPIInt,&to->procs);
1822:   PetscMalloc2(PetscMax(to->n,nsends),MPI_Status,&to->sstatus,PetscMax(to->n,nsends),MPI_Status,&to->rstatus);

1824:   ctx->todata   = (void*)to;
1825:   to->starts[0] = 0;

1827:   if (nrecvs) {
1828:     /* move the data into the send scatter */
1829:     base     = owners[rank];
1830:     rsvalues = rvalues;
1831:     for (i=0; i<nrecvs; i++) {
1832:       to->starts[i+1] = to->starts[i] + olengths1[i];
1833:       to->procs[i]    = onodes1[i];
1834:       values = rsvalues;
1835:       rsvalues += olengths1[i];
1836:       for (j=0; j<olengths1[i]; j++) to->indices[to->starts[i] + j] = values[j] - base;
1837:     }
1838:   }
1839:   PetscFree(olengths1);
1840:   PetscFree(onodes1);
1841:   PetscFree3(rvalues,source,recv_waits);

1843:   /* allocate entire receive scatter context */
1844:   PetscNewLog(ctx,VecScatter_MPI_General,&from);
1845:   from->n = nsends;

1847:   PetscMalloc(nsends*sizeof(MPI_Request),&from->requests);
1848:   PetscMalloc4((ny-nprocslocal)*bs,PetscScalar,&from->values,ny-nprocslocal,PetscInt,&from->indices,nsends+1,PetscInt,&from->starts,from->n,PetscMPIInt,&from->procs);
1849:   ctx->fromdata = (void*)from;

1851:   /* move data into receive scatter */
1852:   PetscMalloc2(size,PetscInt,&lowner,nsends+1,PetscInt,&start);
1853:   count = 0; from->starts[0] = start[0] = 0;
1854:   for (i=0; i<size; i++) {
1855:     if (nprocs[i]) {
1856:       lowner[i]            = count;
1857:       from->procs[count++] = i;
1858:       from->starts[count]  = start[count] = start[count-1] + nprocs[i];
1859:     }
1860:   }

1862:   for (i=0; i<nx; i++) {
1863:     if (owner[i] != rank) {
1864:       from->indices[start[lowner[owner[i]]]++] = bs*inidy[i];
1865:       if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
1866:     }
1867:   }
1868:   PetscFree2(lowner,start);
1869:   PetscFree2(nprocs,owner);

1871:   /* wait on sends */
1872:   if (nsends) {
1873:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1874:     MPI_Waitall(nsends,send_waits,send_status);
1875:     PetscFree(send_status);
1876:   }
1877:   PetscFree3(svalues,send_waits,starts);

1879:   if (nprocslocal) {
1880:     PetscInt nt = from->local.n = to->local.n = nprocslocal;
1881:     /* we have a scatter to ourselves */
1882:     PetscMalloc(nt*sizeof(PetscInt),&to->local.vslots);
1883:     PetscMalloc(nt*sizeof(PetscInt),&from->local.vslots);
1884:     nt   = 0;
1885:     for (i=0; i<nx; i++) {
1886:       idx = bs*inidx[i];
1887:       if (idx >= owners[rank] && idx < owners[rank+1]) {
1888:         to->local.vslots[nt]     = idx - owners[rank];
1889:         from->local.vslots[nt++] = bs*inidy[i];
1890:         if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
1891:       }
1892:     }
1893:   } else {
1894:     from->local.n      = 0;
1895:     from->local.vslots = 0;
1896:     to->local.n        = 0;
1897:     to->local.vslots   = 0;
1898:   }

1900:   from->local.nonmatching_computed = PETSC_FALSE;
1901:   from->local.n_nonmatching        = 0;
1902:   from->local.slots_nonmatching    = 0;
1903:   to->local.nonmatching_computed   = PETSC_FALSE;
1904:   to->local.n_nonmatching          = 0;
1905:   to->local.slots_nonmatching      = 0;

1907:   from->type = VEC_SCATTER_MPI_GENERAL;
1908:   to->type   = VEC_SCATTER_MPI_GENERAL;
1909:   from->bs   = bs;
1910:   to->bs     = bs;

1912:   VecScatterCreateCommon_PtoS(from,to,ctx);
1913:   return(0);
1914: }
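/*
   Editor's note: a compact sketch of the message-setup ("rendezvous") pattern used by
   VecScatterCreate_PtoS() above, with the per-rank send counts, tag, and payload left
   hypothetical; it only illustrates how PetscGatherNumberOfMessages() and
   PetscGatherMessageLengths() pair with the posted MPI_Irecv()s, not the index bookkeeping.
*/
#if 0
static PetscErrorCode SetupIncomingMessages_Sketch(MPI_Comm comm,PetscMPIInt size,PetscMPIInt *nprocs,PetscMPIInt tag)
{
  PetscMPIInt nsends = 0,nrecvs,*onodes,*olengths,i;
  PetscInt    recvtotal = 0,count,*rvalues;
  MPI_Request *recv_waits;

  for (i=0; i<size; i++) nsends += (nprocs[i] > 0);       /* number of nonempty messages I send */
  /* every rank learns how many messages it will receive and how long each one is */
  PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
  PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes,&olengths);
  for (i=0; i<nrecvs; i++) recvtotal += olengths[i];

  /* post one receive per incoming message, packed back to back in rvalues[] */
  PetscMalloc(recvtotal*sizeof(PetscInt),&rvalues);
  PetscMalloc(nrecvs*sizeof(MPI_Request),&recv_waits);
  count = 0;
  for (i=0; i<nrecvs; i++) {
    MPI_Irecv(rvalues+count,olengths[i],MPIU_INT,onodes[i],tag,comm,recv_waits+i);
    count += olengths[i];
  }
  /* the matching MPI_Isend()s and the MPI_Waitany() drain then follow, as in the routine above */
  return 0;
}
#endif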

1916: /*
1917:    bs indicates how many elements there are in each block. Normally this would be 1.
1918: */
1921: PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General *from,VecScatter_MPI_General *to,VecScatter ctx)
1922: {
1923:   MPI_Comm       comm;
1924:   PetscMPIInt    tag  = ((PetscObject)ctx)->tag, tagr;
1925:   PetscInt       bs   = to->bs;
1926:   PetscMPIInt    size;
1927:   PetscInt       i, n;

1931:   PetscObjectGetComm((PetscObject)ctx,&comm);
1932:   PetscObjectGetNewTag((PetscObject)ctx,&tagr);
1933:   ctx->destroy = VecScatterDestroy_PtoP;

1935:   ctx->reproduce = PETSC_FALSE;
1936:   to->sendfirst  = PETSC_FALSE;
1937:   PetscOptionsGetBool(NULL,"-vecscatter_reproduce",&ctx->reproduce,NULL);
1938:   PetscOptionsGetBool(NULL,"-vecscatter_sendfirst",&to->sendfirst,NULL);
1939:   from->sendfirst = to->sendfirst;

1941:   MPI_Comm_size(comm,&size);
1942:   /* check if the receives are ALL going into contiguous locations; if so can skip indexing */
1943:   to->contiq = PETSC_FALSE;
1944:   n = from->starts[from->n];
1945:   from->contiq = PETSC_TRUE;
1946:   for (i=1; i<n; i++) {
1947:     if (from->indices[i] != from->indices[i-1] + bs) {
1948:       from->contiq = PETSC_FALSE;
1949:       break;
1950:     }
1951:   }

1953:   to->use_alltoallv = PETSC_FALSE;
1954:   PetscOptionsGetBool(NULL,"-vecscatter_alltoall",&to->use_alltoallv,NULL);
1955:   from->use_alltoallv = to->use_alltoallv;
1956:   if (from->use_alltoallv) PetscInfo(ctx,"Using MPI_Alltoallv() for scatter\n");
1957: #if defined(PETSC_HAVE_MPI_ALLTOALLW)  && !defined(PETSC_USE_64BIT_INDICES)
1958:   if (to->use_alltoallv) {
1959:     to->use_alltoallw = PETSC_FALSE;
1960:     PetscOptionsGetBool(NULL,"-vecscatter_nopack",&to->use_alltoallw,NULL);
1961:   }
1962:   from->use_alltoallw = to->use_alltoallw;
1963:   if (from->use_alltoallw) PetscInfo(ctx,"Using MPI_Alltoallw() for scatter\n");
1964: #endif

1966: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
1967:   to->use_window = PETSC_FALSE;
1968:   PetscOptionsGetBool(NULL,"-vecscatter_window",&to->use_window,NULL);
1969:   from->use_window = to->use_window;
1970: #endif

1972:   if (to->use_alltoallv) {

1974:     PetscMalloc2(size,PetscMPIInt,&to->counts,size,PetscMPIInt,&to->displs);
1975:     PetscMemzero(to->counts,size*sizeof(PetscMPIInt));
1976:     for (i=0; i<to->n; i++) to->counts[to->procs[i]] = bs*(to->starts[i+1] - to->starts[i]);

1978:     to->displs[0] = 0;
1979:     for (i=1; i<size; i++) to->displs[i] = to->displs[i-1] + to->counts[i-1];

1981:     PetscMalloc2(size,PetscMPIInt,&from->counts,size,PetscMPIInt,&from->displs);
1982:     PetscMemzero(from->counts,size*sizeof(PetscMPIInt));
1983:     for (i=0; i<from->n; i++) from->counts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
1984:     from->displs[0] = 0;
1985:     for (i=1; i<size; i++) from->displs[i] = from->displs[i-1] + from->counts[i-1];

1987: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
1988:     if (to->use_alltoallw) {
1989:       PetscMPIInt mpibs, mpilen;

1991:       ctx->packtogether = PETSC_FALSE;
1992:       PetscMPIIntCast(bs,&mpibs);
1993:       PetscMalloc3(size,PetscMPIInt,&to->wcounts,size,PetscMPIInt,&to->wdispls,size,MPI_Datatype,&to->types);
1994:       PetscMemzero(to->wcounts,size*sizeof(PetscMPIInt));
1995:       PetscMemzero(to->wdispls,size*sizeof(PetscMPIInt));
1996:       for (i=0; i<size; i++) to->types[i] = MPIU_SCALAR;

1998:       for (i=0; i<to->n; i++) {
1999:         to->wcounts[to->procs[i]] = 1;
2000:         PetscMPIIntCast(to->starts[i+1]-to->starts[i],&mpilen);
2001:         MPI_Type_create_indexed_block(mpilen,mpibs,to->indices+to->starts[i],MPIU_SCALAR,to->types+to->procs[i]);
2002:         MPI_Type_commit(to->types+to->procs[i]);
2003:       }
2004:       PetscMalloc3(size,PetscMPIInt,&from->wcounts,size,PetscMPIInt,&from->wdispls,size,MPI_Datatype,&from->types);
2005:       PetscMemzero(from->wcounts,size*sizeof(PetscMPIInt));
2006:       PetscMemzero(from->wdispls,size*sizeof(PetscMPIInt));
2007:       for (i=0; i<size; i++) from->types[i] = MPIU_SCALAR;

2009:       if (from->contiq) {
2010:         PetscInfo(ctx,"Scattered vector entries are stored contiguously, taking advantage of this with -vecscatter_alltoall\n");
2011:         for (i=0; i<from->n; i++) from->wcounts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);

2013:         if (from->n) from->wdispls[from->procs[0]] = sizeof(PetscScalar)*from->indices[0];
2014:         for (i=1; i<from->n; i++) from->wdispls[from->procs[i]] = from->wdispls[from->procs[i-1]] + sizeof(PetscScalar)*from->wcounts[from->procs[i-1]];

2016:       } else {
2017:         for (i=0; i<from->n; i++) {
2018:           from->wcounts[from->procs[i]] = 1;
2019:           PetscMPIIntCast(from->starts[i+1]-from->starts[i],&mpilen);
2020:           MPI_Type_create_indexed_block(mpilen,mpibs,from->indices+from->starts[i],MPIU_SCALAR,from->types+from->procs[i]);
2021:           MPI_Type_commit(from->types+from->procs[i]);
2022:         }
2023:       }
2024:     } else ctx->copy = VecScatterCopy_PtoP_AllToAll;

2026: #else
2027:     to->use_alltoallw   = PETSC_FALSE;
2028:     from->use_alltoallw = PETSC_FALSE;
2029:     ctx->copy           = VecScatterCopy_PtoP_AllToAll;
2030: #endif
2031: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
2032:   } else if (to->use_window) {
2033:     PetscMPIInt temptag,winsize;
2034:     MPI_Request *request;
2035:     MPI_Status  *status;

2037:     PetscObjectGetNewTag((PetscObject)ctx,&temptag);
2038:     winsize = (to->n ? to->starts[to->n] : 0)*bs*sizeof(PetscScalar);
2039:     MPI_Win_create(to->values ? to->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&to->window);
2040:     PetscMalloc(to->n*sizeof(PetscInt),&to->winstarts);
2041:     PetscMalloc2(to->n,MPI_Request,&request,to->n,MPI_Status,&status);
2042:     for (i=0; i<to->n; i++) {
2043:       MPI_Irecv(to->winstarts+i,1,MPIU_INT,to->procs[i],temptag,comm,request+i);
2044:     }
2045:     for (i=0; i<from->n; i++) {
2046:       MPI_Send(from->starts+i,1,MPIU_INT,from->procs[i],temptag,comm);
2047:     }
2048:     MPI_Waitall(to->n,request,status);
2049:     PetscFree2(request,status);

2051:     winsize = (from->n ? from->starts[from->n] : 0)*bs*sizeof(PetscScalar);
2052:     MPI_Win_create(from->values ? from->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&from->window);
2053:     PetscMalloc(from->n*sizeof(PetscInt),&from->winstarts);
2054:     PetscMalloc2(from->n,MPI_Request,&request,from->n,MPI_Status,&status);
2055:     for (i=0; i<from->n; i++) {
2056:       MPI_Irecv(from->winstarts+i,1,MPIU_INT,from->procs[i],temptag,comm,request+i);
2057:     }
2058:     for (i=0; i<to->n; i++) {
2059:       MPI_Send(to->starts+i,1,MPIU_INT,to->procs[i],temptag,comm);
2060:     }
2061:     MPI_Waitall(from->n,request,status);
2062:     PetscFree2(request,status);
2063: #endif
2064:   } else {
2065:     PetscBool   use_rsend = PETSC_FALSE, use_ssend = PETSC_FALSE;
2066:     PetscInt    *sstarts  = to->starts,  *rstarts = from->starts;
2067:     PetscMPIInt *sprocs   = to->procs,   *rprocs  = from->procs;
2068:     MPI_Request *swaits   = to->requests,*rwaits  = from->requests;
2069:     MPI_Request *rev_swaits,*rev_rwaits;
2070:     PetscScalar *Ssvalues = to->values, *Srvalues = from->values;

2072:     /* allocate additional wait variables for the "reverse" scatter */
2073:     PetscMalloc(to->n*sizeof(MPI_Request),&rev_rwaits);
2074:     PetscMalloc(from->n*sizeof(MPI_Request),&rev_swaits);
2075:     to->rev_requests   = rev_rwaits;
2076:     from->rev_requests = rev_swaits;

2078:     /* Register the receives that you will use later (sends for scatter reverse) */
2079:     PetscOptionsGetBool(NULL,"-vecscatter_rsend",&use_rsend,NULL);
2080:     PetscOptionsGetBool(NULL,"-vecscatter_ssend",&use_ssend,NULL);
2081:     if (use_rsend) {
2082:       PetscInfo(ctx,"Using VecScatter ready receiver mode\n");
2083:       to->use_readyreceiver   = PETSC_TRUE;
2084:       from->use_readyreceiver = PETSC_TRUE;
2085:     } else {
2086:       to->use_readyreceiver   = PETSC_FALSE;
2087:       from->use_readyreceiver = PETSC_FALSE;
2088:     }
2089:     if (use_ssend) {
2090:       PetscInfo(ctx,"Using VecScatter Ssend mode\n");
2091:     }

2093:     for (i=0; i<from->n; i++) {
2094:       if (use_rsend) {
2095:         MPI_Rsend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2096:       } else if (use_ssend) {
2097:         MPI_Ssend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2098:       } else {
2099:         MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2100:       }
2101:     }

2103:     for (i=0; i<to->n; i++) {
2104:       if (use_rsend) {
2105:         MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2106:       } else if (use_ssend) {
2107:         MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2108:       } else {
2109:         MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2110:       }
2111:     }
2112:     /* Register receives for scatter and reverse */
2113:     for (i=0; i<from->n; i++) {
2114:       MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
2115:     }
2116:     for (i=0; i<to->n; i++) {
2117:       MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tagr,comm,rev_rwaits+i);
2118:     }
2119:     if (use_rsend) {
2120:       if (to->n)   {MPI_Startall_irecv(to->starts[to->n]*to->bs,to->n,to->rev_requests);}
2121:       if (from->n) {MPI_Startall_irecv(from->starts[from->n]*from->bs,from->n,from->requests);}
2122:       MPI_Barrier(comm);
2123:     }

2125:     ctx->copy = VecScatterCopy_PtoP_X;
2126:   }
2127:   PetscInfo1(ctx,"Using blocksize %D scatter\n",bs);

2129: #if defined(PETSC_USE_DEBUG)
2130:   MPI_Allreduce(&bs,&i,1,MPIU_INT,MPI_MIN,PetscObjectComm((PetscObject)ctx));
2131:   MPI_Allreduce(&bs,&n,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)ctx));
2132:   if (bs!=i || bs!=n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Block size %D != %D or %D",bs,i,n);
2133: #endif

2135:   switch (bs) {
2136:   case 12:
2137:     ctx->begin = VecScatterBegin_12;
2138:     ctx->end   = VecScatterEnd_12;
2139:     break;
2140:   case 8:
2141:     ctx->begin = VecScatterBegin_8;
2142:     ctx->end   = VecScatterEnd_8;
2143:     break;
2144:   case 7:
2145:     ctx->begin = VecScatterBegin_7;
2146:     ctx->end   = VecScatterEnd_7;
2147:     break;
2148:   case 6:
2149:     ctx->begin = VecScatterBegin_6;
2150:     ctx->end   = VecScatterEnd_6;
2151:     break;
2152:   case 5:
2153:     ctx->begin = VecScatterBegin_5;
2154:     ctx->end   = VecScatterEnd_5;
2155:     break;
2156:   case 4:
2157:     ctx->begin = VecScatterBegin_4;
2158:     ctx->end   = VecScatterEnd_4;
2159:     break;
2160:   case 3:
2161:     ctx->begin = VecScatterBegin_3;
2162:     ctx->end   = VecScatterEnd_3;
2163:     break;
2164:   case 2:
2165:     ctx->begin = VecScatterBegin_2;
2166:     ctx->end   = VecScatterEnd_2;
2167:     break;
2168:   case 1:
2169:     ctx->begin = VecScatterBegin_1;
2170:     ctx->end   = VecScatterEnd_1;
2171:     break;
2172:   default:
2173:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Blocksize not supported");
2174:   }
2175:   ctx->view = VecScatterView_MPI;
2176:   /* Check if the local scatter is actually a copy; important special case */
2177:   if (to->local.n) {
2178:     VecScatterLocalOptimizeCopy_Private(ctx,&to->local,&from->local,bs);
2179:   }
2180:   return(0);
2181: }
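/*
   Editor's note: a standalone sketch of the MPI persistent-request pattern that the default
   branch of VecScatterCreateCommon_PtoS() sets up; the buffers, count, peer rank, and tag are
   hypothetical. The requests built once here are what the block-size-specific scatter
   begin/end routines later fire with MPI_Startall() and complete with MPI_Waitall().
*/
#if 0
static void PersistentExchange_Sketch(MPI_Comm comm,PetscMPIInt peer,PetscScalar *sendbuf,PetscScalar *recvbuf,PetscMPIInt n,PetscMPIInt tag)
{
  MPI_Request req[2];
  MPI_Status  stat[2];

  /* set up once: describe the messages but do not start them yet */
  MPI_Send_init(sendbuf,n,MPIU_SCALAR,peer,tag,comm,&req[0]);
  MPI_Recv_init(recvbuf,n,MPIU_SCALAR,peer,tag,comm,&req[1]);

  /* every scatter: start both persistent requests, then wait for completion */
  MPI_Startall(2,req);
  MPI_Waitall(2,req,stat);

  /* tear down when the scatter context is destroyed */
  MPI_Request_free(&req[0]);
  MPI_Request_free(&req[1]);
}
#endif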



2185: /* ------------------------------------------------------------------------------------*/
2186: /*
2187:          Scatter from local Seq vectors to a parallel vector.
2188:          Reverses the order of the arguments, calls VecScatterCreate_PtoS(), and then
2189:          swaps the resulting to/from data and requests.
2190: */
2193: PetscErrorCode VecScatterCreate_StoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2194: {
2195:   PetscErrorCode         ierr;
2196:   MPI_Request            *waits;
2197:   VecScatter_MPI_General *to,*from;

2200:   VecScatterCreate_PtoS(ny,inidy,nx,inidx,yin,xin,bs,ctx);
2201:   to            = (VecScatter_MPI_General*)ctx->fromdata;
2202:   from          = (VecScatter_MPI_General*)ctx->todata;
2203:   ctx->todata   = (void*)to;
2204:   ctx->fromdata = (void*)from;
2205:   /* these two are special, they are ALWAYS stored in the to struct */
2206:   to->sstatus   = from->sstatus;
2207:   to->rstatus   = from->rstatus;

2209:   from->sstatus = 0;
2210:   from->rstatus = 0;

2212:   waits              = from->rev_requests;
2213:   from->rev_requests = from->requests;
2214:   from->requests     = waits;
2215:   waits              = to->rev_requests;
2216:   to->rev_requests   = to->requests;
2217:   to->requests       = waits;
2218:   return(0);
2219: }
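/*
   Editor's note: a user-level sketch of the kind of call that reaches VecScatterCreate_StoP():
   scattering from a sequential vector into an MPI vector through the public VecScatterCreate()
   interface. The sizes and index sets are hypothetical (each rank simply targets its own
   ownership range of the parallel vector).
*/
#if 0
static PetscErrorCode SeqToMPIScatter_Sketch(MPI_Comm comm,PetscInt nlocal)
{
  Vec            xseq,ympi;
  IS             ix,iy;
  VecScatter     scat;
  PetscInt       rstart;
  PetscErrorCode ierr;

  ierr = VecCreateSeq(PETSC_COMM_SELF,nlocal,&xseq);CHKERRQ(ierr);
  ierr = VecCreateMPI(comm,nlocal,PETSC_DETERMINE,&ympi);CHKERRQ(ierr);
  ierr = VecGetOwnershipRange(ympi,&rstart,NULL);CHKERRQ(ierr);
  /* hypothetical index sets: send local entries 0..nlocal-1 of xseq into this rank's block of ympi */
  ierr = ISCreateStride(PETSC_COMM_SELF,nlocal,0,1,&ix);CHKERRQ(ierr);
  ierr = ISCreateStride(PETSC_COMM_SELF,nlocal,rstart,1,&iy);CHKERRQ(ierr);
  ierr = VecScatterCreate(xseq,ix,ympi,iy,&scat);CHKERRQ(ierr);
  ierr = VecScatterBegin(scat,xseq,ympi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(scat,xseq,ympi,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&scat);CHKERRQ(ierr);
  ierr = ISDestroy(&ix);CHKERRQ(ierr);
  ierr = ISDestroy(&iy);CHKERRQ(ierr);
  ierr = VecDestroy(&xseq);CHKERRQ(ierr);
  ierr = VecDestroy(&ympi);CHKERRQ(ierr);
  return 0;
}
#endif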

2221: /* ---------------------------------------------------------------------------------*/
2224: PetscErrorCode VecScatterCreate_PtoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2225: {
2227:   PetscMPIInt    size,rank,tag,imdex,n;
2228:   PetscInt       *owners = xin->map->range;
2229:   PetscMPIInt    *nprocs = NULL;
2230:   PetscInt       i,j,idx,nsends,*local_inidx = NULL,*local_inidy = NULL;
2231:   PetscInt       *owner   = NULL,*starts  = NULL,count,slen;
2232:   PetscInt       *rvalues = NULL,*svalues = NULL,base,*values = NULL,*rsvalues,recvtotal,lastidx;
2233:   PetscMPIInt    *onodes1,*olengths1,nrecvs;
2234:   MPI_Comm       comm;
2235:   MPI_Request    *send_waits = NULL,*recv_waits = NULL;
2236:   MPI_Status     recv_status,*send_status = NULL;
2237:   PetscBool      duplicate = PETSC_FALSE;
2238: #if defined(PETSC_USE_DEBUG)
2239:   PetscBool      found = PETSC_FALSE;
2240: #endif

2243:   PetscObjectGetNewTag((PetscObject)ctx,&tag);
2244:   PetscObjectGetComm((PetscObject)xin,&comm);
2245:   MPI_Comm_size(comm,&size);
2246:   MPI_Comm_rank(comm,&rank);
2247:   if (size == 1) {
2248:     VecScatterCreate_StoP(nx,inidx,ny,inidy,xin,yin,bs,ctx);
2249:     return(0);
2250:   }

2252:   /*
2253:      Each processor ships off its inidx[j] and inidy[j] to the appropriate processor
2254:      Each process then calls VecScatterCreate_StoP() with its local pieces.
2255:   */
2256:   /*  first count number of contributors to each processor */
2257:   PetscMalloc3(size,PetscMPIInt,&nprocs,nx,PetscInt,&owner,(size+1),PetscInt,&starts);
2258:   PetscMemzero(nprocs,size*sizeof(PetscMPIInt));

2260:   lastidx = -1;
2261:   j       = 0;
2262:   for (i=0; i<nx; i++) {
2263:     /* if indices are NOT locally sorted, need to start search at the beginning */
2264:     if (lastidx > (idx = bs*inidx[i])) j = 0;
2265:     lastidx = idx;
2266:     for (; j<size; j++) {
2267:       if (idx >= owners[j] && idx < owners[j+1]) {
2268:         nprocs[j]++;
2269:         owner[i] = j;
2270: #if defined(PETSC_USE_DEBUG)
2271:         found = PETSC_TRUE;
2272: #endif
2273:         break;
2274:       }
2275:     }
2276: #if defined(PETSC_USE_DEBUG)
2277:     if (!found) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2278:     found = PETSC_FALSE;
2279: #endif
2280:   }
2281:   nsends = 0;
2282:   for (i=0; i<size; i++) nsends += (nprocs[i] > 0);

2284:   /* inform other processors of number of messages and max length */
2285:   PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2286:   PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2287:   PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2288:   recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];

2290:   /* post receives:   */
2291:   PetscMalloc5(2*recvtotal,PetscInt,&rvalues,2*nx,PetscInt,&svalues,nrecvs,MPI_Request,&recv_waits,nsends,MPI_Request,&send_waits,nsends,MPI_Status,&send_status);

2293:   count = 0;
2294:   for (i=0; i<nrecvs; i++) {
2295:     MPI_Irecv((rvalues+2*count),2*olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2296:     count += olengths1[i];
2297:   }
2298:   PetscFree(onodes1);

2300:   /* do sends:
2301:       1) starts[i] gives the starting index in svalues for stuff going to
2302:          the ith processor
2303:   */
2304:   starts[0]= 0;
2305:   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2306:   for (i=0; i<nx; i++) {
2307:     svalues[2*starts[owner[i]]]       = bs*inidx[i];
2308:     svalues[1 + 2*starts[owner[i]]++] = bs*inidy[i];
2309:   }

2311:   starts[0] = 0;
2312:   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2313:   count = 0;
2314:   for (i=0; i<size; i++) {
2315:     if (nprocs[i]) {
2316:       MPI_Isend(svalues+2*starts[i],2*nprocs[i],MPIU_INT,i,tag,comm,send_waits+count);
2317:       count++;
2318:     }
2319:   }
2320:   PetscFree3(nprocs,owner,starts);

2322:   /*  wait on receives */
2323:   count = nrecvs;
2324:   slen  = 0;
2325:   while (count) {
2326:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2327:     /* data was received directly into rvalues[]; just accumulate the number of (from,to) pairs */
2328:     MPI_Get_count(&recv_status,MPIU_INT,&n);
2329:     slen += n/2;
2330:     count--;
2331:   }
2332:   if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);

2334:   PetscMalloc2(slen,PetscInt,&local_inidx,slen,PetscInt,&local_inidy);
2335:   base     = owners[rank];
2336:   count    = 0;
2337:   rsvalues = rvalues;
2338:   for (i=0; i<nrecvs; i++) {
2339:     values    = rsvalues;
2340:     rsvalues += 2*olengths1[i];
2341:     for (j=0; j<olengths1[i]; j++) {
2342:       local_inidx[count]   = values[2*j] - base;
2343:       local_inidy[count++] = values[2*j+1];
2344:     }
2345:   }
2346:   PetscFree(olengths1);

2348:   /* wait on sends */
2349:   if (nsends) {MPI_Waitall(nsends,send_waits,send_status);}
2350:   PetscFree5(rvalues,svalues,recv_waits,send_waits,send_status);

2352:   /*
2353:      should sort and remove duplicates from local_inidx,local_inidy
2354:   */

2356: #if defined(do_it_slow)
2357:   /* sort on the from index */
2358:   PetscSortIntWithArray(slen,local_inidx,local_inidy);
2359:   start = 0;
2360:   while (start < slen) {
2361:     count = start+1;
2362:     last  = local_inidx[start];
2363:     while (count < slen && last == local_inidx[count]) count++;
2364:     if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
2365:       /* sort on to index */
2366:       PetscSortInt(count-start,local_inidy+start);
2367:     }
2368:     /* remove duplicates; not the most efficient way, but probably good enough */
2369:     i = start;
2370:     while (i < count-1) {
2371:       if (local_inidy[i] != local_inidy[i+1]) i++;
2372:       else { /* found a duplicate */
2373:         duplicate = PETSC_TRUE;
2374:         for (j=i; j<slen-1; j++) {
2375:           local_inidx[j] = local_inidx[j+1];
2376:           local_inidy[j] = local_inidy[j+1];
2377:         }
2378:         slen--;
2379:         count--;
2380:       }
2381:     }
2382:     start = count;
2383:   }
2384: #endif
2385:   if (duplicate) {
2386:     PetscInfo(ctx,"Duplicate from/to indices passed in VecScatterCreate(); they are ignored\n");
2387:   }
2388:   VecScatterCreate_StoP(slen,local_inidx,slen,local_inidy,xin,yin,bs,ctx);
2389:   PetscFree2(local_inidx,local_inidy);
2390:   return(0);
2391: }