Actual source code: vpscat.c

  1: #define PETSCVEC_DLL

  3: /*
  4:     Defines parallel vector scatters.
  5: */

 7:  #include "private/isimpl.h"
 8:  #include "private/vecimpl.h"
 9:  #include "../src/vec/vec/impls/dvecimpl.h"
 10:  #include "../src/vec/vec/impls/mpi/pvecimpl.h"

 14: PetscErrorCode VecScatterView_MPI(VecScatter ctx,PetscViewer viewer)
 15: {
 16:   VecScatter_MPI_General *to=(VecScatter_MPI_General*)ctx->todata;
 17:   VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
 18:   PetscErrorCode         ierr;
 19:   PetscInt               i;
 20:   PetscMPIInt            rank;
 21:   PetscViewerFormat      format;
 22:   PetscTruth             iascii;

 25:   PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);
 26:   if (iascii) {
 27:     MPI_Comm_rank(((PetscObject)ctx)->comm,&rank);
 28:     PetscViewerGetFormat(viewer,&format);
 29:     if (format == PETSC_VIEWER_ASCII_INFO) {
 30:       PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;

 32:       MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,((PetscObject)ctx)->comm);
 33:       MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,((PetscObject)ctx)->comm);
 34:       itmp = to->starts[to->n];   /* starts[] has n+1 entries; starts[n] is the total length */
 35:       MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,((PetscObject)ctx)->comm);
 36:       itmp = from->starts[from->n];
 37:       MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,((PetscObject)ctx)->comm);
 38:       MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,((PetscObject)ctx)->comm);

 40:       PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
 41:       PetscViewerASCIIPrintf(viewer,"  Maximum number sends %D\n",nsend_max);
 42:       PetscViewerASCIIPrintf(viewer,"  Maximum number receives %D\n",nrecv_max);
 43:       PetscViewerASCIIPrintf(viewer,"  Maximum data sent %D\n",(int)(lensend_max*to->bs*sizeof(PetscScalar)));
 44:       PetscViewerASCIIPrintf(viewer,"  Maximum data received %D\n",(int)(lenrecv_max*to->bs*sizeof(PetscScalar)));
 45:       PetscViewerASCIIPrintf(viewer,"  Total data sent %D\n",(int)(alldata*to->bs*sizeof(PetscScalar)));

 47:     } else {
 48:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);
 49:       if (to->n) {
 50:         for (i=0; i<to->n; i++){
 51:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d]   %D length = %D to whom %D\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
 52:         }
 53:         PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote sends (in order by process sent to)\n");
 54:         for (i=0; i<to->starts[to->n]; i++){
 55:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d]%D \n",rank,to->indices[i]);
 56:         }
 57:       }

 59:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d]Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);
 60:       if (from->n) {
 61:         for (i=0; i<from->n; i++){
 62:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %D\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
 63:         }

 65:         PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote receives (in order by process received from)\n");
 66:         for (i=0; i<from->starts[from->n]; i++){
 67:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d]%D \n",rank,from->indices[i]);
 68:         }
 69:       }
 70:       if (to->local.n) {
 71:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d]Indices for local part of scatter\n",rank);
 72:         for (i=0; i<to->local.n; i++){
 73:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d]From %D to %D \n",rank,from->local.vslots[i],to->local.vslots[i]);
 74:         }
 75:       }

 77:       PetscViewerFlush(viewer);
 78:     }
 79:   } else {
 80:     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported for this scatter",((PetscObject)viewer)->type_name);
 81:   }
 82:   return(0);
 83: }
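
/*
   A minimal usage sketch (not part of the original file) showing how the
   viewer above is reached through the public API; it assumes vectors vx,vy
   and index sets isx,isy already exist and omits error checking.
*/
static PetscErrorCode ViewScatterInfo_Example(Vec vx,IS isx,Vec vy,IS isy)
{
  VecScatter sctx;

  VecScatterCreate(vx,isx,vy,isy,&sctx);
  PetscViewerSetFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);
  VecScatterView(sctx,PETSC_VIEWER_STDOUT_WORLD); /* dispatches to VecScatterView_MPI() for parallel scatters */
  VecScatterDestroy(sctx);
  return(0);
}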

 85: /* -----------------------------------------------------------------------------------*/
 86: /*
 87:       The next routine determines what part of the local scatter is an
 88:   exact copy of values into their current location. We check this here and
 89:   then know that we need not perform that portion of the scatter when the vector is
 90:   scattering to itself with INSERT_VALUES.

 92:      This is currently not used but would speed up, for example, DALocalToLocalBegin/End()

 94: */
 97: PetscErrorCode VecScatterLocalOptimize_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from)
 98: {
 99:   PetscInt       n = to->n,n_nonmatching = 0,i,*to_slots = to->vslots,*from_slots = from->vslots;
101:   PetscInt       *nto_slots,*nfrom_slots,j = 0;
102: 
104:   for (i=0; i<n; i++) {
105:     if (to_slots[i] != from_slots[i]) n_nonmatching++;
106:   }

108:   if (!n_nonmatching) {
109:     to->nonmatching_computed = PETSC_TRUE;
110:     to->n_nonmatching        = from->n_nonmatching = 0;
111:     PetscInfo1(scatter,"Reduced %D to 0\n", n);
112:   } else if (n_nonmatching == n) {
113:     to->nonmatching_computed = PETSC_FALSE;
114:     PetscInfo(scatter,"All values non-matching\n");
115:   } else {
116:     to->nonmatching_computed= PETSC_TRUE;
117:     to->n_nonmatching       = from->n_nonmatching = n_nonmatching;
118:     PetscMalloc(n_nonmatching*sizeof(PetscInt),&nto_slots);
119:     PetscMalloc(n_nonmatching*sizeof(PetscInt),&nfrom_slots);
120:     to->slots_nonmatching   = nto_slots;
121:     from->slots_nonmatching = nfrom_slots;
122:     for (i=0; i<n; i++) {
123:       if (to_slots[i] != from_slots[i]) {
124:         nto_slots[j]   = to_slots[i];
125:         nfrom_slots[j] = from_slots[i];
126:         j++;
127:       }
128:     }
129:     PetscInfo2(scatter,"Reduced %D to %D\n",n,n_nonmatching);
130:   }
131:   return(0);
132: }
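
/*
   Worked illustration (not in the original source): with
       to->vslots   = {0,1,5,3}
       from->vslots = {0,1,2,3}
   only position 2 differs, so the routine above records n_nonmatching = 1,
   slots_nonmatching = {5} on the to side and {2} on the from side, and an
   INSERT_VALUES scatter of a vector onto itself need only move that one entry.
*/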

134: /* --------------------------------------------------------------------------------------*/

136: /* -------------------------------------------------------------------------------------*/
139: PetscErrorCode VecScatterDestroy_PtoP(VecScatter ctx)
140: {
141:   VecScatter_MPI_General *to   = (VecScatter_MPI_General*)ctx->todata;
142:   VecScatter_MPI_General *from = (VecScatter_MPI_General*)ctx->fromdata;
143:   PetscErrorCode         ierr;
144:   PetscInt               i;

147:   if (to->use_readyreceiver) {
148:     /*
 149:        Since we have already posted the receives, we must cancel them before freeing
150:        the requests
151:     */
152:     for (i=0; i<from->n; i++) {
153:       MPI_Cancel(from->requests+i);
154:     }
155:     for (i=0; i<to->n; i++) {
156:       MPI_Cancel(to->rev_requests+i);
157:     }
158:     MPI_Waitall(from->n,from->requests,to->rstatus);
159:     MPI_Waitall(to->n,to->rev_requests,to->rstatus);
160:   }

162: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
163:   if (to->use_alltoallw) {
164:     PetscFree3(to->wcounts,to->wdispls,to->types);
165:     PetscFree3(from->wcounts,from->wdispls,from->types);
166:   }
167: #endif

169: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
170:   if (to->use_window) {
171:     MPI_Win_free(&from->window);
172:     MPI_Win_free(&to->window);
173:     PetscFree(from->winstarts);
174:     PetscFree(to->winstarts);
175:   }
176: #endif

178:   if (to->use_alltoallv) {
179:     PetscFree2(to->counts,to->displs);
180:     PetscFree2(from->counts,from->displs);
181:   }

183:   /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
184:   /* 
185:      IBM's PE version of MPI has a bug where freeing these guys will screw up later
186:      message passing.
187:   */
188: #if !defined(PETSC_HAVE_BROKEN_REQUEST_FREE)
189:   if (!to->use_alltoallv && !to->use_window) {   /* currently the to->requests etc are ALWAYS allocated even if not used */
190:     if (to->requests) {
191:       for (i=0; i<to->n; i++) {
192:         MPI_Request_free(to->requests + i);
193:       }
194:     }
195:     if (to->rev_requests) {
196:       for (i=0; i<to->n; i++) {
197:         MPI_Request_free(to->rev_requests + i);
198:       }
199:     }
200:   }
201:   /*
 202:       MPICH could not properly cancel requests, thus with ready-receiver mode we
 203:     cannot free the requests. It may be fixed now; if not, put the following
 204:     code inside an if (!to->use_readyreceiver) { } block.
205:   */
206:   if (!to->use_alltoallv && !to->use_window) {    /* currently the from->requests etc are ALWAYS allocated even if not used */
207:     if (from->requests) {
208:       for (i=0; i<from->n; i++) {
209:         MPI_Request_free(from->requests + i);
210:       }
211:     }

213:     if (from->rev_requests) {
214:       for (i=0; i<from->n; i++) {
215:         MPI_Request_free(from->rev_requests + i);
216:       }
217:     }
218:   }
219: #endif

221:   PetscFree(to->local.vslots);
222:   PetscFree(from->local.vslots);
223:   PetscFree2(to->counts,to->displs);
224:   PetscFree2(from->counts,from->displs);
225:   PetscFree(to->local.slots_nonmatching);
226:   PetscFree(from->local.slots_nonmatching);
227:   PetscFree(to->rev_requests);
228:   PetscFree(from->rev_requests);
229:   PetscFree(to->requests);
230:   PetscFree(from->requests);
231:   PetscFree4(to->values,to->indices,to->starts,to->procs);
232:   PetscFree2(to->sstatus,to->rstatus);
233:   PetscFree4(from->values,from->indices,from->starts,from->procs);
234:   PetscFree(from);
235:   PetscFree(to);
236:   PetscHeaderDestroy(ctx);
237:   return(0);
238: }



242: /* --------------------------------------------------------------------------------------*/
243: /*
 244:     Special optimization to see if the local part of the scatter is actually
 245:     a contiguous copy; if it is, the scatter routines call PetscMemcpy() instead.
246:  
247: */
250: PetscErrorCode VecScatterLocalOptimizeCopy_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from,PetscInt bs)
251: {
252:   PetscInt       n = to->n,i,*to_slots = to->vslots,*from_slots = from->vslots;
253:   PetscInt       to_start,from_start;

257:   to_start   = to_slots[0];
258:   from_start = from_slots[0];

260:   for (i=1; i<n; i++) {
261:     to_start   += bs;
262:     from_start += bs;
263:     if (to_slots[i]   != to_start)   return(0);
264:     if (from_slots[i] != from_start) return(0);
265:   }
266:   to->is_copy       = PETSC_TRUE;
267:   to->copy_start    = to_slots[0];
268:   to->copy_length   = bs*sizeof(PetscScalar)*n;
269:   from->is_copy     = PETSC_TRUE;
270:   from->copy_start  = from_slots[0];
271:   from->copy_length = bs*sizeof(PetscScalar)*n;
272:   PetscInfo(scatter,"Local scatter is a copy, optimizing for it\n");
273:   return(0);
274: }
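
/*
   A standalone restatement (sketch, not in the original source) of the
   contiguity test above: the local scatter is a copy exactly when every
   slot advances by the block size bs from its predecessor.
*/
PETSC_STATIC_INLINE PetscTruth VecScatterSlotsContiguous_Example(PetscInt n,const PetscInt *slots,PetscInt bs)
{
  PetscInt i;
  for (i=1; i<n; i++) {
    if (slots[i] != slots[0] + i*bs) return PETSC_FALSE; /* one gap disqualifies the PetscMemcpy() optimization */
  }
  return PETSC_TRUE;
}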

276: /* --------------------------------------------------------------------------------------*/

280: PetscErrorCode VecScatterCopy_PtoP_X(VecScatter in,VecScatter out)
281: {
282:   VecScatter_MPI_General *in_to   = (VecScatter_MPI_General*)in->todata;
283:   VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
284:   PetscErrorCode         ierr;
285:   PetscInt               ny,bs = in_from->bs;

288:   out->begin     = in->begin;
289:   out->end       = in->end;
290:   out->copy      = in->copy;
291:   out->destroy   = in->destroy;
292:   out->view      = in->view;

294:   /* allocate entire send scatter context */
295:   PetscNewLog(out,VecScatter_MPI_General,&out_to);
296:   PetscNewLog(out,VecScatter_MPI_General,&out_from);

298:   ny                = in_to->starts[in_to->n];
299:   out_to->n         = in_to->n;
300:   out_to->type      = in_to->type;
301:   out_to->sendfirst = in_to->sendfirst;

303:   PetscMalloc(out_to->n*sizeof(MPI_Request),&out_to->requests);
304:   PetscMalloc4(bs*ny,PetscScalar,&out_to->values,ny,PetscInt,&out_to->indices,out_to->n+1,PetscInt,&out_to->starts,out_to->n,PetscMPIInt,&out_to->procs);
305:   PetscMalloc2(PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->sstatus,PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->rstatus);
306:   PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
307:   PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
308:   PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
309: 
310:   out->todata       = (void*)out_to;
311:   out_to->local.n   = in_to->local.n;
312:   out_to->local.nonmatching_computed = PETSC_FALSE;
313:   out_to->local.n_nonmatching        = 0;
314:   out_to->local.slots_nonmatching    = 0;
315:   if (in_to->local.n) {
316:     PetscMalloc(in_to->local.n*sizeof(PetscInt),&out_to->local.vslots);
317:     PetscMalloc(in_from->local.n*sizeof(PetscInt),&out_from->local.vslots);
318:     PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
319:     PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
320:   } else {
321:     out_to->local.vslots   = 0;
322:     out_from->local.vslots = 0;
323:   }

325:   /* allocate entire receive context */
326:   out_from->type      = in_from->type;
327:   ny                  = in_from->starts[in_from->n];
328:   out_from->n         = in_from->n;
329:   out_from->sendfirst = in_from->sendfirst;

331:   PetscMalloc(out_from->n*sizeof(MPI_Request),&out_from->requests);
332:   PetscMalloc4(ny*bs,PetscScalar,&out_from->values,ny,PetscInt,&out_from->indices,out_from->n+1,PetscInt,&out_from->starts,out_from->n,PetscMPIInt,&out_from->procs);
333:   PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
334:   PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
335:   PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
336:   out->fromdata       = (void*)out_from;
337:   out_from->local.n   = in_from->local.n;
338:   out_from->local.nonmatching_computed = PETSC_FALSE;
339:   out_from->local.n_nonmatching        = 0;
340:   out_from->local.slots_nonmatching    = 0;

342:   /* 
 343:       set up the request arrays for use with MPI_Send_init() and MPI_Recv_init()
344:   */
345:   {
346:     PetscMPIInt tag;
347:     MPI_Comm    comm;
348:     PetscInt    *sstarts = out_to->starts,  *rstarts = out_from->starts;
349:     PetscMPIInt *sprocs  = out_to->procs,   *rprocs  = out_from->procs;
350:     PetscInt    i;
351:     PetscTruth  flg;
352:     MPI_Request *swaits  = out_to->requests,*rwaits  = out_from->requests;
353:     MPI_Request *rev_swaits,*rev_rwaits;
354:     PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;

356:     PetscMalloc(in_to->n*sizeof(MPI_Request),&out_to->rev_requests);
357:     PetscMalloc(in_from->n*sizeof(MPI_Request),&out_from->rev_requests);

359:     rev_rwaits = out_to->rev_requests;
360:     rev_swaits = out_from->rev_requests;

362:     out_from->bs = out_to->bs = bs;
363:     tag     = ((PetscObject)out)->tag;
364:     comm    = ((PetscObject)out)->comm;

 366:     /* Register the receives that you will use later (and the matching sends for scatter reverse) */
367:     for (i=0; i<out_from->n; i++) {
368:       MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
369:       MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
370:     }

372:     flg  = PETSC_FALSE;
373:     PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_rsend",&flg,PETSC_NULL);
374:     if (flg) {
375:       out_to->use_readyreceiver    = PETSC_TRUE;
376:       out_from->use_readyreceiver  = PETSC_TRUE;
377:       for (i=0; i<out_to->n; i++) {
378:         MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
379:       }
380:       if (out_from->n) {MPI_Startall_irecv(out_from->starts[out_from->n]*out_from->bs,out_from->n,out_from->requests);}
381:       MPI_Barrier(comm);
382:       PetscInfo(in,"Using VecScatter ready receiver mode\n");
383:     } else {
384:       out_to->use_readyreceiver    = PETSC_FALSE;
385:       out_from->use_readyreceiver  = PETSC_FALSE;
386:       flg                          = PETSC_FALSE;
387:       PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_ssend",&flg,PETSC_NULL);
388:       if (flg) {
389:         PetscInfo(in,"Using VecScatter Ssend mode\n");
390:       }
391:       for (i=0; i<out_to->n; i++) {
392:         if (!flg) {
393:           MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
394:         } else {
395:           MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
396:         }
397:       }
398:     }
399:     /* Register receives for scatter reverse */
400:     for (i=0; i<out_to->n; i++) {
401:       MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
402:     }
403:   }

405:   return(0);
406: }
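
/*
   Usage note (sketch, not part of the original file): this routine is not
   called directly; it is installed as the scatter's copy method and reached
   through the public VecScatterCopy(), e.g.

       VecScatter duplicate;
       VecScatterCopy(original,&duplicate);

   after which the duplicate owns its own buffers and persistent MPI requests.
*/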

410: PetscErrorCode VecScatterCopy_PtoP_AllToAll(VecScatter in,VecScatter out)
411: {
412:   VecScatter_MPI_General *in_to   = (VecScatter_MPI_General*)in->todata;
413:   VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
414:   PetscErrorCode         ierr;
415:   PetscInt               ny,bs = in_from->bs;
416:   PetscMPIInt            size;

419:   MPI_Comm_size(((PetscObject)in)->comm,&size);
420:   out->begin     = in->begin;
421:   out->end       = in->end;
422:   out->copy      = in->copy;
423:   out->destroy   = in->destroy;
424:   out->view      = in->view;

426:   /* allocate entire send scatter context */
427:   PetscNewLog(out,VecScatter_MPI_General,&out_to);
428:   PetscNewLog(out,VecScatter_MPI_General,&out_from);

430:   ny                = in_to->starts[in_to->n];
431:   out_to->n         = in_to->n;
432:   out_to->type      = in_to->type;
433:   out_to->sendfirst = in_to->sendfirst;

435:   PetscMalloc(out_to->n*sizeof(MPI_Request),&out_to->requests);
436:   PetscMalloc4(bs*ny,PetscScalar,&out_to->values,ny,PetscInt,&out_to->indices,out_to->n+1,PetscInt,&out_to->starts,out_to->n,PetscMPIInt,&out_to->procs);
437:   PetscMalloc2(PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->sstatus,PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->rstatus);
438:   PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
439:   PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
440:   PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
441: 
442:   out->todata       = (void*)out_to;
443:   out_to->local.n   = in_to->local.n;
444:   out_to->local.nonmatching_computed = PETSC_FALSE;
445:   out_to->local.n_nonmatching        = 0;
446:   out_to->local.slots_nonmatching    = 0;
447:   if (in_to->local.n) {
448:     PetscMalloc(in_to->local.n*sizeof(PetscInt),&out_to->local.vslots);
449:     PetscMalloc(in_from->local.n*sizeof(PetscInt),&out_from->local.vslots);
450:     PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
451:     PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
452:   } else {
453:     out_to->local.vslots   = 0;
454:     out_from->local.vslots = 0;
455:   }

457:   /* allocate entire receive context */
458:   out_from->type      = in_from->type;
459:   ny                  = in_from->starts[in_from->n];
460:   out_from->n         = in_from->n;
461:   out_from->sendfirst = in_from->sendfirst;

463:   PetscMalloc(out_from->n*sizeof(MPI_Request),&out_from->requests);
464:   PetscMalloc4(ny*bs,PetscScalar,&out_from->values,ny,PetscInt,&out_from->indices,out_from->n+1,PetscInt,&out_from->starts,out_from->n,PetscMPIInt,&out_from->procs);
465:   PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
466:   PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
467:   PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
468:   out->fromdata       = (void*)out_from;
469:   out_from->local.n   = in_from->local.n;
470:   out_from->local.nonmatching_computed = PETSC_FALSE;
471:   out_from->local.n_nonmatching        = 0;
472:   out_from->local.slots_nonmatching    = 0;

474:   out_to->use_alltoallv = out_from->use_alltoallv = PETSC_TRUE;

476:   PetscMalloc2(size,PetscMPIInt,&out_to->counts,size,PetscMPIInt,&out_to->displs);
477:   PetscMemcpy(out_to->counts,in_to->counts,size*sizeof(PetscMPIInt));
478:   PetscMemcpy(out_to->displs,in_to->displs,size*sizeof(PetscMPIInt));

480:   PetscMalloc2(size,PetscMPIInt,&out_from->counts,size,PetscMPIInt,&out_from->displs);
481:   PetscMemcpy(out_from->counts,in_from->counts,size*sizeof(PetscMPIInt));
482:   PetscMemcpy(out_from->displs,in_from->displs,size*sizeof(PetscMPIInt));
483:   return(0);
484: }
485: /* --------------------------------------------------------------------------------------------------
 486:     Packs message data into send buffers and unpacks it from receive buffers.

488:     These could be generated automatically. 

490:     Fortran kernels etc. could be used.
491: */
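/*
   For comparison, a generic kernel (illustrative only, not part of the file)
   that handles any block size bs; the Pack_1 ... Pack_12 routines below are
   hand-unrolled specializations of this loop for the block sizes PETSc needs
   most often.
*/
PETSC_STATIC_INLINE void Pack_bs_Example(PetscInt n,PetscInt bs,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
{
  PetscInt i,k;
  for (i=0; i<n; i++) {
    for (k=0; k<bs; k++) y[k] = x[indicesx[i]+k]; /* gather one block */
    y += bs;                                      /* send buffer is contiguous */
  }
}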
492: PETSC_STATIC_INLINE void Pack_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
493: {
494:   PetscInt i;
495:   for (i=0; i<n; i++) {
496:     y[i]  = x[indicesx[i]];
497:   }
498: }
499: PETSC_STATIC_INLINE void UnPack_1(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
500: {
501:   PetscInt i;
502:   switch (addv) {
503:   case INSERT_VALUES:
504:     for (i=0; i<n; i++) {
505:       y[indicesy[i]] = x[i];
506:     }
507:     break;
508:   case ADD_VALUES:
509:     for (i=0; i<n; i++) {
510:       y[indicesy[i]] += x[i];
511:     }
512:     break;
513: #if !defined(PETSC_USE_COMPLEX)
514:   case MAX_VALUES:
515:     for (i=0; i<n; i++) {
516:       y[indicesy[i]] = PetscMax(y[indicesy[i]],x[i]);
517:     }
518: #else
519:   case MAX_VALUES:
520: #endif
521:   case NOT_SET_VALUES:
522:     break;
523:   }
524: }

526: PETSC_STATIC_INLINE void Scatter_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
527: {
528:   PetscInt i;
529:   switch (addv) {
530:   case INSERT_VALUES:
531:     for (i=0; i<n; i++) {
532:       y[indicesy[i]] = x[indicesx[i]];
533:     }
534:     break;
535:   case ADD_VALUES:
536:     for (i=0; i<n; i++) {
537:       y[indicesy[i]] += x[indicesx[i]];
538:     }
539:     break;
540: #if !defined(PETSC_USE_COMPLEX)
541:   case MAX_VALUES:
542:     for (i=0; i<n; i++) {
543:       y[indicesy[i]] = PetscMax(y[indicesy[i]],x[indicesx[i]]);
544:     }
545: #else
546:   case MAX_VALUES:
547: #endif
548:   case NOT_SET_VALUES:
549:     break;
550:   }
551: }

553:   /* ----------------------------------------------------------------------------------------------- */
554: PETSC_STATIC_INLINE void Pack_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
555: {
556:   PetscInt i,idx;

558:   for (i=0; i<n; i++) {
559:     idx   = *indicesx++;
560:     y[0]  = x[idx];
561:     y[1]  = x[idx+1];
562:     y    += 2;
563:   }
564: }
565: PETSC_STATIC_INLINE void UnPack_2(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
566: {
567:   PetscInt i,idy;

569:   switch (addv) {
570:   case INSERT_VALUES:
571:     for (i=0; i<n; i++) {
572:       idy       = *indicesy++;
573:       y[idy]    = x[0];
574:       y[idy+1]  = x[1];
575:       x    += 2;
576:     }
577:     break;
578:   case ADD_VALUES:
579:     for (i=0; i<n; i++) {
580:       idy       = *indicesy++;
581:       y[idy]    += x[0];
582:       y[idy+1]  += x[1];
583:       x    += 2;
584:     }
585:     break;
586: #if !defined(PETSC_USE_COMPLEX)
587:   case MAX_VALUES:
588:     for (i=0; i<n; i++) {
589:       idy       = *indicesy++;
590:       y[idy]    = PetscMax(y[idy],x[0]);
591:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
592:       x    += 2;
593:     }
594: #else
595:   case MAX_VALUES:
596: #endif
597:   case NOT_SET_VALUES:
598:     break;
599:   }
600: }

602: PETSC_STATIC_INLINE void Scatter_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
603: {
604:   PetscInt i,idx,idy;

606:   switch (addv) {
607:   case INSERT_VALUES:
608:     for (i=0; i<n; i++) {
609:       idx       = *indicesx++;
610:       idy       = *indicesy++;
611:       y[idy]    = x[idx];
612:       y[idy+1]  = x[idx+1];
613:     }
614:     break;
615:   case ADD_VALUES:
616:     for (i=0; i<n; i++) {
617:       idx       = *indicesx++;
618:       idy       = *indicesy++;
619:       y[idy]    += x[idx];
620:       y[idy+1]  += x[idx+1];
621:     }
622:     break;
623: #if !defined(PETSC_USE_COMPLEX)
624:   case MAX_VALUES:
625:     for (i=0; i<n; i++) {
626:       idx       = *indicesx++;
627:       idy       = *indicesy++;
628:       y[idy]    = PetscMax(y[idy],x[idx]);
629:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
630:     }
631: #else
632:   case MAX_VALUES:
633: #endif
634:   case NOT_SET_VALUES:
635:     break;
636:   }
637: }
638:   /* ----------------------------------------------------------------------------------------------- */
639: PETSC_STATIC_INLINE void Pack_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
640: {
641:   PetscInt i,idx;

643:   for (i=0; i<n; i++) {
644:     idx   = *indicesx++;
645:     y[0]  = x[idx];
646:     y[1]  = x[idx+1];
647:     y[2]  = x[idx+2];
648:     y    += 3;
649:   }
650: }
651: PETSC_STATIC_INLINE void UnPack_3(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
652: {
653:   PetscInt i,idy;

655:   switch (addv) {
656:   case INSERT_VALUES:
657:     for (i=0; i<n; i++) {
658:       idy       = *indicesy++;
659:       y[idy]    = x[0];
660:       y[idy+1]  = x[1];
661:       y[idy+2]  = x[2];
662:       x    += 3;
663:     }
664:     break;
665:   case ADD_VALUES:
666:     for (i=0; i<n; i++) {
667:       idy       = *indicesy++;
668:       y[idy]    += x[0];
669:       y[idy+1]  += x[1];
670:       y[idy+2]  += x[2];
671:       x    += 3;
672:     }
673:     break;
674: #if !defined(PETSC_USE_COMPLEX)
675:   case MAX_VALUES:
676:     for (i=0; i<n; i++) {
677:       idy       = *indicesy++;
678:       y[idy]    = PetscMax(y[idy],x[0]);
679:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
680:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
681:       x    += 3;
682:     }
683: #else
684:   case MAX_VALUES:
685: #endif
686:   case NOT_SET_VALUES:
687:     break;
688:   }
689: }

691: PETSC_STATIC_INLINE void Scatter_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
692: {
693:   PetscInt i,idx,idy;

695:   switch (addv) {
696:   case INSERT_VALUES:
697:     for (i=0; i<n; i++) {
698:       idx       = *indicesx++;
699:       idy       = *indicesy++;
700:       y[idy]    = x[idx];
701:       y[idy+1]  = x[idx+1];
702:       y[idy+2]  = x[idx+2];
703:     }
704:     break;
705:   case ADD_VALUES:
706:     for (i=0; i<n; i++) {
707:       idx       = *indicesx++;
708:       idy       = *indicesy++;
709:       y[idy]    += x[idx];
710:       y[idy+1]  += x[idx+1];
711:       y[idy+2]  += x[idx+2];
712:     }
713:     break;
714: #if !defined(PETSC_USE_COMPLEX)
715:   case MAX_VALUES:
716:     for (i=0; i<n; i++) {
717:       idx       = *indicesx++;
718:       idy       = *indicesy++;
719:       y[idy]    = PetscMax(y[idy],x[idx]);
720:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
721:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
722:     }
723: #else
724:   case MAX_VALUES:
725: #endif
726:   case NOT_SET_VALUES:
727:     break;
728:   }
729: }
730:   /* ----------------------------------------------------------------------------------------------- */
731: PETSC_STATIC_INLINE void Pack_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
732: {
733:   PetscInt i,idx;

735:   for (i=0; i<n; i++) {
736:     idx   = *indicesx++;
737:     y[0]  = x[idx];
738:     y[1]  = x[idx+1];
739:     y[2]  = x[idx+2];
740:     y[3]  = x[idx+3];
741:     y    += 4;
742:   }
743: }
744: PETSC_STATIC_INLINE void UnPack_4(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
745: {
746:   PetscInt i,idy;

748:   switch (addv) {
749:   case INSERT_VALUES:
750:     for (i=0; i<n; i++) {
751:       idy       = *indicesy++;
752:       y[idy]    = x[0];
753:       y[idy+1]  = x[1];
754:       y[idy+2]  = x[2];
755:       y[idy+3]  = x[3];
756:       x    += 4;
757:     }
758:     break;
759:   case ADD_VALUES:
760:     for (i=0; i<n; i++) {
761:       idy       = *indicesy++;
762:       y[idy]    += x[0];
763:       y[idy+1]  += x[1];
764:       y[idy+2]  += x[2];
765:       y[idy+3]  += x[3];
766:       x    += 4;
767:     }
768:     break;
769: #if !defined(PETSC_USE_COMPLEX)
770:   case MAX_VALUES:
771:     for (i=0; i<n; i++) {
772:       idy       = *indicesy++;
773:       y[idy]    = PetscMax(y[idy],x[0]);
774:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
775:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
776:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
777:       x    += 4;
778:     }
779: #else
780:   case MAX_VALUES:
781: #endif
782:   case NOT_SET_VALUES:
783:     break;
784:   }
785: }

787: PETSC_STATIC_INLINE void Scatter_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
788: {
789:   PetscInt i,idx,idy;

791:   switch (addv) {
792:   case INSERT_VALUES:
793:     for (i=0; i<n; i++) {
794:       idx       = *indicesx++;
795:       idy       = *indicesy++;
796:       y[idy]    = x[idx];
797:       y[idy+1]  = x[idx+1];
798:       y[idy+2]  = x[idx+2];
799:       y[idy+3]  = x[idx+3];
800:     }
801:     break;
802:   case ADD_VALUES:
803:     for (i=0; i<n; i++) {
804:       idx       = *indicesx++;
805:       idy       = *indicesy++;
806:       y[idy]    += x[idx];
807:       y[idy+1]  += x[idx+1];
808:       y[idy+2]  += x[idx+2];
809:       y[idy+3]  += x[idx+3];
810:     }
811:     break;
812: #if !defined(PETSC_USE_COMPLEX)
813:   case MAX_VALUES:
814:     for (i=0; i<n; i++) {
815:       idx       = *indicesx++;
816:       idy       = *indicesy++;
817:       y[idy]    = PetscMax(y[idy],x[idx]);
818:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
819:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
820:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
821:     }
822: #else
823:   case MAX_VALUES:
824: #endif
825:   case NOT_SET_VALUES:
826:     break;
827:   }
828: }
829:   /* ----------------------------------------------------------------------------------------------- */
830: PETSC_STATIC_INLINE void Pack_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
831: {
832:   PetscInt i,idx;

834:   for (i=0; i<n; i++) {
835:     idx   = *indicesx++;
836:     y[0]  = x[idx];
837:     y[1]  = x[idx+1];
838:     y[2]  = x[idx+2];
839:     y[3]  = x[idx+3];
840:     y[4]  = x[idx+4];
841:     y    += 5;
842:   }
843: }
844: PETSC_STATIC_INLINE void UnPack_5(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
845: {
846:   PetscInt i,idy;

848:   switch (addv) {
849:   case INSERT_VALUES:
850:     for (i=0; i<n; i++) {
851:       idy       = *indicesy++;
852:       y[idy]    = x[0];
853:       y[idy+1]  = x[1];
854:       y[idy+2]  = x[2];
855:       y[idy+3]  = x[3];
856:       y[idy+4]  = x[4];
857:       x    += 5;
858:     }
859:     break;
860:   case ADD_VALUES:
861:     for (i=0; i<n; i++) {
862:       idy       = *indicesy++;
863:       y[idy]    += x[0];
864:       y[idy+1]  += x[1];
865:       y[idy+2]  += x[2];
866:       y[idy+3]  += x[3];
867:       y[idy+4]  += x[4];
868:       x    += 5;
869:     }
870:     break;
871: #if !defined(PETSC_USE_COMPLEX)
872:   case MAX_VALUES:
873:     for (i=0; i<n; i++) {
874:       idy       = *indicesy++;
875:       y[idy]    = PetscMax(y[idy],x[0]);
876:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
877:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
878:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
879:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
880:       x    += 5;
881:     }
882: #else
883:   case MAX_VALUES:
884: #endif
885:   case NOT_SET_VALUES:
886:     break;
887:   }
888: }

890: PETSC_STATIC_INLINE void Scatter_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
891: {
892:   PetscInt i,idx,idy;

894:   switch (addv) {
895:   case INSERT_VALUES:
896:     for (i=0; i<n; i++) {
897:       idx       = *indicesx++;
898:       idy       = *indicesy++;
899:       y[idy]    = x[idx];
900:       y[idy+1]  = x[idx+1];
901:       y[idy+2]  = x[idx+2];
902:       y[idy+3]  = x[idx+3];
903:       y[idy+4]  = x[idx+4];
904:     }
905:     break;
906:   case ADD_VALUES:
907:     for (i=0; i<n; i++) {
908:       idx       = *indicesx++;
909:       idy       = *indicesy++;
910:       y[idy]    += x[idx];
911:       y[idy+1]  += x[idx+1];
912:       y[idy+2]  += x[idx+2];
913:       y[idy+3]  += x[idx+3];
914:       y[idy+4]  += x[idx+4];
915:     }
916:     break;
917: #if !defined(PETSC_USE_COMPLEX)
918:   case MAX_VALUES:
919:     for (i=0; i<n; i++) {
920:       idx       = *indicesx++;
921:       idy       = *indicesy++;
922:       y[idy]    = PetscMax(y[idy],x[idx]);
923:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
924:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
925:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
926:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
927:     }
928: #else
929:   case MAX_VALUES:
930: #endif
931:   case NOT_SET_VALUES:
932:     break;
933:   }
934: }
935:   /* ----------------------------------------------------------------------------------------------- */
936: PETSC_STATIC_INLINE void Pack_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
937: {
938:   PetscInt i,idx;

940:   for (i=0; i<n; i++) {
941:     idx   = *indicesx++;
942:     y[0]  = x[idx];
943:     y[1]  = x[idx+1];
944:     y[2]  = x[idx+2];
945:     y[3]  = x[idx+3];
946:     y[4]  = x[idx+4];
947:     y[5]  = x[idx+5];
948:     y    += 6;
949:   }
950: }
951: PETSC_STATIC_INLINE void UnPack_6(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
952: {
953:   PetscInt i,idy;

955:   switch (addv) {
956:   case INSERT_VALUES:
957:     for (i=0; i<n; i++) {
958:       idy       = *indicesy++;
959:       y[idy]    = x[0];
960:       y[idy+1]  = x[1];
961:       y[idy+2]  = x[2];
962:       y[idy+3]  = x[3];
963:       y[idy+4]  = x[4];
964:       y[idy+5]  = x[5];
965:       x    += 6;
966:     }
967:     break;
968:   case ADD_VALUES:
969:     for (i=0; i<n; i++) {
970:       idy       = *indicesy++;
971:       y[idy]    += x[0];
972:       y[idy+1]  += x[1];
973:       y[idy+2]  += x[2];
974:       y[idy+3]  += x[3];
975:       y[idy+4]  += x[4];
976:       y[idy+5]  += x[5];
977:       x    += 6;
978:     }
979:     break;
980: #if !defined(PETSC_USE_COMPLEX)
981:   case MAX_VALUES:
982:     for (i=0; i<n; i++) {
983:       idy       = *indicesy++;
984:       y[idy]    = PetscMax(y[idy],x[0]);
985:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
986:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
987:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
988:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
989:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
990:       x    += 6;
991:     }
992: #else
993:   case MAX_VALUES:
994: #endif
995:   case NOT_SET_VALUES:
996:     break;
997:   }
998: }

1000: PETSC_STATIC_INLINE void Scatter_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1001: {
1002:   PetscInt i,idx,idy;

1004:   switch (addv) {
1005:   case INSERT_VALUES:
1006:     for (i=0; i<n; i++) {
1007:       idx       = *indicesx++;
1008:       idy       = *indicesy++;
1009:       y[idy]    = x[idx];
1010:       y[idy+1]  = x[idx+1];
1011:       y[idy+2]  = x[idx+2];
1012:       y[idy+3]  = x[idx+3];
1013:       y[idy+4]  = x[idx+4];
1014:       y[idy+5]  = x[idx+5];
1015:     }
1016:     break;
1017:   case ADD_VALUES:
1018:     for (i=0; i<n; i++) {
1019:       idx       = *indicesx++;
1020:       idy       = *indicesy++;
1021:       y[idy]    += x[idx];
1022:       y[idy+1]  += x[idx+1];
1023:       y[idy+2]  += x[idx+2];
1024:       y[idy+3]  += x[idx+3];
1025:       y[idy+4]  += x[idx+4];
1026:       y[idy+5]  += x[idx+5];
1027:     }
1028:     break;
1029: #if !defined(PETSC_USE_COMPLEX)
1030:   case MAX_VALUES:
1031:     for (i=0; i<n; i++) {
1032:       idx       = *indicesx++;
1033:       idy       = *indicesy++;
1034:       y[idy]    = PetscMax(y[idy],x[idx]);
1035:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1036:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1037:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1038:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1039:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1040:     }
1041: #else
1042:   case MAX_VALUES:
1043: #endif
1044:   case NOT_SET_VALUES:
1045:     break;
1046:   }
1047: }
1048:   /* ----------------------------------------------------------------------------------------------- */
1049: PETSC_STATIC_INLINE void Pack_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
1050: {
1051:   PetscInt i,idx;

1053:   for (i=0; i<n; i++) {
1054:     idx   = *indicesx++;
1055:     y[0]  = x[idx];
1056:     y[1]  = x[idx+1];
1057:     y[2]  = x[idx+2];
1058:     y[3]  = x[idx+3];
1059:     y[4]  = x[idx+4];
1060:     y[5]  = x[idx+5];
1061:     y[6]  = x[idx+6];
1062:     y    += 7;
1063:   }
1064: }
1065: PETSC_STATIC_INLINE void UnPack_7(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1066: {
1067:   PetscInt i,idy;

1069:   switch (addv) {
1070:   case INSERT_VALUES:
1071:     for (i=0; i<n; i++) {
1072:       idy       = *indicesy++;
1073:       y[idy]    = x[0];
1074:       y[idy+1]  = x[1];
1075:       y[idy+2]  = x[2];
1076:       y[idy+3]  = x[3];
1077:       y[idy+4]  = x[4];
1078:       y[idy+5]  = x[5];
1079:       y[idy+6]  = x[6];
1080:       x    += 7;
1081:     }
1082:     break;
1083:   case ADD_VALUES:
1084:     for (i=0; i<n; i++) {
1085:       idy       = *indicesy++;
1086:       y[idy]    += x[0];
1087:       y[idy+1]  += x[1];
1088:       y[idy+2]  += x[2];
1089:       y[idy+3]  += x[3];
1090:       y[idy+4]  += x[4];
1091:       y[idy+5]  += x[5];
1092:       y[idy+6]  += x[6];
1093:       x    += 7;
1094:     }
1095:     break;
1096: #if !defined(PETSC_USE_COMPLEX)
1097:   case MAX_VALUES:
1098:     for (i=0; i<n; i++) {
1099:       idy       = *indicesy++;
1100:       y[idy]    = PetscMax(y[idy],x[0]);
1101:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1102:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1103:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1104:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1105:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1106:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1107:       x    += 7;
1108:     }
1109: #else
1110:   case MAX_VALUES:
1111: #endif
1112:   case NOT_SET_VALUES:
1113:     break;
1114:   }
1115: }

1117: PETSC_STATIC_INLINE void Scatter_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1118: {
1119:   PetscInt i,idx,idy;

1121:   switch (addv) {
1122:   case INSERT_VALUES:
1123:     for (i=0; i<n; i++) {
1124:       idx       = *indicesx++;
1125:       idy       = *indicesy++;
1126:       y[idy]    = x[idx];
1127:       y[idy+1]  = x[idx+1];
1128:       y[idy+2]  = x[idx+2];
1129:       y[idy+3]  = x[idx+3];
1130:       y[idy+4]  = x[idx+4];
1131:       y[idy+5]  = x[idx+5];
1132:       y[idy+6]  = x[idx+6];
1133:     }
1134:     break;
1135:   case ADD_VALUES:
1136:     for (i=0; i<n; i++) {
1137:       idx       = *indicesx++;
1138:       idy       = *indicesy++;
1139:       y[idy]    += x[idx];
1140:       y[idy+1]  += x[idx+1];
1141:       y[idy+2]  += x[idx+2];
1142:       y[idy+3]  += x[idx+3];
1143:       y[idy+4]  += x[idx+4];
1144:       y[idy+5]  += x[idx+5];
1145:       y[idy+6]  += x[idx+6];
1146:     }
1147:     break;
1148: #if !defined(PETSC_USE_COMPLEX)
1149:   case MAX_VALUES:
1150:     for (i=0; i<n; i++) {
1151:       idx       = *indicesx++;
1152:       idy       = *indicesy++;
1153:       y[idy]    = PetscMax(y[idy],x[idx]);
1154:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1155:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1156:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1157:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1158:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1159:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1160:     }
1161: #else
1162:   case MAX_VALUES:
1163: #endif
1164:   case NOT_SET_VALUES:
1165:     break;
1166:   }
1167: }
1168:   /* ----------------------------------------------------------------------------------------------- */
1169: PETSC_STATIC_INLINE void Pack_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
1170: {
1171:   PetscInt i,idx;

1173:   for (i=0; i<n; i++) {
1174:     idx   = *indicesx++;
1175:     y[0]  = x[idx];
1176:     y[1]  = x[idx+1];
1177:     y[2]  = x[idx+2];
1178:     y[3]  = x[idx+3];
1179:     y[4]  = x[idx+4];
1180:     y[5]  = x[idx+5];
1181:     y[6]  = x[idx+6];
1182:     y[7]  = x[idx+7];
1183:     y    += 8;
1184:   }
1185: }
1186: PETSC_STATIC_INLINE void UnPack_8(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1187: {
1188:   PetscInt i,idy;

1190:   switch (addv) {
1191:   case INSERT_VALUES:
1192:     for (i=0; i<n; i++) {
1193:       idy       = *indicesy++;
1194:       y[idy]    = x[0];
1195:       y[idy+1]  = x[1];
1196:       y[idy+2]  = x[2];
1197:       y[idy+3]  = x[3];
1198:       y[idy+4]  = x[4];
1199:       y[idy+5]  = x[5];
1200:       y[idy+6]  = x[6];
1201:       y[idy+7]  = x[7];
1202:       x    += 8;
1203:     }
1204:     break;
1205:   case ADD_VALUES:
1206:     for (i=0; i<n; i++) {
1207:       idy       = *indicesy++;
1208:       y[idy]    += x[0];
1209:       y[idy+1]  += x[1];
1210:       y[idy+2]  += x[2];
1211:       y[idy+3]  += x[3];
1212:       y[idy+4]  += x[4];
1213:       y[idy+5]  += x[5];
1214:       y[idy+6]  += x[6];
1215:       y[idy+7]  += x[7];
1216:       x    += 8;
1217:     }
1218:     break;
1219: #if !defined(PETSC_USE_COMPLEX)
1220:   case MAX_VALUES:
1221:     for (i=0; i<n; i++) {
1222:       idy       = *indicesy++;
1223:       y[idy]    = PetscMax(y[idy],x[0]);
1224:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1225:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1226:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1227:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1228:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1229:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1230:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1231:       x    += 8;
1232:     }
1233: #else
1234:   case MAX_VALUES:
1235: #endif
1236:   case NOT_SET_VALUES:
1237:     break;
1238:   }
1239: }

1241: PETSC_STATIC_INLINE void Scatter_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1242: {
1243:   PetscInt i,idx,idy;

1245:   switch (addv) {
1246:   case INSERT_VALUES:
1247:     for (i=0; i<n; i++) {
1248:       idx       = *indicesx++;
1249:       idy       = *indicesy++;
1250:       y[idy]    = x[idx];
1251:       y[idy+1]  = x[idx+1];
1252:       y[idy+2]  = x[idx+2];
1253:       y[idy+3]  = x[idx+3];
1254:       y[idy+4]  = x[idx+4];
1255:       y[idy+5]  = x[idx+5];
1256:       y[idy+6]  = x[idx+6];
1257:       y[idy+7]  = x[idx+7];
1258:     }
1259:     break;
1260:   case ADD_VALUES:
1261:     for (i=0; i<n; i++) {
1262:       idx       = *indicesx++;
1263:       idy       = *indicesy++;
1264:       y[idy]    += x[idx];
1265:       y[idy+1]  += x[idx+1];
1266:       y[idy+2]  += x[idx+2];
1267:       y[idy+3]  += x[idx+3];
1268:       y[idy+4]  += x[idx+4];
1269:       y[idy+5]  += x[idx+5];
1270:       y[idy+6]  += x[idx+6];
1271:       y[idy+7]  += x[idx+7];
1272:     }
1273:     break;
1274: #if !defined(PETSC_USE_COMPLEX)
1275:   case MAX_VALUES:
1276:     for (i=0; i<n; i++) {
1277:       idx       = *indicesx++;
1278:       idy       = *indicesy++;
1279:       y[idy]    = PetscMax(y[idy],x[idx]);
1280:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1281:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1282:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1283:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1284:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1285:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1286:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1287:     }
1288: #else
1289:   case MAX_VALUES:
1290: #endif
1291:   case NOT_SET_VALUES:
1292:     break;
1293:   }
1294: }

1296:   /* ----------------------------------------------------------------------------------------------- */
1297: PETSC_STATIC_INLINE void Pack_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
1298: {
1299:   PetscInt i,idx;

1301:   for (i=0; i<n; i++) {
1302:     idx   = *indicesx++;
1303:     y[0]  = x[idx];
1304:     y[1]  = x[idx+1];
1305:     y[2]  = x[idx+2];
1306:     y[3]  = x[idx+3];
1307:     y[4]  = x[idx+4];
1308:     y[5]  = x[idx+5];
1309:     y[6]  = x[idx+6];
1310:     y[7]  = x[idx+7];
1311:     y[8]  = x[idx+8];
1312:     y[9]  = x[idx+9];
1313:     y[10] = x[idx+10];
1314:     y[11] = x[idx+11];
1315:     y    += 12;
1316:   }
1317: }
1318: PETSC_STATIC_INLINE void UnPack_12(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1319: {
1320:   PetscInt i,idy;

1322:   switch (addv) {
1323:   case INSERT_VALUES:
1324:     for (i=0; i<n; i++) {
1325:       idy       = *indicesy++;
1326:       y[idy]    = x[0];
1327:       y[idy+1]  = x[1];
1328:       y[idy+2]  = x[2];
1329:       y[idy+3]  = x[3];
1330:       y[idy+4]  = x[4];
1331:       y[idy+5]  = x[5];
1332:       y[idy+6]  = x[6];
1333:       y[idy+7]  = x[7];
1334:       y[idy+8]  = x[8];
1335:       y[idy+9]  = x[9];
1336:       y[idy+10] = x[10];
1337:       y[idy+11] = x[11];
1338:       x    += 12;
1339:     }
1340:     break;
1341:   case ADD_VALUES:
1342:     for (i=0; i<n; i++) {
1343:       idy       = *indicesy++;
1344:       y[idy]    += x[0];
1345:       y[idy+1]  += x[1];
1346:       y[idy+2]  += x[2];
1347:       y[idy+3]  += x[3];
1348:       y[idy+4]  += x[4];
1349:       y[idy+5]  += x[5];
1350:       y[idy+6]  += x[6];
1351:       y[idy+7]  += x[7];
1352:       y[idy+8]  += x[8];
1353:       y[idy+9]  += x[9];
1354:       y[idy+10] += x[10];
1355:       y[idy+11] += x[11];
1356:       x    += 12;
1357:     }
1358:     break;
1359: #if !defined(PETSC_USE_COMPLEX)
1360:   case MAX_VALUES:
1361:     for (i=0; i<n; i++) {
1362:       idy       = *indicesy++;
1363:       y[idy]    = PetscMax(y[idy],x[0]);
1364:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1365:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1366:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1367:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1368:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1369:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1370:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1371:       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1372:       y[idy+9]  = PetscMax(y[idy+9],x[9]);
1373:       y[idy+10] = PetscMax(y[idy+10],x[10]);
1374:       y[idy+11] = PetscMax(y[idy+11],x[11]);
1375:       x    += 12;
1376:     }
1377: #else
1378:   case MAX_VALUES:
1379: #endif
1380:   case NOT_SET_VALUES:
1381:     break;
1382:   }
1383: }

1385: PETSC_STATIC_INLINE void Scatter_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1386: {
1387:   PetscInt i,idx,idy;

1389:   switch (addv) {
1390:   case INSERT_VALUES:
1391:     for (i=0; i<n; i++) {
1392:       idx       = *indicesx++;
1393:       idy       = *indicesy++;
1394:       y[idy]    = x[idx];
1395:       y[idy+1]  = x[idx+1];
1396:       y[idy+2]  = x[idx+2];
1397:       y[idy+3]  = x[idx+3];
1398:       y[idy+4]  = x[idx+4];
1399:       y[idy+5]  = x[idx+5];
1400:       y[idy+6]  = x[idx+6];
1401:       y[idy+7]  = x[idx+7];
1402:       y[idy+8]  = x[idx+8];
1403:       y[idy+9]  = x[idx+9];
1404:       y[idy+10] = x[idx+10];
1405:       y[idy+11] = x[idx+11];
1406:     }
1407:     break;
1408:   case ADD_VALUES:
1409:     for (i=0; i<n; i++) {
1410:       idx       = *indicesx++;
1411:       idy       = *indicesy++;
1412:       y[idy]    += x[idx];
1413:       y[idy+1]  += x[idx+1];
1414:       y[idy+2]  += x[idx+2];
1415:       y[idy+3]  += x[idx+3];
1416:       y[idy+4]  += x[idx+4];
1417:       y[idy+5]  += x[idx+5];
1418:       y[idy+6]  += x[idx+6];
1419:       y[idy+7]  += x[idx+7];
1420:       y[idy+8]  += x[idx+8];
1421:       y[idy+9]  += x[idx+9];
1422:       y[idy+10] += x[idx+10];
1423:       y[idy+11] += x[idx+11];
1424:     }
1425:     break;
1426: #if !defined(PETSC_USE_COMPLEX)
1427:   case MAX_VALUES:
1428:     for (i=0; i<n; i++) {
1429:       idx       = *indicesx++;
1430:       idy       = *indicesy++;
1431:       y[idy]    = PetscMax(y[idy],x[idx]);
1432:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1433:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1434:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1435:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1436:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1437:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1438:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1439:       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1440:       y[idy+9]  = PetscMax(y[idy+9],x[idx+9]);
1441:       y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1442:       y[idy+11] = PetscMax(y[idy+11],x[idx+11]);
1443:     }
1444: #else
1445:   case MAX_VALUES:
1446: #endif
1447:   case NOT_SET_VALUES:
1448:     break;
1449:   }
1450: }

1452: /* Create the VecScatterBegin/End_P for our chosen block sizes */
1453: #define BS 1
 1454:  #include "../src/vec/vec/utils/vpscat.h"
1455: #define BS 2
 1456:  #include "../src/vec/vec/utils/vpscat.h"
1457: #define BS 3
 1458:  #include "../src/vec/vec/utils/vpscat.h"
1459: #define BS 4
 1460:  #include "../src/vec/vec/utils/vpscat.h"
1461: #define BS 5
 1462:  #include "../src/vec/vec/utils/vpscat.h"
1463: #define BS 6
 1464:  #include "../src/vec/vec/utils/vpscat.h"
1465: #define BS 7
 1466:  #include "../src/vec/vec/utils/vpscat.h"
1467: #define BS 8
 1468:  #include "../src/vec/vec/utils/vpscat.h"
1469: #define BS 12
 1470:  #include "../src/vec/vec/utils/vpscat.h"
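
/*
   Sketch (illustrative; not the literal contents of vpscat.h) of the
   repeated-inclusion pattern used above: each inclusion pastes the current
   value of BS into the generated function names, producing one specialized
   VecScatterBegin/End pair per block size, and the header undefines BS at
   its end so that the next inclusion can redefine it.
*/
#if 0
#define BSCONCAT_(a,b) a ## _ ## b
#define BSCONCAT(a,b)  BSCONCAT_(a,b)

PetscErrorCode BSCONCAT(VecScatterBegin,BS)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  /* ... pack with the matching Pack_* kernel and start the MPI requests ... */
  return(0);
}
#undef BS   /* done inside vpscat.h so BS can be redefined for the next inclusion */
#endif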

1472: /* ==========================================================================================*/

1474: /*              create parallel to sequential scatter context                           */

1476: PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General *,VecScatter_MPI_General *,VecScatter);

1480: /*@
1481:      VecScatterCreateLocal - Creates a VecScatter from a list of messages it must send and receive.

1483:      Collective on VecScatter

1485:    Input Parameters:
 1486: +     VecScatter - obtained with VecScatterCreateEmpty()
 1487: .     nsends - number of messages this process will send
 1488: .     sendSizes - number of entries in each message sent
 1489: .     sendProcs - ranks of the processes each message is sent to
 1490: .     sendIdx - indices where the sent entries are obtained from (in local, on-process numbering), this is one long array of size \sum_{i=0,i<nsends} sendSizes[i]
 1491: .     nrecvs - number of receives to expect
 1492: .     recvSizes - number of entries in each message received
 1493: .     recvProcs - ranks of the processes that send to this process
 1494: .     recvIdx - indices where received entries are to be put (in local, on-process numbering), this is one long array of size \sum_{i=0,i<nrecvs} recvSizes[i]
 1495: -     bs - size of block

 1497:      Notes:  sendSizes[] and recvSizes[] cannot have any 0 entries. If you want to support 0 entries, you need
 1498:       to change the code below to "compress out" the sendProcs[] and recvProcs[] entries that have 0 entries.

 1500:        Probably does not handle sends to self properly; those should be removed from the counts that are used
 1501:       in allocating space inside the from struct.

1503:   Level: intermediate

1505: @*/
1506: PetscErrorCode VecScatterCreateLocal(VecScatter ctx,PetscInt nsends,const PetscInt sendSizes[],const PetscInt sendProcs[],const PetscInt sendIdx[],PetscInt nrecvs,const PetscInt recvSizes[],const PetscInt recvProcs[],const PetscInt recvIdx[],PetscInt bs)
1507: {
1508:   VecScatter_MPI_General *from, *to;
1509:   PetscInt               sendSize, recvSize;
1510:   PetscInt               n, i;
1511:   PetscErrorCode         ierr;

1513:   /* allocate entire send scatter context */
1514:   PetscNewLog(ctx,VecScatter_MPI_General,&to);
1515:   to->n = nsends;
1516:   for(n = 0, sendSize = 0; n < to->n; n++) {sendSize += sendSizes[n];}
1517:   PetscMalloc(to->n*sizeof(MPI_Request),&to->requests);
1518:   PetscMalloc4(bs*sendSize,PetscScalar,&to->values,sendSize,PetscInt,&to->indices,to->n+1,PetscInt,&to->starts,to->n,PetscMPIInt,&to->procs);
1519:   PetscMalloc2(PetscMax(to->n,nrecvs),MPI_Status,&to->sstatus,PetscMax(to->n,nrecvs),MPI_Status,&to->rstatus);
1520:   to->starts[0] = 0;
1521:   for(n = 0; n < to->n; n++) {
 1522:     if (sendSizes[n] <= 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"sendSizes[n=%D] = %D cannot be less than 1",n,sendSizes[n]);
1523:     to->starts[n+1] = to->starts[n] + sendSizes[n];
1524:     to->procs[n] = sendProcs[n];
1525:     for(i = to->starts[n]; i < to->starts[n]+sendSizes[n]; i++) {
1526:       to->indices[i] = sendIdx[i];
1527:     }
1528:   }
1529:   ctx->todata = (void *) to;

1531:   /* allocate entire receive scatter context */
1532:   PetscNewLog(ctx,VecScatter_MPI_General,&from);
1533:   from->n = nrecvs;
1534:   for(n = 0, recvSize = 0; n < from->n; n++) {recvSize += recvSizes[n];}
1535:   PetscMalloc(from->n*sizeof(MPI_Request),&from->requests);
1536:   PetscMalloc4(bs*recvSize,PetscScalar,&from->values,recvSize,PetscInt,&from->indices,from->n+1,PetscInt,&from->starts,from->n,PetscMPIInt,&from->procs);
1537:   from->starts[0] = 0;
1538:   for(n = 0; n < from->n; n++) {
 1539:     if (recvSizes[n] <= 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"recvSizes[n=%D] = %D cannot be less than 1",n,recvSizes[n]);
1540:     from->starts[n+1] = from->starts[n] + recvSizes[n];
1541:     from->procs[n] = recvProcs[n];
1542:     for(i = from->starts[n]; i < from->starts[n]+recvSizes[n]; i++) {
1543:       from->indices[i] = recvIdx[i];
1544:     }
1545:   }
1546:   ctx->fromdata = (void *)from;

1548:   /* No local scatter optimization */
1549:   from->local.n      = 0;
1550:   from->local.vslots = 0;
1551:   to->local.n        = 0;
1552:   to->local.vslots   = 0;
1553:   from->local.nonmatching_computed = PETSC_FALSE;
1554:   from->local.n_nonmatching        = 0;
1555:   from->local.slots_nonmatching    = 0;
1556:   to->local.nonmatching_computed   = PETSC_FALSE;
1557:   to->local.n_nonmatching          = 0;
1558:   to->local.slots_nonmatching      = 0;

1560:   from->type = VEC_SCATTER_MPI_GENERAL;
1561:   to->type   = VEC_SCATTER_MPI_GENERAL;
1562:   from->bs = bs;
1563:   to->bs   = bs;
1564:   VecScatterCreateCommon_PtoS(from, to, ctx);
1565:   return(0);
1566: }
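
/*
   A minimal two-process usage sketch (hypothetical, not part of the original
   file): rank 0 sends its local entries 0 and 1 to rank 1, which stores them
   at its local positions 2 and 3; rank 1 sends nothing back.  The ctx comes
   from VecScatterCreateEmpty() as the manual page above states; error
   checking is omitted.
*/
static PetscErrorCode CreateHandRolledScatter_Example(VecScatter *newctx)
{
  VecScatter     ctx;
  PetscMPIInt    rank;
  const PetscInt sendSizes[1] = {2},sendProcs[1] = {1},sendIdx[2] = {0,1};
  const PetscInt recvSizes[1] = {2},recvProcs[1] = {0},recvIdx[2] = {2,3};

  MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
  VecScatterCreateEmpty(PETSC_COMM_WORLD,&ctx);
  if (!rank) {
    VecScatterCreateLocal(ctx,1,sendSizes,sendProcs,sendIdx,0,PETSC_NULL,PETSC_NULL,PETSC_NULL,1);
  } else {
    VecScatterCreateLocal(ctx,0,PETSC_NULL,PETSC_NULL,PETSC_NULL,1,recvSizes,recvProcs,recvIdx,1);
  }
  *newctx = ctx;
  return(0);
}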

1568: /*
1569:    bs indicates how many elements there are in each block. Normally this would be 1.

 1571:    Contains a check that PetscMPIInt can handle the sizes needed.
1572: */
1575: PetscErrorCode VecScatterCreate_PtoS(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
1576: {
1577:   VecScatter_MPI_General *from,*to;
1578:   PetscMPIInt            size,rank,imdex,tag,n;
1579:   PetscInt               *source = PETSC_NULL,*owners = PETSC_NULL;
1580:   PetscInt               *lowner = PETSC_NULL,*start = PETSC_NULL,lengthy,lengthx;
1581:   PetscMPIInt            *nprocs = PETSC_NULL,nrecvs;
1582:   PetscInt               i,j,idx,nsends;
1583:   PetscInt               *owner = PETSC_NULL,*starts = PETSC_NULL,count,slen;
1584:   PetscInt               *rvalues,*svalues,base,*values,nprocslocal,recvtotal,*rsvalues;
1585:   PetscMPIInt            *onodes1,*olengths1;
1586:   MPI_Comm               comm;
1587:   MPI_Request            *send_waits = PETSC_NULL,*recv_waits = PETSC_NULL;
1588:   MPI_Status             recv_status,*send_status;
1589:   PetscErrorCode         ierr;

1592:   PetscObjectGetNewTag((PetscObject)ctx,&tag);
1593:   PetscObjectGetComm((PetscObject)xin,&comm);
1594:   MPI_Comm_rank(comm,&rank);
1595:   MPI_Comm_size(comm,&size);
1596:   owners = xin->map->range;
1597:   VecGetSize(yin,&lengthy);
1598:   VecGetSize(xin,&lengthx);

1600:   /*  first count number of contributors to each processor */
1601:   PetscMalloc2(size,PetscMPIInt,&nprocs,nx,PetscInt,&owner);
1602:   PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
1603:   j      = 0;
1604:   nsends = 0;
1605:   for (i=0; i<nx; i++) {
1606:     idx = inidx[i];
1607:     if (idx < owners[j]) j = 0;
1608:     for (; j<size; j++) {
1609:       if (idx < owners[j+1]) {
1610:         if (!nprocs[j]++) nsends++;
1611:         owner[i] = j;
1612:         break;
1613:       }
1614:     }
1615:   }
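  /*
     Example of the ownership search above (hypothetical values): with the
     range array owners = {0,4,8,12} on three processes, the index idx = 6
     first satisfies idx < owners[j+1] at j = 1, so owner[i] = 1.  Because j is
     not reset between iterations, already-sorted indices are resolved in one
     sweep; j is rewound to 0 only when an index falls below the start of the
     current candidate range.
  */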
1616:   nprocslocal  = nprocs[rank];
1617:   nprocs[rank] = 0;
1618:   if (nprocslocal) nsends--;
1619:   /* inform other processors of the number of messages and their lengths */
1620:   PetscGatherNumberOfMessages(comm,PETSC_NULL,nprocs,&nrecvs);
1621:   PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
1622:   PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
1623:   recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
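  /*
     After these two gather calls each process knows nrecvs (how many messages
     it will receive), onodes1 (the sending ranks) and olengths1 (the length of
     each message), without any process ever seeing the full communication
     matrix.  The sort puts the senders in increasing rank order, presumably so
     the unpacking below visits neighbors deterministically.
  */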

1625:   /* post receives:   */
1626:   PetscMalloc3(recvtotal,PetscInt,&rvalues,nrecvs,PetscInt,&source,nrecvs,MPI_Request,&recv_waits);
1627:   count  = 0;
1628:   for (i=0; i<nrecvs; i++) {
1629:     MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
1630:     count += olengths1[i];
1631:   }

1633:   /* do sends:
1634:      1) starts[i] gives the starting index in svalues of the entries going to
1635:      the ith processor
1636:   */
1637:   PetscMalloc3(nx,PetscInt,&svalues,nsends,MPI_Request,&send_waits,size+1,PetscInt,&starts);
1638:   starts[0]  = 0;
1639:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1640:   for (i=0; i<nx; i++) {
1641:     if (owner[i] != rank) {
1642:       svalues[starts[owner[i]]++] = inidx[i];
1643:     }
1644:   }
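  /*
     Note that the packing loop above advances starts[owner[i]] as it fills
     svalues, so starts[] is destroyed in the process; that is why the prefix
     sum is recomputed below before the sends are posted.  For example, with
     hypothetical counts nprocs = {0,2,1} on three processes the rebuilt array
     is starts = {0,0,2,3}.
  */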

1646:   starts[0] = 0;
1647:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1648:   count = 0;
1649:   for (i=0; i<size; i++) {
1650:     if (nprocs[i]) {
1651:       MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);
1652:     }
1653:   }

1655:   /*  wait on receives */
1656:   count  = nrecvs;
1657:   slen   = 0;
1658:   while (count) {
1659:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1660:     /* tally the length of each incoming message; the data itself is unpacked further below */
1661:     MPI_Get_count(&recv_status,MPIU_INT,&n);
1662:     slen += n;
1663:     count--;
1664:   }

1666:   if (slen != recvtotal) SETERRQ2(PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);
1667: 
1668:   /* allocate entire send scatter context */
1669:   PetscNewLog(ctx,VecScatter_MPI_General,&to);
1670:   to->n = nrecvs;
1671:   PetscMalloc(nrecvs*sizeof(MPI_Request),&to->requests);
1672:   PetscMalloc4(bs*slen,PetscScalar,&to->values,slen,PetscInt,&to->indices,nrecvs+1,PetscInt,&to->starts,nrecvs,PetscMPIInt,&to->procs);
1673:   PetscMalloc2(PetscMax(to->n,nsends),MPI_Status,&to->sstatus,PetscMax(to->n,nsends),MPI_Status,&to->rstatus);
1674:   ctx->todata   = (void*)to;
1675:   to->starts[0] = 0;

1677:   if (nrecvs) {

1679:     /* move the data into the send scatter */
1680:     base     = owners[rank];
1681:     rsvalues = rvalues;
1682:     for (i=0; i<nrecvs; i++) {
1683:       to->starts[i+1] = to->starts[i] + olengths1[i];
1684:       to->procs[i]    = onodes1[i];
1685:       values = rsvalues;
1686:       rsvalues += olengths1[i];
1687:       for (j=0; j<olengths1[i]; j++) {
1688:         to->indices[to->starts[i] + j] = values[j] - base;
1689:       }
1690:     }
1691:   }
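  /*
     The subtraction of base converts incoming global indices to indices local
     to this process: if this rank owns the global range [base,base+m) then
     global index base+5 is stored as local index 5 (illustrative values).
  */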
1692:   PetscFree(olengths1);
1693:   PetscFree(onodes1);
1694:   PetscFree3(rvalues,source,recv_waits);

1696:   /* allocate entire receive scatter context */
1697:   PetscNewLog(ctx,VecScatter_MPI_General,&from);
1698:   from->n = nsends;

1700:   PetscMalloc(nsends*sizeof(MPI_Request),&from->requests);
1701:   PetscMalloc4((ny-nprocslocal)*bs,PetscScalar,&from->values,ny-nprocslocal,PetscInt,&from->indices,nsends+1,PetscInt,&from->starts,from->n,PetscMPIInt,&from->procs);
1702:   ctx->fromdata  = (void*)from;

1704:   /* move data into receive scatter */
1705:   PetscMalloc2(size,PetscInt,&lowner,nsends+1,PetscInt,&start);
1706:   count = 0; from->starts[0] = start[0] = 0;
1707:   for (i=0; i<size; i++) {
1708:     if (nprocs[i]) {
1709:       lowner[i]            = count;
1710:       from->procs[count++] = i;
1711:       from->starts[count]  = start[count] = start[count-1] + nprocs[i];
1712:     }
1713:   }

1715:   for (i=0; i<nx; i++) {
1716:     if (owner[i] != rank) {
1717:       from->indices[start[lowner[owner[i]]]++] = inidy[i];
1718:       if (inidy[i] >= lengthy) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
1719:     }
1720:   }
1721:   PetscFree2(lowner,start);
1722:   PetscFree2(nprocs,owner);
1723: 
1724:   /* wait on sends */
1725:   if (nsends) {
1726:     PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1727:     MPI_Waitall(nsends,send_waits,send_status);
1728:     PetscFree(send_status);
1729:   }
1730:   PetscFree3(svalues,send_waits,starts);

1732:   if (nprocslocal) {
1733:     PetscInt nt = from->local.n = to->local.n = nprocslocal;
1734:     /* we have a scatter to ourselves */
1735:     PetscMalloc(nt*sizeof(PetscInt),&to->local.vslots);
1736:     PetscMalloc(nt*sizeof(PetscInt),&from->local.vslots);
1737:     nt   = 0;
1738:     for (i=0; i<nx; i++) {
1739:       idx = inidx[i];
1740:       if (idx >= owners[rank] && idx < owners[rank+1]) {
1741:         to->local.vslots[nt]     = idx - owners[rank];
1742:         from->local.vslots[nt++] = inidy[i];
1743:         if (inidy[i] >= lengthy) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
1744:       }
1745:     }
1746:   } else {
1747:     from->local.n      = 0;
1748:     from->local.vslots = 0;
1749:     to->local.n        = 0;
1750:     to->local.vslots   = 0;
1751:   }

1753:   from->local.nonmatching_computed = PETSC_FALSE;
1754:   from->local.n_nonmatching        = 0;
1755:   from->local.slots_nonmatching    = 0;
1756:   to->local.nonmatching_computed   = PETSC_FALSE;
1757:   to->local.n_nonmatching          = 0;
1758:   to->local.slots_nonmatching      = 0;

1760:   from->type = VEC_SCATTER_MPI_GENERAL;
1761:   to->type   = VEC_SCATTER_MPI_GENERAL;
1762:   from->bs = bs;
1763:   to->bs   = bs;
1764:   VecScatterCreateCommon_PtoS(from,to,ctx);
1765:   return(0);
1766: }

1768: /*
1769:    bs indicates how many elements there are in each block. Normally this would be 1.
1770: */
1773: PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General *from,VecScatter_MPI_General *to,VecScatter ctx)
1774: {
1775:   MPI_Comm       comm = ((PetscObject)ctx)->comm;
1776:   PetscMPIInt    tag  = ((PetscObject)ctx)->tag, tagr;
1777:   PetscInt       bs   = to->bs;
1778:   PetscMPIInt    size;
1779:   PetscInt       i, n;
1781: 
1783:   PetscObjectGetNewTag((PetscObject)ctx,&tagr);
1784:   ctx->destroy = VecScatterDestroy_PtoP;

1786:   ctx->reproduce = PETSC_FALSE;
1787:   to->sendfirst  = PETSC_FALSE;
1788:   PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_reproduce",&ctx->reproduce,PETSC_NULL);
1789:   PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_sendfirst",&to->sendfirst,PETSC_NULL);
1790:   from->sendfirst = to->sendfirst;

1792:   MPI_Comm_size(comm,&size);
1793:   /* check if the receives are ALL going into contiguous locations; if so can skip indexing */
1794:   to->contiq = PETSC_FALSE;
1795:   n = from->starts[from->n];
1796:   from->contiq = PETSC_TRUE;
1797:   for (i=1; i<n; i++) {
1798:     if (from->indices[i] != from->indices[i-1] + bs) {
1799:       from->contiq = PETSC_FALSE;
1800:       break;
1801:     }
1802:   }
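  /*
     Example with hypothetical indices and bs = 1: from->indices = {3,4,5,7}
     fails the test at i = 3 since 7 != 5 + 1, leaving from->contiq
     PETSC_FALSE, while {3,4,5,6} would keep it PETSC_TRUE.  Contiguity is
     exploited in the MPI_Alltoallw branch below, where the receive side can
     address the vector storage directly instead of building an indexed
     datatype.
  */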

1804:   to->use_alltoallv = PETSC_FALSE;
1805:   PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_alltoall",&to->use_alltoallv,PETSC_NULL);
1806:   from->use_alltoallv = to->use_alltoallv;
1807:   if (from->use_alltoallv) PetscInfo(ctx,"Using MPI_Alltoallv() for scatter\n");
1808: #if defined(PETSC_HAVE_MPI_ALLTOALLW)  && !defined(PETSC_USE_64BIT_INDICES)
1809:   if (to->use_alltoallv) {
1810:     to->use_alltoallw = PETSC_FALSE;
1811:     PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_nopack",&to->use_alltoallw,PETSC_NULL);
1812:   }
1813:   from->use_alltoallw = to->use_alltoallw;
1814:   if (from->use_alltoallw) PetscInfo(ctx,"Using MPI_Alltoallw() for scatter\n");
1815: #endif

1817: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
1818:   to->use_window = PETSC_FALSE;
1819:   PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_window",&to->use_window,PETSC_NULL);
1820:   from->use_window = to->use_window;
1821: #endif

1823:   if (to->use_alltoallv) {

1825:     PetscMalloc2(size,PetscMPIInt,&to->counts,size,PetscMPIInt,&to->displs);
1826:     PetscMemzero(to->counts,size*sizeof(PetscMPIInt));
1827:     for (i=0; i<to->n; i++) {
1828:       to->counts[to->procs[i]] = bs*(to->starts[i+1] - to->starts[i]);
1829:     }
1830:     to->displs[0] = 0;
1831:     for (i=1; i<size; i++) {
1832:       to->displs[i] = to->displs[i-1] + to->counts[i-1];
1833:     }

1835:     PetscMalloc2(size,PetscMPIInt,&from->counts,size,PetscMPIInt,&from->displs);
1836:     PetscMemzero(from->counts,size*sizeof(PetscMPIInt));
1837:     for (i=0; i<from->n; i++) {
1838:       from->counts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
1839:     }
1840:     from->displs[0] = 0;
1841:     for (i=1; i<size; i++) {
1842:       from->displs[i] = from->displs[i-1] + from->counts[i-1];
1843:     }
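    /*
       Sketch of what these arrays feed (hypothetical values): with size = 4
       and counts = {0,3,0,2}, the loop above yields displs = {0,0,3,3}, the
       exclusive prefix sum MPI_Alltoallv() expects; ranks this process does
       not talk to simply get a zero count.  The eventual call, made when the
       scatter is run rather than in this routine, has the shape below, where
       sendbuf/recvbuf stand for the packed value arrays (names illustrative):

           MPI_Alltoallv(sendbuf,to->counts,to->displs,MPIU_SCALAR,
                         recvbuf,from->counts,from->displs,MPIU_SCALAR,comm);
    */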
1844: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
1845:     if (to->use_alltoallw) {
1846:       PetscMPIInt mpibs = PetscMPIIntCast(bs), mpilen;
1847:       ctx->packtogether = PETSC_FALSE;
1848:       PetscMalloc3(size,PetscMPIInt,&to->wcounts,size,PetscMPIInt,&to->wdispls,size,MPI_Datatype,&to->types);
1849:       PetscMemzero(to->wcounts,size*sizeof(PetscMPIInt));
1850:       PetscMemzero(to->wdispls,size*sizeof(PetscMPIInt));
1851:       for (i=0; i<size; i++) {
1852:         to->types[i] = MPIU_SCALAR;
1853:       }

1855:       for (i=0; i<to->n; i++) {
1856:         to->wcounts[to->procs[i]] = 1;
1857:         mpilen = PetscMPIIntCast(to->starts[i+1]-to->starts[i]);
1858:         MPI_Type_create_indexed_block(mpilen,mpibs,to->indices+to->starts[i],MPIU_SCALAR,to->types+to->procs[i]);
1859:         MPI_Type_commit(to->types+to->procs[i]);
1860:       }
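      /*
         MPI_Type_create_indexed_block(count,blocklen,displs,oldtype,newtype)
         creates a datatype of count blocks, each blocklen oldtype elements
         long, at the given displacements (in units of oldtype).  Each remote
         rank therefore gets a type that picks its bs-scalar blocks straight
         out of the vector array at the listed indices, which is what lets the
         alltoallw path (-vecscatter_nopack) skip the explicit pack/unpack.
      */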
1861:       PetscMalloc3(size,PetscMPIInt,&from->wcounts,size,PetscMPIInt,&from->wdispls,size,MPI_Datatype,&from->types);
1862:       PetscMemzero(from->wcounts,size*sizeof(PetscMPIInt));
1863:       PetscMemzero(from->wdispls,size*sizeof(PetscMPIInt));
1864:       for (i=0; i<size; i++) {
1865:         from->types[i] = MPIU_SCALAR;
1866:       }
1867:       if (from->contiq) {
1868:         PetscInfo(ctx,"Scattered vector entries are stored contiquously, taking advantage of this with -vecscatter_alltoall\n");
1869:         for (i=0; i<from->n; i++) {
1870:           from->wcounts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
1871:         }
1872:         if (from->n) from->wdispls[from->procs[0]] = sizeof(PetscScalar)*from->indices[0];
1873:         for (i=1; i<from->n; i++) {
1874:           from->wdispls[from->procs[i]] = from->wdispls[from->procs[i-1]] + sizeof(PetscScalar)*from->wcounts[from->procs[i-1]];
1875:         }
1876:       } else {
1877:         for (i=0; i<from->n; i++) {
1878:           from->wcounts[from->procs[i]] = 1;
1879:           mpilen = PetscMPIIntCast(from->starts[i+1]-from->starts[i]);
1880:           MPI_Type_create_indexed_block(mpilen,mpibs,from->indices+from->starts[i],MPIU_SCALAR,from->types+from->procs[i]);
1881:           MPI_Type_commit(from->types+from->procs[i]);
1882:         }
1883:       }
1884:     } else {
1885:       ctx->copy = VecScatterCopy_PtoP_AllToAll;
1886:     }
1887: #else 
1888:     to->use_alltoallw   = PETSC_FALSE;
1889:     from->use_alltoallw = PETSC_FALSE;
1890:     ctx->copy           = VecScatterCopy_PtoP_AllToAll;
1891: #endif
1892: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
1893:   } else if (to->use_window) {
1894:     PetscMPIInt temptag,winsize;
1895:     MPI_Request *request;
1896:     MPI_Status  *status;
1897: 
1898:     PetscObjectGetNewTag((PetscObject)ctx,&temptag);
1899:     winsize = (to->n ? to->starts[to->n] : 0)*bs*sizeof(PetscScalar);
1900:     MPI_Win_create(to->values ? to->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&to->window);
1901:     PetscMalloc(to->n*sizeof(PetscInt),&to->winstarts);
1902:     PetscMalloc2(to->n,MPI_Request,&request,to->n,MPI_Status,&status);
1903:     for (i=0; i<to->n; i++) {
1904:       MPI_Irecv(to->winstarts+i,1,MPIU_INT,to->procs[i],temptag,comm,request+i);
1905:     }
1906:     for (i=0; i<from->n; i++) {
1907:       MPI_Send(from->starts+i,1,MPIU_INT,from->procs[i],temptag,comm);
1908:     }
1909:     MPI_Waitall(to->n,request,status);
1910:     PetscFree2(request,status);

1912:     winsize = (from->n ? from->starts[from->n] : 0)*bs*sizeof(PetscScalar);
1913:     MPI_Win_create(from->values ? from->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&from->window);
1914:     PetscMalloc(from->n*sizeof(PetscInt),&from->winstarts);
1915:     PetscMalloc2(from->n,MPI_Request,&request,from->n,MPI_Status,&status);
1916:     for (i=0; i<from->n; i++) {
1917:       MPI_Irecv(from->winstarts+i,1,MPIU_INT,from->procs[i],temptag,comm,request+i);
1918:     }
1919:     for (i=0; i<to->n; i++) {
1920:       MPI_Send(to->starts+i,1,MPIU_INT,to->procs[i],temptag,comm);
1921:     }
1922:     MPI_Waitall(from->n,request,status);
1923:     PetscFree2(request,status);
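    /*
       The two Irecv/Send rounds above exchange the starts[] offsets: after
       them, winstarts[i] on each side records where inside neighbor i's
       window this process's contribution begins, which is the displacement a
       later one-sided MPI_Put into that window would use.
    */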
1924: #endif
1925:   } else {
1926:     PetscTruth  use_rsend = PETSC_FALSE, use_ssend = PETSC_FALSE;
1927:     PetscInt    *sstarts = to->starts,  *rstarts = from->starts;
1928:     PetscMPIInt *sprocs  = to->procs,   *rprocs  = from->procs;
1929:     MPI_Request *swaits  = to->requests,*rwaits  = from->requests;
1930:     MPI_Request *rev_swaits,*rev_rwaits;
1931:     PetscScalar *Ssvalues = to->values, *Srvalues = from->values;

1933:     /* allocate additional wait variables for the "reverse" scatter */
1934:     PetscMalloc(to->n*sizeof(MPI_Request),&rev_rwaits);
1935:     PetscMalloc(from->n*sizeof(MPI_Request),&rev_swaits);
1936:     to->rev_requests   = rev_rwaits;
1937:     from->rev_requests = rev_swaits;

1939:     /* Register the persistent sends for the reverse scatter (they reuse the buffers the forward scatter receives into) */
1940:     PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_rsend",&use_rsend,PETSC_NULL);
1941:     PetscOptionsGetTruth(PETSC_NULL,"-vecscatter_ssend",&use_ssend,PETSC_NULL);
1942:     if (use_rsend) {
1943:       PetscInfo(ctx,"Using VecScatter ready receiver mode\n");
1944:       to->use_readyreceiver    = PETSC_TRUE;
1945:       from->use_readyreceiver  = PETSC_TRUE;
1946:     } else {
1947:       to->use_readyreceiver    = PETSC_FALSE;
1948:       from->use_readyreceiver  = PETSC_FALSE;
1949:     }
1950:     if (use_ssend) {
1951:       PetscInfo(ctx,"Using VecScatter Ssend mode\n");
1952:     }

1954:     for (i=0; i<from->n; i++) {
1955:       if (use_rsend) {
1956:         MPI_Rsend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
1957:       } else if (use_ssend) {
1958:         MPI_Ssend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
1959:       } else {
1960:         MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
1961:       }
1962:     }

1964:     for (i=0; i<to->n; i++) {
1965:       if (use_rsend) {
1966:         MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
1967:       } else if (use_ssend) {
1968:         MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
1969:       } else {
1970:         MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
1971:       }
1972:     }
1973:     /* Register receives for scatter and reverse */
1974:     for (i=0; i<from->n; i++) {
1975:       MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
1976:     }
1977:     for (i=0; i<to->n; i++) {
1978:       MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tagr,comm,rev_rwaits+i);
1979:     }
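    /*
       None of the *_init calls above move any data; they only create
       persistent requests.  A minimal sketch of how such requests are driven,
       using plain MPI calls rather than this file's macros:

           MPI_Startall(n,requests);             start all sends/receives
           ...                                   overlap local work
           MPI_Waitall(n,requests,statuses);     complete; requests stay reusable

       Creating the requests once here and merely restarting them on every
       scatter avoids repeating the MPI setup cost in each
       VecScatterBegin()/VecScatterEnd() pair.
    */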
1980:     if (use_rsend) {
1981:       if (to->n)   {MPI_Startall_irecv(to->starts[to->n]*to->bs,to->n,to->rev_requests);}
1982:       if (from->n) {MPI_Startall_irecv(from->starts[from->n]*from->bs,from->n,from->requests);}
1983:       MPI_Barrier(comm);
1984:     }

1986:     ctx->copy      = VecScatterCopy_PtoP_X;
1987:   }
1988:   PetscInfo1(ctx,"Using blocksize %D scatter\n",bs);
1989:   switch (bs) {
1990:   case 12:
1991:     ctx->begin     = VecScatterBegin_12;
1992:     ctx->end       = VecScatterEnd_12;
1993:     break;
1994:   case 8:
1995:     ctx->begin     = VecScatterBegin_8;
1996:     ctx->end       = VecScatterEnd_8;
1997:     break;
1998:   case 7:
1999:     ctx->begin     = VecScatterBegin_7;
2000:     ctx->end       = VecScatterEnd_7;
2001:     break;
2002:   case 6:
2003:     ctx->begin     = VecScatterBegin_6;
2004:     ctx->end       = VecScatterEnd_6;
2005:     break;
2006:   case 5:
2007:     ctx->begin     = VecScatterBegin_5;
2008:     ctx->end       = VecScatterEnd_5;
2009:     break;
2010:   case 4:
2011:     ctx->begin     = VecScatterBegin_4;
2012:     ctx->end       = VecScatterEnd_4;
2013:     break;
2014:   case 3:
2015:     ctx->begin     = VecScatterBegin_3;
2016:     ctx->end       = VecScatterEnd_3;
2017:     break;
2018:   case 2:
2019:     ctx->begin     = VecScatterBegin_2;
2020:     ctx->end       = VecScatterEnd_2;
2021:     break;
2022:   case 1:
2023:     ctx->begin     = VecScatterBegin_1;
2024:     ctx->end       = VecScatterEnd_1;
2025:     break;
2026:   default:
2027:     SETERRQ(PETSC_ERR_SUP,"Blocksize not supported");
2028:   }
2029:   ctx->view      = VecScatterView_MPI;
2030:   /* Check if the local scatter is actually a copy; important special case */
2031:   if (to->local.n) {
2032:     VecScatterLocalOptimizeCopy_Private(ctx,&to->local,&from->local,bs);
2033:   }
2034:   return(0);
2035: }



2039: /* ------------------------------------------------------------------------------------*/
2040: /*
2041:          Scatter from local Seq vectors to a parallel vector. 
2042:          Reverses the order of the arguments, calls VecScatterCreate_PtoS() then
2043:          reverses the result.
2044: */
2047: PetscErrorCode VecScatterCreate_StoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2048: {
2049:   PetscErrorCode         ierr;
2050:   MPI_Request            *waits;
2051:   VecScatter_MPI_General *to,*from;

2054:   VecScatterCreate_PtoS(ny,inidy,nx,inidx,yin,xin,bs,ctx);
2055:   to            = (VecScatter_MPI_General*)ctx->fromdata;
2056:   from          = (VecScatter_MPI_General*)ctx->todata;
2057:   ctx->todata   = (void*)to;
2058:   ctx->fromdata = (void*)from;
2059:   /* these two are special, they are ALWAYS stored in the to struct */
2060:   to->sstatus   = from->sstatus;
2061:   to->rstatus   = from->rstatus;

2063:   from->sstatus = 0;
2064:   from->rstatus = 0;

2066:   waits              = from->rev_requests;
2067:   from->rev_requests = from->requests;
2068:   from->requests     = waits;
2069:   waits              = to->rev_requests;
2070:   to->rev_requests   = to->requests;
2071:   to->requests       = waits;
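  /*
     Swapping todata/fromdata and requests/rev_requests turns the context built
     by VecScatterCreate_PtoS() into its mirror image: a forward run of this
     context now performs what PtoS set up as the reverse communication, which
     is exactly the sequential-to-parallel scatter requested.
  */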
2072:   return(0);
2073: }

2075: /* ---------------------------------------------------------------------------------*/
2078: PetscErrorCode VecScatterCreate_PtoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,VecScatter ctx)
2079: {
2081:   PetscMPIInt    size,rank,tag,imdex,n;
2082:   PetscInt       *owners = xin->map->range;
2083:   PetscMPIInt    *nprocs = PETSC_NULL;
2084:   PetscInt       i,j,idx,nsends,*local_inidx = PETSC_NULL,*local_inidy = PETSC_NULL;
2085:   PetscInt       *owner = PETSC_NULL,*starts = PETSC_NULL,count,slen;
2086:   PetscInt       *rvalues = PETSC_NULL,*svalues = PETSC_NULL,base,*values = PETSC_NULL,*rsvalues,recvtotal,lastidx;
2087:   PetscMPIInt    *onodes1,*olengths1,nrecvs;
2088:   MPI_Comm       comm;
2089:   MPI_Request    *send_waits = PETSC_NULL,*recv_waits = PETSC_NULL;
2090:   MPI_Status     recv_status,*send_status = PETSC_NULL;
2091:   PetscTruth     duplicate = PETSC_FALSE;
2092: #if defined(PETSC_USE_DEBUG)
2093:   PetscTruth     found = PETSC_FALSE;
2094: #endif

2097:   PetscObjectGetNewTag((PetscObject)ctx,&tag);
2098:   PetscObjectGetComm((PetscObject)xin,&comm);
2099:   MPI_Comm_size(comm,&size);
2100:   MPI_Comm_rank(comm,&rank);
2101:   if (size == 1) {
2102:     VecScatterCreate_StoP(nx,inidx,ny,inidy,xin,yin,1,ctx);
2103:     return(0);
2104:   }

2106:   /*
2107:      Each processor ships off its inidx[j] and inidy[j] to the appropriate processor
2108:      and then all call VecScatterCreate_StoP() with the pairs they received
2109:   */
2110:   /*  first count number of contributors to each processor */
2111:   PetscMalloc3(size,PetscMPIInt,&nprocs,nx,PetscInt,&owner,(size+1),PetscInt,&starts);
2112:   PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
2113:   lastidx = -1;
2114:   j       = 0;
2115:   for (i=0; i<nx; i++) {
2116:     /* if indices are NOT locally sorted, need to start search at the beginning */
2117:     if (lastidx > (idx = inidx[i])) j = 0;
2118:     lastidx = idx;
2119:     for (; j<size; j++) {
2120:       if (idx >= owners[j] && idx < owners[j+1]) {
2121:         nprocs[j]++;
2122:         owner[i] = j;
2123: #if defined(PETSC_USE_DEBUG)
2124:         found = PETSC_TRUE;
2125: #endif
2126:         break;
2127:       }
2128:     }
2129: #if defined(PETSC_USE_DEBUG)
2130:     if (!found) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2131:     found = PETSC_FALSE;
2132: #endif
2133:   }
2134:   nsends = 0;  for (i=0; i<size; i++) { nsends += (nprocs[i] > 0);}

2136:   /* inform other processors of the number of messages and their lengths */
2137:   PetscGatherNumberOfMessages(comm,PETSC_NULL,nprocs,&nrecvs);
2138:   PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2139:   PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2140:   recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];

2142:   /* post receives:   */
2143:   PetscMalloc5(2*recvtotal,PetscInt,&rvalues,2*nx,PetscInt,&svalues,nrecvs,MPI_Request,&recv_waits,nsends,MPI_Request,&send_waits,nsends,MPI_Status,&send_status);

2145:   count = 0;
2146:   for (i=0; i<nrecvs; i++) {
2147:     MPI_Irecv((rvalues+2*count),2*olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2148:     count += olengths1[i];
2149:   }
2150:   PetscFree(onodes1);

2152:   /* do sends:
2153:       1) starts[i] gives the starting index in svalues of the entries going to
2154:          the ith processor
2155:   */
2156:   starts[0] = 0;
2157:   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
2158:   for (i=0; i<nx; i++) {
2159:     svalues[2*starts[owner[i]]]       = inidx[i];
2160:     svalues[1 + 2*starts[owner[i]]++] = inidy[i];
2161:   }
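  /*
     svalues is packed as interleaved (from,to) pairs: entry 2*k is a global
     from-index and entry 2*k+1 the matching to-index, i.e. {x0,y0,x1,y1,...}.
     This is why every count in this routine is doubled when it reaches MPI:
     2*nprocs[i] in the sends below and 2*olengths1[i] in the receives above.
  */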

2163:   starts[0] = 0;
2164:   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
2165:   count = 0;
2166:   for (i=0; i<size; i++) {
2167:     if (nprocs[i]) {
2168:       MPI_Isend(svalues+2*starts[i],2*nprocs[i],MPIU_INT,i,tag,comm,send_waits+count);
2169:       count++;
2170:     }
2171:   }
2172:   PetscFree3(nprocs,owner,starts);

2174:   /*  wait on receives */
2175:   count = nrecvs;
2176:   slen  = 0;
2177:   while (count) {
2178:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2179:     /* tally the length of each incoming message; the data itself is unpacked below */
2180:     MPI_Get_count(&recv_status,MPIU_INT,&n);
2181:     slen += n/2;
2182:     count--;
2183:   }
2184:   if (slen != recvtotal) SETERRQ2(PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);
2185: 
2186:   PetscMalloc2(slen,PetscInt,&local_inidx,slen,PetscInt,&local_inidy);
2187:   base  = owners[rank];
2188:   count = 0;
2189:   rsvalues = rvalues;
2190:   for (i=0; i<nrecvs; i++) {
2191:     values = rsvalues;
2192:     rsvalues += 2*olengths1[i];
2193:     for (j=0; j<olengths1[i]; j++) {
2194:       local_inidx[count]   = values[2*j] - base;
2195:       local_inidy[count++] = values[2*j+1];
2196:     }
2197:   }
2198:   PetscFree(olengths1);

2200:   /* wait on sends */
2201:   if (nsends) {MPI_Waitall(nsends,send_waits,send_status);}
2202:   PetscFree5(rvalues,svalues,recv_waits,send_waits,send_status);

2204:   /*
2205:      should sort and remove duplicates from local_inidx,local_inidy; the block
2206:      below that would do so is compiled only when do_it_slow is defined
2207:   */

2208: #if defined(do_it_slow)
2209:   { PetscInt start,last;  /* scoped here because the enclosing function does not declare these; sort on the from index */
2210:   PetscSortIntWithArray(slen,local_inidx,local_inidy);
2211:   start = 0;
2212:   while (start < slen) {
2213:     count = start+1;
2214:     last  = local_inidx[start];
2215:     while (count < slen && last == local_inidx[count]) count++;
2216:     if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
2217:       /* sort on to index */
2218:       PetscSortInt(count-start,local_inidy+start);
2219:     }
2220:     /* remove duplicates; not most efficient way, but probably good enough */
2221:     i = start;
2222:     while (i < count-1) {
2223:       if (local_inidy[i] != local_inidy[i+1]) {
2224:         i++;
2225:       } else { /* found a duplicate */
2226:         duplicate = PETSC_TRUE;
2227:         for (j=i; j<slen-1; j++) {
2228:           local_inidx[j] = local_inidx[j+1];
2229:           local_inidy[j] = local_inidy[j+1];
2230:         }
2231:         slen--;
2232:         count--;
2233:       }
2234:     }
2235:     start = count;
2236:   } }  /* closes the while loop and the scope opened for start,last */
2237: #endif
2238:   if (duplicate) {
2239:     PetscInfo(ctx,"Duplicate from to indices passed in VecScatterCreate(), they are ignored\n");
2240:   }
2241:   VecScatterCreate_StoP(slen,local_inidx,slen,local_inidy,xin,yin,1,ctx);
2242:   PetscFree2(local_inidx,local_inidy);
2243:   return(0);
2244: }