Actual source code: vpscat_mpi1.c

petsc-3.9.4 2018-09-11

  2: /*
  3:     Defines parallel vector scatters using MPI1.
  4: */

  6:  #include <../src/vec/vec/impls/dvecimpl.h>
  7:  #include <../src/vec/vec/impls/mpi/pvecimpl.h>
  8:  #include <petscsf.h>
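/*
   A minimal usage sketch, assuming the standard public PETSc VecScatter API
   (x, y, ix, iy are hypothetical vectors and index sets); the MPI1 routines
   defined in this file sit behind VecScatterBegin()/VecScatterEnd():

      VecScatter ctx;
      VecScatterCreate(x,ix,y,iy,&ctx);                         build the communication pattern
      VecScatterBegin(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);   pack and start sends/receives
      VecScatterEnd(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);     complete receives and unpack
      VecScatterDestroy(&ctx);
*/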

 10: PetscErrorCode VecScatterView_MPI_MPI1(VecScatter ctx,PetscViewer viewer)
 11: {
 12:   VecScatter_MPI_General *to  =(VecScatter_MPI_General*)ctx->todata;
 13:   VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
 14:   PetscErrorCode         ierr;
 15:   PetscInt               i;
 16:   PetscMPIInt            rank;
 17:   PetscViewerFormat      format;
 18:   PetscBool              iascii;

 21:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
 22:   if (iascii) {
 23:     MPI_Comm_rank(PetscObjectComm((PetscObject)ctx),&rank);
 24:     PetscViewerGetFormat(viewer,&format);
 25:     if (format ==  PETSC_VIEWER_ASCII_INFO) {
 26:       PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;

 28:       MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 29:       MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 30:       itmp = to->starts[to->n];
 31:       MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 32:       itmp = from->starts[from->n];
 33:       MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 34:       MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)ctx));

 36:       PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
 37:       PetscViewerASCIIPrintf(viewer,"  Maximum number sends %D\n",nsend_max);
 38:       PetscViewerASCIIPrintf(viewer,"  Maximum number receives %D\n",nrecv_max);
 39:       PetscViewerASCIIPrintf(viewer,"  Maximum data sent %d\n",(int)(lensend_max*to->bs*sizeof(PetscScalar)));
 40:       PetscViewerASCIIPrintf(viewer,"  Maximum data received %d\n",(int)(lenrecv_max*from->bs*sizeof(PetscScalar)));
 41:       PetscViewerASCIIPrintf(viewer,"  Total data sent %d\n",(int)(alldata*to->bs*sizeof(PetscScalar)));

 43:     } else {
 44:       PetscViewerASCIIPushSynchronized(viewer);
 45:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);
 46:       if (to->n) {
 47:         for (i=0; i<to->n; i++) {
 48:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d]   %D length = %D to whom %d\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
 49:         }
 50:         PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote sends (in order by process sent to)\n");
 51:         for (i=0; i<to->starts[to->n]; i++) {
 52:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,to->indices[i]);
 53:         }
 54:       }

 56:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);
 57:       if (from->n) {
 58:         for (i=0; i<from->n; i++) {
 59:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %d\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
 60:         }

 62:         PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote receives (in order by process received from)\n");
 63:         for (i=0; i<from->starts[from->n]; i++) {
 64:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,from->indices[i]);
 65:         }
 66:       }
 67:       if (to->local.n) {
 68:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Indices for local part of scatter\n",rank);
 69:         for (i=0; i<to->local.n; i++) {  /* the to and from have the opposite meaning from what you would expect */
 70:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] From %D to %D \n",rank,to->local.vslots[i],from->local.vslots[i]);
 71:         }
 72:       }

 74:       PetscViewerFlush(viewer);
 75:       PetscViewerASCIIPopSynchronized(viewer);
 76:     }
 77:   }
 78:   return(0);
 79: }
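
/*
   A sketch of how the viewer above is typically invoked, assuming the standard
   PETSc viewer API (ctx is a hypothetical VecScatter):

      VecScatterView(ctx,PETSC_VIEWER_STDOUT_WORLD);             per-process listing
      PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);
      VecScatterView(ctx,PETSC_VIEWER_STDOUT_WORLD);             summary statistics only
      PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);
*/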

 81: /* -------------------------------------------------------------------------------------*/
 82: PetscErrorCode VecScatterDestroy_PtoP_MPI1(VecScatter ctx)
 83: {
 84:   VecScatter_MPI_General *to   = (VecScatter_MPI_General*)ctx->todata;
 85:   VecScatter_MPI_General *from = (VecScatter_MPI_General*)ctx->fromdata;
 86:   PetscErrorCode         ierr;
 87:   PetscInt               i;

 90:   if (to->use_readyreceiver) {
 91:     /*
 92:        Since ready-receiver mode has already posted the receives, we must
 93:        cancel them before freeing the requests
 94:     */
 95:     for (i=0; i<from->n; i++) {
 96:       MPI_Cancel(from->requests+i);
 97:     }
 98:     for (i=0; i<to->n; i++) {
 99:       MPI_Cancel(to->rev_requests+i);
100:     }
101:     MPI_Waitall(from->n,from->requests,to->rstatus);
102:     MPI_Waitall(to->n,to->rev_requests,to->rstatus);
103:   }

105: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
106:   if (to->use_alltoallw) {
107:     for (i=0; i<to->n; i++) {
108:       MPI_Type_free(to->types+to->procs[i]);
109:     }
110:     PetscFree3(to->wcounts,to->wdispls,to->types);
111:     if (!from->contiq) {
112:       for (i=0; i<from->n; i++) {
113:         MPI_Type_free(from->types+from->procs[i]);
114:       }
115:     }
116:     PetscFree3(from->wcounts,from->wdispls,from->types);
117:   }
118: #endif

120: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
121:   if (to->use_window) {
122:     MPI_Win_free(&from->window);
123:     MPI_Win_free(&to->window);
124:     PetscFree(from->winstarts);
125:     PetscFree(to->winstarts);
126:   }
127: #endif

129:   if (to->use_alltoallv) {
130:     PetscFree2(to->counts,to->displs);
131:     PetscFree2(from->counts,from->displs);
132:   }

134:   /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
135:   /*
136:      IBM's PE version of MPI has a bug in which freeing these requests
137:      corrupts later message passing.
138:   */
139: #if !defined(PETSC_HAVE_BROKEN_REQUEST_FREE)
140:   if (!to->use_alltoallv && !to->use_window) {   /* currently the to->requests etc are ALWAYS allocated even if not used */
141:     if (to->requests) {
142:       for (i=0; i<to->n; i++) {
143:         MPI_Request_free(to->requests + i);
144:       }
145:     }
146:     if (to->rev_requests) {
147:       for (i=0; i<to->n; i++) {
148:         MPI_Request_free(to->rev_requests + i);
149:       }
150:     }
151:   }
152:   /*
153:       MPICH could not properly cancel requests, so in ready-receiver mode we
154:     cannot free the requests. This may be fixed now; if not, wrap the following
155:     code in an if (!to->use_readyreceiver) { } block.
156:   */
157:   if (!to->use_alltoallv && !to->use_window) {    /* currently the from->requests etc are ALWAYS allocated even if not used */
158:     if (from->requests) {
159:       for (i=0; i<from->n; i++) {
160:         MPI_Request_free(from->requests + i);
161:       }
162:     }

164:     if (from->rev_requests) {
165:       for (i=0; i<from->n; i++) {
166:         MPI_Request_free(from->rev_requests + i);
167:       }
168:     }
169:   }
170: #endif

172:   PetscFree(to->local.vslots);
173:   PetscFree(from->local.vslots);
174:   PetscFree2(to->counts,to->displs);
175:   PetscFree2(from->counts,from->displs);
176:   PetscFree(to->local.slots_nonmatching);
177:   PetscFree(from->local.slots_nonmatching);
178:   PetscFree(to->rev_requests);
179:   PetscFree(from->rev_requests);
180:   PetscFree(to->requests);
181:   PetscFree(from->requests);
182:   PetscFree4(to->values,to->indices,to->starts,to->procs);
183:   PetscFree2(to->sstatus,to->rstatus);
184:   PetscFree4(from->values,from->indices,from->starts,from->procs);
185:   PetscFree(from);
186:   PetscFree(to);
187:   return(0);
188: }
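
/*
   The teardown above follows the usual MPI lifecycle for persistent requests;
   a condensed sketch of the pattern for a single started request req:

      MPI_Cancel(&req);           needed only if the request was already
      MPI_Wait(&req,&status);     started (ready-receiver mode); Wait completes
      MPI_Request_free(&req);     the cancellation, then the request is freed
*/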



192: /* --------------------------------------------------------------------------------------*/
193: /*
194:     Special optimization that checks whether the local part of the scatter is
195:     actually a contiguous copy; if so, the scatter routines can call PetscMemcpy() instead.
197: */
198: PetscErrorCode VecScatterLocalOptimizeCopy_Private_MPI1(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from,PetscInt bs)
199: {
200:   PetscInt       n = to->n,i,*to_slots = to->vslots,*from_slots = from->vslots;
201:   PetscInt       to_start,from_start;

205:   to_start   = to_slots[0];
206:   from_start = from_slots[0];

208:   for (i=1; i<n; i++) {
209:     to_start   += bs;
210:     from_start += bs;
211:     if (to_slots[i]   != to_start)   return(0);
212:     if (from_slots[i] != from_start) return(0);
213:   }
214:   to->is_copy       = PETSC_TRUE;
215:   to->copy_start    = to_slots[0];
216:   to->copy_length   = bs*sizeof(PetscScalar)*n;
217:   from->is_copy     = PETSC_TRUE;
218:   from->copy_start  = from_slots[0];
219:   from->copy_length = bs*sizeof(PetscScalar)*n;
220:   PetscInfo(scatter,"Local scatter is a copy, optimizing for it\n");
221:   return(0);
222: }
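
/*
   A worked example of the test above (hypothetical values): with bs = 2 and
   to->vslots = {10,12,14}, each slot equals the previous one plus bs, so the
   local scatter covers the contiguous range [10,16) and is marked as a copy of
   3*2*sizeof(PetscScalar) bytes starting at index 10. With vslots = {10,12,15}
   the run breaks at the third slot and the routine returns without setting
   is_copy.
*/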

224: /* --------------------------------------------------------------------------------------*/

226: PetscErrorCode VecScatterCopy_PtoP_X_MPI1(VecScatter in,VecScatter out)
227: {
228:   VecScatter_MPI_General *in_to   = (VecScatter_MPI_General*)in->todata;
229:   VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
230:   PetscErrorCode         ierr;
231:   PetscInt               ny,bs = in_from->bs;

234:   out->ops->begin   = in->ops->begin;
235:   out->ops->end     = in->ops->end;
236:   out->ops->copy    = in->ops->copy;
237:   out->ops->destroy = in->ops->destroy;
238:   out->ops->view    = in->ops->view;

240:   /* allocate entire send scatter context */
241:   PetscNewLog(out,&out_to);
242:   PetscNewLog(out,&out_from);

244:   ny                = in_to->starts[in_to->n];
245:   out_to->n         = in_to->n;
246:   out_to->format    = in_to->format;
247:   out_to->sendfirst = in_to->sendfirst;

249:   PetscMalloc1(out_to->n,&out_to->requests);
250:   PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
251:   PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
252:   PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
253:   PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
254:   PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));

256:   out->todata                        = (void*)out_to;
257:   out_to->local.n                    = in_to->local.n;
258:   out_to->local.nonmatching_computed = PETSC_FALSE;
259:   out_to->local.n_nonmatching        = 0;
260:   out_to->local.slots_nonmatching    = 0;
261:   if (in_to->local.n) {
262:     PetscMalloc1(in_to->local.n,&out_to->local.vslots);
263:     PetscMalloc1(in_from->local.n,&out_from->local.vslots);
264:     PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
265:     PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
266:   } else {
267:     out_to->local.vslots   = 0;
268:     out_from->local.vslots = 0;
269:   }

271:   /* allocate entire receive context */
272:   out_from->format    = in_from->format;
273:   ny                  = in_from->starts[in_from->n];
274:   out_from->n         = in_from->n;
275:   out_from->sendfirst = in_from->sendfirst;

277:   PetscMalloc1(out_from->n,&out_from->requests);
278:   PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
279:   PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
280:   PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
281:   PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));

283:   out->fromdata                        = (void*)out_from;
284:   out_from->local.n                    = in_from->local.n;
285:   out_from->local.nonmatching_computed = PETSC_FALSE;
286:   out_from->local.n_nonmatching        = 0;
287:   out_from->local.slots_nonmatching    = 0;

289:   /*
290:       set up the request arrays for use with MPI_Send_init() and MPI_Recv_init()
291:   */
292:   {
293:     PetscMPIInt tag;
294:     MPI_Comm    comm;
295:     PetscInt    *sstarts = out_to->starts,  *rstarts = out_from->starts;
296:     PetscMPIInt *sprocs  = out_to->procs,   *rprocs  = out_from->procs;
297:     PetscInt    i;
298:     PetscBool   flg;
299:     MPI_Request *swaits   = out_to->requests,*rwaits  = out_from->requests;
300:     MPI_Request *rev_swaits,*rev_rwaits;
301:     PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;

303:     PetscMalloc1(in_to->n,&out_to->rev_requests);
304:     PetscMalloc1(in_from->n,&out_from->rev_requests);

306:     rev_rwaits = out_to->rev_requests;
307:     rev_swaits = out_from->rev_requests;

309:     out_from->bs = out_to->bs = bs;
310:     tag          = ((PetscObject)out)->tag;
311:     PetscObjectGetComm((PetscObject)out,&comm);

313:     /* Register the receives that you will use later (sends for scatter reverse) */
314:     for (i=0; i<out_from->n; i++) {
315:       MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
316:       MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
317:     }

319:     flg  = PETSC_FALSE;
320:     PetscOptionsGetBool(NULL,NULL,"-vecscatter_rsend",&flg,NULL);
321:     if (flg) {
322:       out_to->use_readyreceiver   = PETSC_TRUE;
323:       out_from->use_readyreceiver = PETSC_TRUE;
324:       for (i=0; i<out_to->n; i++) {
325:         MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
326:       }
327:       if (out_from->n) {MPI_Startall_irecv(out_from->starts[out_from->n]*out_from->bs,out_from->n,out_from->requests);}
328:       MPI_Barrier(comm);
329:       PetscInfo(in,"Using VecScatter ready receiver mode\n");
330:     } else {
331:       out_to->use_readyreceiver   = PETSC_FALSE;
332:       out_from->use_readyreceiver = PETSC_FALSE;
333:       flg                         = PETSC_FALSE;
334:       PetscOptionsGetBool(NULL,NULL,"-vecscatter_ssend",&flg,NULL);
335:       if (flg) {
336:         PetscInfo(in,"Using VecScatter Ssend mode\n");
337:       }
338:       for (i=0; i<out_to->n; i++) {
339:         if (!flg) {
340:           MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
341:         } else {
342:           MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
343:         }
344:       }
345:     }
346:     /* Register receives for scatter reverse */
347:     for (i=0; i<out_to->n; i++) {
348:       MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
349:     }
350:   }
351:   return(0);
352: }
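
/*
   The requests registered above are persistent: created once with
   MPI_Send_init()/MPI_Recv_init() and reused on every scatter. A condensed
   sketch of the reuse cycle, assuming nreq initialized requests in reqs[]:

      MPI_Startall(nreq,reqs);            activate the preposted sends/receives
      MPI_Waitall(nreq,reqs,statuses);    complete them; reqs[] remain allocated
                                          and can be started again next time
*/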

354: PetscErrorCode VecScatterCopy_PtoP_AllToAll_MPI1(VecScatter in,VecScatter out)
355: {
356:   VecScatter_MPI_General *in_to   = (VecScatter_MPI_General*)in->todata;
357:   VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
358:   PetscErrorCode         ierr;
359:   PetscInt               ny,bs = in_from->bs;
360:   PetscMPIInt            size;

363:   MPI_Comm_size(PetscObjectComm((PetscObject)in),&size);

365:   out->ops->begin     = in->ops->begin;
366:   out->ops->end       = in->ops->end;
367:   out->ops->copy      = in->ops->copy;
368:   out->ops->destroy   = in->ops->destroy;
369:   out->ops->view      = in->ops->view;

371:   /* allocate entire send scatter context */
372:   PetscNewLog(out,&out_to);
373:   PetscNewLog(out,&out_from);

375:   ny                = in_to->starts[in_to->n];
376:   out_to->n         = in_to->n;
377:   out_to->format    = in_to->format;
378:   out_to->sendfirst = in_to->sendfirst;

380:   PetscMalloc1(out_to->n,&out_to->requests);
381:   PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
382:   PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
383:   PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
384:   PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
385:   PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));

387:   out->todata                        = (void*)out_to;
388:   out_to->local.n                    = in_to->local.n;
389:   out_to->local.nonmatching_computed = PETSC_FALSE;
390:   out_to->local.n_nonmatching        = 0;
391:   out_to->local.slots_nonmatching    = 0;
392:   if (in_to->local.n) {
393:     PetscMalloc1(in_to->local.n,&out_to->local.vslots);
394:     PetscMalloc1(in_from->local.n,&out_from->local.vslots);
395:     PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
396:     PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
397:   } else {
398:     out_to->local.vslots   = 0;
399:     out_from->local.vslots = 0;
400:   }

402:   /* allocate entire receive context */
403:   out_from->format    = in_from->format;
404:   ny                  = in_from->starts[in_from->n];
405:   out_from->n         = in_from->n;
406:   out_from->sendfirst = in_from->sendfirst;

408:   PetscMalloc1(out_from->n,&out_from->requests);
409:   PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
410:   PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
411:   PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
412:   PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));

414:   out->fromdata                        = (void*)out_from;
415:   out_from->local.n                    = in_from->local.n;
416:   out_from->local.nonmatching_computed = PETSC_FALSE;
417:   out_from->local.n_nonmatching        = 0;
418:   out_from->local.slots_nonmatching    = 0;

420:   out_to->use_alltoallv = out_from->use_alltoallv = PETSC_TRUE;

422:   PetscMalloc2(size,&out_to->counts,size,&out_to->displs);
423:   PetscMemcpy(out_to->counts,in_to->counts,size*sizeof(PetscMPIInt));
424:   PetscMemcpy(out_to->displs,in_to->displs,size*sizeof(PetscMPIInt));

426:   PetscMalloc2(size,&out_from->counts,size,&out_from->displs);
427:   PetscMemcpy(out_from->counts,in_from->counts,size*sizeof(PetscMPIInt));
428:   PetscMemcpy(out_from->displs,in_from->displs,size*sizeof(PetscMPIInt));
429:   return(0);
430: }
431: /* --------------------------------------------------------------------------------------------------
432:     Packs message data into send buffers and unpacks it from receive buffers.

434:     These kernels could be generated automatically.

436:     Fortran kernels, etc., could also be used.
437: */
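
/*
   A small worked example of the kernels below (hypothetical data, bs = 1):
   with indicesx = {4,0,2} and x = {a,b,c,d,e}, Pack_MPI1_1 fills the send
   buffer y = {e,a,c}. On the receiving side, UnPack_MPI1_1 with
   indicesy = {1,3,0} and INSERT_VALUES writes y[1] = e, y[3] = a, y[0] = c,
   while ADD_VALUES accumulates into those slots instead. The bs-sized variants
   below move bs consecutive scalars per index.
*/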
438: PETSC_STATIC_INLINE void Pack_MPI1_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
439: {
440:   PetscInt i;
441:   for (i=0; i<n; i++) y[i] = x[indicesx[i]];
442: }

444: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_1(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
445: {
446:   PetscInt i;

449:   switch (addv) {
450:   case INSERT_VALUES:
451:   case INSERT_ALL_VALUES:
452:     for (i=0; i<n; i++) y[indicesy[i]] = x[i];
453:     break;
454:   case ADD_VALUES:
455:   case ADD_ALL_VALUES:
456:     for (i=0; i<n; i++) y[indicesy[i]] += x[i];
457:     break;
458: #if !defined(PETSC_USE_COMPLEX)
459:   case MAX_VALUES:
460:     for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[i]);
461: #else
462:   case MAX_VALUES:
463: #endif
464:   case NOT_SET_VALUES:
465:     break;
466:   default:
467:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
468:   }
469:   return(0);
470: }

472: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
473: {
474:   PetscInt i;

477:   switch (addv) {
478:   case INSERT_VALUES:
479:   case INSERT_ALL_VALUES:
480:     for (i=0; i<n; i++) y[indicesy[i]] = x[indicesx[i]];
481:     break;
482:   case ADD_VALUES:
483:   case ADD_ALL_VALUES:
484:     for (i=0; i<n; i++) y[indicesy[i]] += x[indicesx[i]];
485:     break;
486: #if !defined(PETSC_USE_COMPLEX)
487:   case MAX_VALUES:
488:     for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[indicesx[i]]);
489: #else
490:   case MAX_VALUES:
491: #endif
492:   case NOT_SET_VALUES:
493:     break;
494:   default:
495:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
496:   }
497:   return(0);
498: }

500: /* ----------------------------------------------------------------------------------------------- */
501: PETSC_STATIC_INLINE void Pack_MPI1_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
502: {
503:   PetscInt i,idx;

505:   for (i=0; i<n; i++) {
506:     idx  = *indicesx++;
507:     y[0] = x[idx];
508:     y[1] = x[idx+1];
509:     y   += 2;
510:   }
511: }

513: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_2(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
514: {
515:   PetscInt i,idy;

518:   switch (addv) {
519:   case INSERT_VALUES:
520:   case INSERT_ALL_VALUES:
521:     for (i=0; i<n; i++) {
522:       idy      = *indicesy++;
523:       y[idy]   = x[0];
524:       y[idy+1] = x[1];
525:       x       += 2;
526:     }
527:     break;
528:   case ADD_VALUES:
529:   case ADD_ALL_VALUES:
530:     for (i=0; i<n; i++) {
531:       idy       = *indicesy++;
532:       y[idy]   += x[0];
533:       y[idy+1] += x[1];
534:       x        += 2;
535:     }
536:     break;
537: #if !defined(PETSC_USE_COMPLEX)
538:   case MAX_VALUES:
539:     for (i=0; i<n; i++) {
540:       idy      = *indicesy++;
541:       y[idy]   = PetscMax(y[idy],x[0]);
542:       y[idy+1] = PetscMax(y[idy+1],x[1]);
543:       x       += 2;
544:     }
545: #else
546:   case MAX_VALUES:
547: #endif
548:   case NOT_SET_VALUES:
549:     break;
550:   default:
551:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
552:   }
553:   return(0);
554: }

556: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
557: {
558:   PetscInt i,idx,idy;

561:   switch (addv) {
562:   case INSERT_VALUES:
563:   case INSERT_ALL_VALUES:
564:     for (i=0; i<n; i++) {
565:       idx      = *indicesx++;
566:       idy      = *indicesy++;
567:       y[idy]   = x[idx];
568:       y[idy+1] = x[idx+1];
569:     }
570:     break;
571:   case ADD_VALUES:
572:   case ADD_ALL_VALUES:
573:     for (i=0; i<n; i++) {
574:       idx       = *indicesx++;
575:       idy       = *indicesy++;
576:       y[idy]   += x[idx];
577:       y[idy+1] += x[idx+1];
578:     }
579:     break;
580: #if !defined(PETSC_USE_COMPLEX)
581:   case MAX_VALUES:
582:     for (i=0; i<n; i++) {
583:       idx      = *indicesx++;
584:       idy      = *indicesy++;
585:       y[idy]   = PetscMax(y[idy],x[idx]);
586:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
587:     }
588: #else
589:   case MAX_VALUES:
590: #endif
591:   case NOT_SET_VALUES:
592:     break;
593:   default:
594:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
595:   }
596:   return(0);
597: }
598: /* ----------------------------------------------------------------------------------------------- */
599: PETSC_STATIC_INLINE void Pack_MPI1_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
600: {
601:   PetscInt i,idx;

603:   for (i=0; i<n; i++) {
604:     idx  = *indicesx++;
605:     y[0] = x[idx];
606:     y[1] = x[idx+1];
607:     y[2] = x[idx+2];
608:     y   += 3;
609:   }
610: }
611: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_3(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
612: {
613:   PetscInt i,idy;

616:   switch (addv) {
617:   case INSERT_VALUES:
618:   case INSERT_ALL_VALUES:
619:     for (i=0; i<n; i++) {
620:       idy      = *indicesy++;
621:       y[idy]   = x[0];
622:       y[idy+1] = x[1];
623:       y[idy+2] = x[2];
624:       x       += 3;
625:     }
626:     break;
627:   case ADD_VALUES:
628:   case ADD_ALL_VALUES:
629:     for (i=0; i<n; i++) {
630:       idy       = *indicesy++;
631:       y[idy]   += x[0];
632:       y[idy+1] += x[1];
633:       y[idy+2] += x[2];
634:       x        += 3;
635:     }
636:     break;
637: #if !defined(PETSC_USE_COMPLEX)
638:   case MAX_VALUES:
639:     for (i=0; i<n; i++) {
640:       idy      = *indicesy++;
641:       y[idy]   = PetscMax(y[idy],x[0]);
642:       y[idy+1] = PetscMax(y[idy+1],x[1]);
643:       y[idy+2] = PetscMax(y[idy+2],x[2]);
644:       x       += 3;
645:     }
646: #else
647:   case MAX_VALUES:
648: #endif
649:   case NOT_SET_VALUES:
650:     break;
651:   default:
652:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
653:   }
654:   return(0);
655: }

657: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
658: {
659:   PetscInt i,idx,idy;

662:   switch (addv) {
663:   case INSERT_VALUES:
664:   case INSERT_ALL_VALUES:
665:     for (i=0; i<n; i++) {
666:       idx      = *indicesx++;
667:       idy      = *indicesy++;
668:       y[idy]   = x[idx];
669:       y[idy+1] = x[idx+1];
670:       y[idy+2] = x[idx+2];
671:     }
672:     break;
673:   case ADD_VALUES:
674:   case ADD_ALL_VALUES:
675:     for (i=0; i<n; i++) {
676:       idx       = *indicesx++;
677:       idy       = *indicesy++;
678:       y[idy]   += x[idx];
679:       y[idy+1] += x[idx+1];
680:       y[idy+2] += x[idx+2];
681:     }
682:     break;
683: #if !defined(PETSC_USE_COMPLEX)
684:   case MAX_VALUES:
685:     for (i=0; i<n; i++) {
686:       idx      = *indicesx++;
687:       idy      = *indicesy++;
688:       y[idy]   = PetscMax(y[idy],x[idx]);
689:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
690:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
691:     }
692: #else
693:   case MAX_VALUES:
694: #endif
695:   case NOT_SET_VALUES:
696:     break;
697:   default:
698:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
699:   }
700:   return(0);
701: }
702: /* ----------------------------------------------------------------------------------------------- */
703: PETSC_STATIC_INLINE void Pack_MPI1_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
704: {
705:   PetscInt i,idx;

707:   for (i=0; i<n; i++) {
708:     idx  = *indicesx++;
709:     y[0] = x[idx];
710:     y[1] = x[idx+1];
711:     y[2] = x[idx+2];
712:     y[3] = x[idx+3];
713:     y   += 4;
714:   }
715: }
716: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_4(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
717: {
718:   PetscInt i,idy;

721:   switch (addv) {
722:   case INSERT_VALUES:
723:   case INSERT_ALL_VALUES:
724:     for (i=0; i<n; i++) {
725:       idy      = *indicesy++;
726:       y[idy]   = x[0];
727:       y[idy+1] = x[1];
728:       y[idy+2] = x[2];
729:       y[idy+3] = x[3];
730:       x       += 4;
731:     }
732:     break;
733:   case ADD_VALUES:
734:   case ADD_ALL_VALUES:
735:     for (i=0; i<n; i++) {
736:       idy       = *indicesy++;
737:       y[idy]   += x[0];
738:       y[idy+1] += x[1];
739:       y[idy+2] += x[2];
740:       y[idy+3] += x[3];
741:       x        += 4;
742:     }
743:     break;
744: #if !defined(PETSC_USE_COMPLEX)
745:   case MAX_VALUES:
746:     for (i=0; i<n; i++) {
747:       idy      = *indicesy++;
748:       y[idy]   = PetscMax(y[idy],x[0]);
749:       y[idy+1] = PetscMax(y[idy+1],x[1]);
750:       y[idy+2] = PetscMax(y[idy+2],x[2]);
751:       y[idy+3] = PetscMax(y[idy+3],x[3]);
752:       x       += 4;
753:     }
754: #else
755:   case MAX_VALUES:
756: #endif
757:   case NOT_SET_VALUES:
758:     break;
759:   default:
760:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
761:   }
762:   return(0);
763: }

765: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
766: {
767:   PetscInt i,idx,idy;

770:   switch (addv) {
771:   case INSERT_VALUES:
772:   case INSERT_ALL_VALUES:
773:     for (i=0; i<n; i++) {
774:       idx      = *indicesx++;
775:       idy      = *indicesy++;
776:       y[idy]   = x[idx];
777:       y[idy+1] = x[idx+1];
778:       y[idy+2] = x[idx+2];
779:       y[idy+3] = x[idx+3];
780:     }
781:     break;
782:   case ADD_VALUES:
783:   case ADD_ALL_VALUES:
784:     for (i=0; i<n; i++) {
785:       idx       = *indicesx++;
786:       idy       = *indicesy++;
787:       y[idy]   += x[idx];
788:       y[idy+1] += x[idx+1];
789:       y[idy+2] += x[idx+2];
790:       y[idy+3] += x[idx+3];
791:     }
792:     break;
793: #if !defined(PETSC_USE_COMPLEX)
794:   case MAX_VALUES:
795:     for (i=0; i<n; i++) {
796:       idx      = *indicesx++;
797:       idy      = *indicesy++;
798:       y[idy]   = PetscMax(y[idy],x[idx]);
799:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
800:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
801:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
802:     }
803: #else
804:   case MAX_VALUES:
805: #endif
806:   case NOT_SET_VALUES:
807:     break;
808:   default:
809:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
810:   }
811:   return(0);
812: }
813: /* ----------------------------------------------------------------------------------------------- */
814: PETSC_STATIC_INLINE void Pack_MPI1_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
815: {
816:   PetscInt i,idx;

818:   for (i=0; i<n; i++) {
819:     idx  = *indicesx++;
820:     y[0] = x[idx];
821:     y[1] = x[idx+1];
822:     y[2] = x[idx+2];
823:     y[3] = x[idx+3];
824:     y[4] = x[idx+4];
825:     y   += 5;
826:   }
827: }

829: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_5(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
830: {
831:   PetscInt i,idy;

834:   switch (addv) {
835:   case INSERT_VALUES:
836:   case INSERT_ALL_VALUES:
837:     for (i=0; i<n; i++) {
838:       idy      = *indicesy++;
839:       y[idy]   = x[0];
840:       y[idy+1] = x[1];
841:       y[idy+2] = x[2];
842:       y[idy+3] = x[3];
843:       y[idy+4] = x[4];
844:       x       += 5;
845:     }
846:     break;
847:   case ADD_VALUES:
848:   case ADD_ALL_VALUES:
849:     for (i=0; i<n; i++) {
850:       idy       = *indicesy++;
851:       y[idy]   += x[0];
852:       y[idy+1] += x[1];
853:       y[idy+2] += x[2];
854:       y[idy+3] += x[3];
855:       y[idy+4] += x[4];
856:       x        += 5;
857:     }
858:     break;
859: #if !defined(PETSC_USE_COMPLEX)
860:   case MAX_VALUES:
861:     for (i=0; i<n; i++) {
862:       idy      = *indicesy++;
863:       y[idy]   = PetscMax(y[idy],x[0]);
864:       y[idy+1] = PetscMax(y[idy+1],x[1]);
865:       y[idy+2] = PetscMax(y[idy+2],x[2]);
866:       y[idy+3] = PetscMax(y[idy+3],x[3]);
867:       y[idy+4] = PetscMax(y[idy+4],x[4]);
868:       x       += 5;
869:     }
870: #else
871:   case MAX_VALUES:
872: #endif
873:   case NOT_SET_VALUES:
874:     break;
875:   default:
876:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
877:   }
878:   return(0);
879: }

881: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
882: {
883:   PetscInt i,idx,idy;

886:   switch (addv) {
887:   case INSERT_VALUES:
888:   case INSERT_ALL_VALUES:
889:     for (i=0; i<n; i++) {
890:       idx      = *indicesx++;
891:       idy      = *indicesy++;
892:       y[idy]   = x[idx];
893:       y[idy+1] = x[idx+1];
894:       y[idy+2] = x[idx+2];
895:       y[idy+3] = x[idx+3];
896:       y[idy+4] = x[idx+4];
897:     }
898:     break;
899:   case ADD_VALUES:
900:   case ADD_ALL_VALUES:
901:     for (i=0; i<n; i++) {
902:       idx       = *indicesx++;
903:       idy       = *indicesy++;
904:       y[idy]   += x[idx];
905:       y[idy+1] += x[idx+1];
906:       y[idy+2] += x[idx+2];
907:       y[idy+3] += x[idx+3];
908:       y[idy+4] += x[idx+4];
909:     }
910:     break;
911: #if !defined(PETSC_USE_COMPLEX)
912:   case MAX_VALUES:
913:     for (i=0; i<n; i++) {
914:       idx      = *indicesx++;
915:       idy      = *indicesy++;
916:       y[idy]   = PetscMax(y[idy],x[idx]);
917:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
918:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
919:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
920:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
921:     }
922: #else
923:   case MAX_VALUES:
924: #endif
925:   case NOT_SET_VALUES:
926:     break;
927:   default:
928:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
929:   }
930:   return(0);
931: }
932: /* ----------------------------------------------------------------------------------------------- */
933: PETSC_STATIC_INLINE void Pack_MPI1_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
934: {
935:   PetscInt i,idx;

937:   for (i=0; i<n; i++) {
938:     idx  = *indicesx++;
939:     y[0] = x[idx];
940:     y[1] = x[idx+1];
941:     y[2] = x[idx+2];
942:     y[3] = x[idx+3];
943:     y[4] = x[idx+4];
944:     y[5] = x[idx+5];
945:     y   += 6;
946:   }
947: }

949: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_6(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
950: {
951:   PetscInt i,idy;

954:   switch (addv) {
955:   case INSERT_VALUES:
956:   case INSERT_ALL_VALUES:
957:     for (i=0; i<n; i++) {
958:       idy      = *indicesy++;
959:       y[idy]   = x[0];
960:       y[idy+1] = x[1];
961:       y[idy+2] = x[2];
962:       y[idy+3] = x[3];
963:       y[idy+4] = x[4];
964:       y[idy+5] = x[5];
965:       x       += 6;
966:     }
967:     break;
968:   case ADD_VALUES:
969:   case ADD_ALL_VALUES:
970:     for (i=0; i<n; i++) {
971:       idy       = *indicesy++;
972:       y[idy]   += x[0];
973:       y[idy+1] += x[1];
974:       y[idy+2] += x[2];
975:       y[idy+3] += x[3];
976:       y[idy+4] += x[4];
977:       y[idy+5] += x[5];
978:       x        += 6;
979:     }
980:     break;
981: #if !defined(PETSC_USE_COMPLEX)
982:   case MAX_VALUES:
983:     for (i=0; i<n; i++) {
984:       idy      = *indicesy++;
985:       y[idy]   = PetscMax(y[idy],x[0]);
986:       y[idy+1] = PetscMax(y[idy+1],x[1]);
987:       y[idy+2] = PetscMax(y[idy+2],x[2]);
988:       y[idy+3] = PetscMax(y[idy+3],x[3]);
989:       y[idy+4] = PetscMax(y[idy+4],x[4]);
990:       y[idy+5] = PetscMax(y[idy+5],x[5]);
991:       x       += 6;
992:     }
993: #else
994:   case MAX_VALUES:
995: #endif
996:   case NOT_SET_VALUES:
997:     break;
998:   default:
999:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1000:   }
1001:   return(0);
1002: }

1004: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1005: {
1006:   PetscInt i,idx,idy;

1009:   switch (addv) {
1010:   case INSERT_VALUES:
1011:   case INSERT_ALL_VALUES:
1012:     for (i=0; i<n; i++) {
1013:       idx      = *indicesx++;
1014:       idy      = *indicesy++;
1015:       y[idy]   = x[idx];
1016:       y[idy+1] = x[idx+1];
1017:       y[idy+2] = x[idx+2];
1018:       y[idy+3] = x[idx+3];
1019:       y[idy+4] = x[idx+4];
1020:       y[idy+5] = x[idx+5];
1021:     }
1022:     break;
1023:   case ADD_VALUES:
1024:   case ADD_ALL_VALUES:
1025:     for (i=0; i<n; i++) {
1026:       idx       = *indicesx++;
1027:       idy       = *indicesy++;
1028:       y[idy]   += x[idx];
1029:       y[idy+1] += x[idx+1];
1030:       y[idy+2] += x[idx+2];
1031:       y[idy+3] += x[idx+3];
1032:       y[idy+4] += x[idx+4];
1033:       y[idy+5] += x[idx+5];
1034:     }
1035:     break;
1036: #if !defined(PETSC_USE_COMPLEX)
1037:   case MAX_VALUES:
1038:     for (i=0; i<n; i++) {
1039:       idx      = *indicesx++;
1040:       idy      = *indicesy++;
1041:       y[idy]   = PetscMax(y[idy],x[idx]);
1042:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1043:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1044:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1045:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1046:       y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1047:     }
1048: #else
1049:   case MAX_VALUES:
1050: #endif
1051:   case NOT_SET_VALUES:
1052:     break;
1053:   default:
1054:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1055:   }
1056:   return(0);
1057: }
1058: /* ----------------------------------------------------------------------------------------------- */
1059: PETSC_STATIC_INLINE void Pack_MPI1_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1060: {
1061:   PetscInt i,idx;

1063:   for (i=0; i<n; i++) {
1064:     idx  = *indicesx++;
1065:     y[0] = x[idx];
1066:     y[1] = x[idx+1];
1067:     y[2] = x[idx+2];
1068:     y[3] = x[idx+3];
1069:     y[4] = x[idx+4];
1070:     y[5] = x[idx+5];
1071:     y[6] = x[idx+6];
1072:     y   += 7;
1073:   }
1074: }

1076: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_7(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1077: {
1078:   PetscInt i,idy;

1081:   switch (addv) {
1082:   case INSERT_VALUES:
1083:   case INSERT_ALL_VALUES:
1084:     for (i=0; i<n; i++) {
1085:       idy      = *indicesy++;
1086:       y[idy]   = x[0];
1087:       y[idy+1] = x[1];
1088:       y[idy+2] = x[2];
1089:       y[idy+3] = x[3];
1090:       y[idy+4] = x[4];
1091:       y[idy+5] = x[5];
1092:       y[idy+6] = x[6];
1093:       x       += 7;
1094:     }
1095:     break;
1096:   case ADD_VALUES:
1097:   case ADD_ALL_VALUES:
1098:     for (i=0; i<n; i++) {
1099:       idy       = *indicesy++;
1100:       y[idy]   += x[0];
1101:       y[idy+1] += x[1];
1102:       y[idy+2] += x[2];
1103:       y[idy+3] += x[3];
1104:       y[idy+4] += x[4];
1105:       y[idy+5] += x[5];
1106:       y[idy+6] += x[6];
1107:       x        += 7;
1108:     }
1109:     break;
1110: #if !defined(PETSC_USE_COMPLEX)
1111:   case MAX_VALUES:
1112:     for (i=0; i<n; i++) {
1113:       idy      = *indicesy++;
1114:       y[idy]   = PetscMax(y[idy],x[0]);
1115:       y[idy+1] = PetscMax(y[idy+1],x[1]);
1116:       y[idy+2] = PetscMax(y[idy+2],x[2]);
1117:       y[idy+3] = PetscMax(y[idy+3],x[3]);
1118:       y[idy+4] = PetscMax(y[idy+4],x[4]);
1119:       y[idy+5] = PetscMax(y[idy+5],x[5]);
1120:       y[idy+6] = PetscMax(y[idy+6],x[6]);
1121:       x       += 7;
1122:     }
1123: #else
1124:   case MAX_VALUES:
1125: #endif
1126:   case NOT_SET_VALUES:
1127:     break;
1128:   default:
1129:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1130:   }
1131:   return(0);
1132: }

1134: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1135: {
1136:   PetscInt i,idx,idy;

1139:   switch (addv) {
1140:   case INSERT_VALUES:
1141:   case INSERT_ALL_VALUES:
1142:     for (i=0; i<n; i++) {
1143:       idx      = *indicesx++;
1144:       idy      = *indicesy++;
1145:       y[idy]   = x[idx];
1146:       y[idy+1] = x[idx+1];
1147:       y[idy+2] = x[idx+2];
1148:       y[idy+3] = x[idx+3];
1149:       y[idy+4] = x[idx+4];
1150:       y[idy+5] = x[idx+5];
1151:       y[idy+6] = x[idx+6];
1152:     }
1153:     break;
1154:   case ADD_VALUES:
1155:   case ADD_ALL_VALUES:
1156:     for (i=0; i<n; i++) {
1157:       idx       = *indicesx++;
1158:       idy       = *indicesy++;
1159:       y[idy]   += x[idx];
1160:       y[idy+1] += x[idx+1];
1161:       y[idy+2] += x[idx+2];
1162:       y[idy+3] += x[idx+3];
1163:       y[idy+4] += x[idx+4];
1164:       y[idy+5] += x[idx+5];
1165:       y[idy+6] += x[idx+6];
1166:     }
1167:     break;
1168: #if !defined(PETSC_USE_COMPLEX)
1169:   case MAX_VALUES:
1170:     for (i=0; i<n; i++) {
1171:       idx      = *indicesx++;
1172:       idy      = *indicesy++;
1173:       y[idy]   = PetscMax(y[idy],x[idx]);
1174:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1175:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1176:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1177:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1178:       y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1179:       y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1180:     }
1181: #else
1182:   case MAX_VALUES:
1183: #endif
1184:   case NOT_SET_VALUES:
1185:     break;
1186:   default:
1187:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1188:   }
1189:   return(0);
1190: }
1191: /* ----------------------------------------------------------------------------------------------- */
1192: PETSC_STATIC_INLINE void Pack_MPI1_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1193: {
1194:   PetscInt i,idx;

1196:   for (i=0; i<n; i++) {
1197:     idx  = *indicesx++;
1198:     y[0] = x[idx];
1199:     y[1] = x[idx+1];
1200:     y[2] = x[idx+2];
1201:     y[3] = x[idx+3];
1202:     y[4] = x[idx+4];
1203:     y[5] = x[idx+5];
1204:     y[6] = x[idx+6];
1205:     y[7] = x[idx+7];
1206:     y   += 8;
1207:   }
1208: }

1210: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_8(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1211: {
1212:   PetscInt i,idy;

1215:   switch (addv) {
1216:   case INSERT_VALUES:
1217:   case INSERT_ALL_VALUES:
1218:     for (i=0; i<n; i++) {
1219:       idy      = *indicesy++;
1220:       y[idy]   = x[0];
1221:       y[idy+1] = x[1];
1222:       y[idy+2] = x[2];
1223:       y[idy+3] = x[3];
1224:       y[idy+4] = x[4];
1225:       y[idy+5] = x[5];
1226:       y[idy+6] = x[6];
1227:       y[idy+7] = x[7];
1228:       x       += 8;
1229:     }
1230:     break;
1231:   case ADD_VALUES:
1232:   case ADD_ALL_VALUES:
1233:     for (i=0; i<n; i++) {
1234:       idy       = *indicesy++;
1235:       y[idy]   += x[0];
1236:       y[idy+1] += x[1];
1237:       y[idy+2] += x[2];
1238:       y[idy+3] += x[3];
1239:       y[idy+4] += x[4];
1240:       y[idy+5] += x[5];
1241:       y[idy+6] += x[6];
1242:       y[idy+7] += x[7];
1243:       x        += 8;
1244:     }
1245:     break;
1246: #if !defined(PETSC_USE_COMPLEX)
1247:   case MAX_VALUES:
1248:     for (i=0; i<n; i++) {
1249:       idy      = *indicesy++;
1250:       y[idy]   = PetscMax(y[idy],x[0]);
1251:       y[idy+1] = PetscMax(y[idy+1],x[1]);
1252:       y[idy+2] = PetscMax(y[idy+2],x[2]);
1253:       y[idy+3] = PetscMax(y[idy+3],x[3]);
1254:       y[idy+4] = PetscMax(y[idy+4],x[4]);
1255:       y[idy+5] = PetscMax(y[idy+5],x[5]);
1256:       y[idy+6] = PetscMax(y[idy+6],x[6]);
1257:       y[idy+7] = PetscMax(y[idy+7],x[7]);
1258:       x       += 8;
1259:     }
1260: #else
1261:   case MAX_VALUES:
1262: #endif
1263:   case NOT_SET_VALUES:
1264:     break;
1265:   default:
1266:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1267:   }
1268:   return(0);
1269: }

1271: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1272: {
1273:   PetscInt i,idx,idy;

1276:   switch (addv) {
1277:   case INSERT_VALUES:
1278:   case INSERT_ALL_VALUES:
1279:     for (i=0; i<n; i++) {
1280:       idx      = *indicesx++;
1281:       idy      = *indicesy++;
1282:       y[idy]   = x[idx];
1283:       y[idy+1] = x[idx+1];
1284:       y[idy+2] = x[idx+2];
1285:       y[idy+3] = x[idx+3];
1286:       y[idy+4] = x[idx+4];
1287:       y[idy+5] = x[idx+5];
1288:       y[idy+6] = x[idx+6];
1289:       y[idy+7] = x[idx+7];
1290:     }
1291:     break;
1292:   case ADD_VALUES:
1293:   case ADD_ALL_VALUES:
1294:     for (i=0; i<n; i++) {
1295:       idx       = *indicesx++;
1296:       idy       = *indicesy++;
1297:       y[idy]   += x[idx];
1298:       y[idy+1] += x[idx+1];
1299:       y[idy+2] += x[idx+2];
1300:       y[idy+3] += x[idx+3];
1301:       y[idy+4] += x[idx+4];
1302:       y[idy+5] += x[idx+5];
1303:       y[idy+6] += x[idx+6];
1304:       y[idy+7] += x[idx+7];
1305:     }
1306:     break;
1307: #if !defined(PETSC_USE_COMPLEX)
1308:   case MAX_VALUES:
1309:     for (i=0; i<n; i++) {
1310:       idx      = *indicesx++;
1311:       idy      = *indicesy++;
1312:       y[idy]   = PetscMax(y[idy],x[idx]);
1313:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1314:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1315:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1316:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1317:       y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1318:       y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1319:       y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1320:     }
1321: #else
1322:   case MAX_VALUES:
1323: #endif
1324:   case NOT_SET_VALUES:
1325:     break;
1326:   default:
1327:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1328:   }
1329:   return(0);
1330: }

1332: PETSC_STATIC_INLINE void Pack_MPI1_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1333: {
1334:   PetscInt i,idx;

1336:   for (i=0; i<n; i++) {
1337:     idx   = *indicesx++;
1338:     y[0]  = x[idx];
1339:     y[1]  = x[idx+1];
1340:     y[2]  = x[idx+2];
1341:     y[3]  = x[idx+3];
1342:     y[4]  = x[idx+4];
1343:     y[5]  = x[idx+5];
1344:     y[6]  = x[idx+6];
1345:     y[7]  = x[idx+7];
1346:     y[8]  = x[idx+8];
1347:     y    += 9;
1348:   }
1349: }

1351: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_9(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1352: {
1353:   PetscInt i,idy;

1356:   switch (addv) {
1357:   case INSERT_VALUES:
1358:   case INSERT_ALL_VALUES:
1359:     for (i=0; i<n; i++) {
1360:       idy       = *indicesy++;
1361:       y[idy]    = x[0];
1362:       y[idy+1]  = x[1];
1363:       y[idy+2]  = x[2];
1364:       y[idy+3]  = x[3];
1365:       y[idy+4]  = x[4];
1366:       y[idy+5]  = x[5];
1367:       y[idy+6]  = x[6];
1368:       y[idy+7]  = x[7];
1369:       y[idy+8]  = x[8];
1370:       x        += 9;
1371:     }
1372:     break;
1373:   case ADD_VALUES:
1374:   case ADD_ALL_VALUES:
1375:     for (i=0; i<n; i++) {
1376:       idy        = *indicesy++;
1377:       y[idy]    += x[0];
1378:       y[idy+1]  += x[1];
1379:       y[idy+2]  += x[2];
1380:       y[idy+3]  += x[3];
1381:       y[idy+4]  += x[4];
1382:       y[idy+5]  += x[5];
1383:       y[idy+6]  += x[6];
1384:       y[idy+7]  += x[7];
1385:       y[idy+8]  += x[8];
1386:       x         += 9;
1387:     }
1388:     break;
1389: #if !defined(PETSC_USE_COMPLEX)
1390:   case MAX_VALUES:
1391:     for (i=0; i<n; i++) {
1392:       idy       = *indicesy++;
1393:       y[idy]    = PetscMax(y[idy],x[0]);
1394:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1395:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1396:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1397:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1398:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1399:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1400:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1401:       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1402:       x        += 9;
1403:     }
1404: #else
1405:   case MAX_VALUES:
1406: #endif
1407:   case NOT_SET_VALUES:
1408:     break;
1409:   default:
1410:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1411:   }
1412:   return(0);
1413: }

1415: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1416: {
1417:   PetscInt i,idx,idy;

1420:   switch (addv) {
1421:   case INSERT_VALUES:
1422:   case INSERT_ALL_VALUES:
1423:     for (i=0; i<n; i++) {
1424:       idx       = *indicesx++;
1425:       idy       = *indicesy++;
1426:       y[idy]    = x[idx];
1427:       y[idy+1]  = x[idx+1];
1428:       y[idy+2]  = x[idx+2];
1429:       y[idy+3]  = x[idx+3];
1430:       y[idy+4]  = x[idx+4];
1431:       y[idy+5]  = x[idx+5];
1432:       y[idy+6]  = x[idx+6];
1433:       y[idy+7]  = x[idx+7];
1434:       y[idy+8]  = x[idx+8];
1435:     }
1436:     break;
1437:   case ADD_VALUES:
1438:   case ADD_ALL_VALUES:
1439:     for (i=0; i<n; i++) {
1440:       idx        = *indicesx++;
1441:       idy        = *indicesy++;
1442:       y[idy]    += x[idx];
1443:       y[idy+1]  += x[idx+1];
1444:       y[idy+2]  += x[idx+2];
1445:       y[idy+3]  += x[idx+3];
1446:       y[idy+4]  += x[idx+4];
1447:       y[idy+5]  += x[idx+5];
1448:       y[idy+6]  += x[idx+6];
1449:       y[idy+7]  += x[idx+7];
1450:       y[idy+8]  += x[idx+8];
1451:     }
1452:     break;
1453: #if !defined(PETSC_USE_COMPLEX)
1454:   case MAX_VALUES:
1455:     for (i=0; i<n; i++) {
1456:       idx       = *indicesx++;
1457:       idy       = *indicesy++;
1458:       y[idy]    = PetscMax(y[idy],x[idx]);
1459:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1460:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1461:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1462:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1463:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1464:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1465:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1466:       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1467:     }
1468: #else
1469:   case MAX_VALUES:
1470: #endif
1471:   case NOT_SET_VALUES:
1472:     break;
1473:   default:
1474:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1475:   }
1476:   return(0);
1477: }

1479: PETSC_STATIC_INLINE void Pack_MPI1_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1480: {
1481:   PetscInt i,idx;

1483:   for (i=0; i<n; i++) {
1484:     idx   = *indicesx++;
1485:     y[0]  = x[idx];
1486:     y[1]  = x[idx+1];
1487:     y[2]  = x[idx+2];
1488:     y[3]  = x[idx+3];
1489:     y[4]  = x[idx+4];
1490:     y[5]  = x[idx+5];
1491:     y[6]  = x[idx+6];
1492:     y[7]  = x[idx+7];
1493:     y[8]  = x[idx+8];
1494:     y[9]  = x[idx+9];
1495:     y    += 10;
1496:   }
1497: }

1499: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_10(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1500: {
1501:   PetscInt i,idy;

1504:   switch (addv) {
1505:   case INSERT_VALUES:
1506:   case INSERT_ALL_VALUES:
1507:     for (i=0; i<n; i++) {
1508:       idy       = *indicesy++;
1509:       y[idy]    = x[0];
1510:       y[idy+1]  = x[1];
1511:       y[idy+2]  = x[2];
1512:       y[idy+3]  = x[3];
1513:       y[idy+4]  = x[4];
1514:       y[idy+5]  = x[5];
1515:       y[idy+6]  = x[6];
1516:       y[idy+7]  = x[7];
1517:       y[idy+8]  = x[8];
1518:       y[idy+9]  = x[9];
1519:       x        += 10;
1520:     }
1521:     break;
1522:   case ADD_VALUES:
1523:   case ADD_ALL_VALUES:
1524:     for (i=0; i<n; i++) {
1525:       idy        = *indicesy++;
1526:       y[idy]    += x[0];
1527:       y[idy+1]  += x[1];
1528:       y[idy+2]  += x[2];
1529:       y[idy+3]  += x[3];
1530:       y[idy+4]  += x[4];
1531:       y[idy+5]  += x[5];
1532:       y[idy+6]  += x[6];
1533:       y[idy+7]  += x[7];
1534:       y[idy+8]  += x[8];
1535:       y[idy+9]  += x[9];
1536:       x         += 10;
1537:     }
1538:     break;
1539: #if !defined(PETSC_USE_COMPLEX)
1540:   case MAX_VALUES:
1541:     for (i=0; i<n; i++) {
1542:       idy       = *indicesy++;
1543:       y[idy]    = PetscMax(y[idy],x[0]);
1544:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1545:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1546:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1547:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1548:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1549:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1550:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1551:       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1552:       y[idy+9]  = PetscMax(y[idy+9],x[9]);
1553:       x        += 10;
1554:     }
1555: #else
1556:   case MAX_VALUES:
1557: #endif
1558:   case NOT_SET_VALUES:
1559:     break;
1560:   default:
1561:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1562:   }
1563:   return(0);
1564: }

1566: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1567: {
1568:   PetscInt i,idx,idy;

1571:   switch (addv) {
1572:   case INSERT_VALUES:
1573:   case INSERT_ALL_VALUES:
1574:     for (i=0; i<n; i++) {
1575:       idx       = *indicesx++;
1576:       idy       = *indicesy++;
1577:       y[idy]    = x[idx];
1578:       y[idy+1]  = x[idx+1];
1579:       y[idy+2]  = x[idx+2];
1580:       y[idy+3]  = x[idx+3];
1581:       y[idy+4]  = x[idx+4];
1582:       y[idy+5]  = x[idx+5];
1583:       y[idy+6]  = x[idx+6];
1584:       y[idy+7]  = x[idx+7];
1585:       y[idy+8]  = x[idx+8];
1586:       y[idy+9]  = x[idx+9];
1587:     }
1588:     break;
1589:   case ADD_VALUES:
1590:   case ADD_ALL_VALUES:
1591:     for (i=0; i<n; i++) {
1592:       idx        = *indicesx++;
1593:       idy        = *indicesy++;
1594:       y[idy]    += x[idx];
1595:       y[idy+1]  += x[idx+1];
1596:       y[idy+2]  += x[idx+2];
1597:       y[idy+3]  += x[idx+3];
1598:       y[idy+4]  += x[idx+4];
1599:       y[idy+5]  += x[idx+5];
1600:       y[idy+6]  += x[idx+6];
1601:       y[idy+7]  += x[idx+7];
1602:       y[idy+8]  += x[idx+8];
1603:       y[idy+9]  += x[idx+9];
1604:     }
1605:     break;
1606: #if !defined(PETSC_USE_COMPLEX)
1607:   case MAX_VALUES:
1608:     for (i=0; i<n; i++) {
1609:       idx       = *indicesx++;
1610:       idy       = *indicesy++;
1611:       y[idy]    = PetscMax(y[idy],x[idx]);
1612:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1613:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1614:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1615:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1616:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1617:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1618:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1619:       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1620:       y[idy+9]  = PetscMax(y[idy+9],x[idx+9]);
1621:     }
1622: #else
1623:   case MAX_VALUES:
1624: #endif
1625:   case NOT_SET_VALUES:
1626:     break;
1627:   default:
1628:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1629:   }
1630:   return(0);
1631: }

1633: PETSC_STATIC_INLINE void Pack_MPI1_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1634: {
1635:   PetscInt i,idx;

1637:   for (i=0; i<n; i++) {
1638:     idx   = *indicesx++;
1639:     y[0]  = x[idx];
1640:     y[1]  = x[idx+1];
1641:     y[2]  = x[idx+2];
1642:     y[3]  = x[idx+3];
1643:     y[4]  = x[idx+4];
1644:     y[5]  = x[idx+5];
1645:     y[6]  = x[idx+6];
1646:     y[7]  = x[idx+7];
1647:     y[8]  = x[idx+8];
1648:     y[9]  = x[idx+9];
1649:     y[10] = x[idx+10];
1650:     y    += 11;
1651:   }
1652: }

1654: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_11(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1655: {
1656:   PetscInt i,idy;

1659:   switch (addv) {
1660:   case INSERT_VALUES:
1661:   case INSERT_ALL_VALUES:
1662:     for (i=0; i<n; i++) {
1663:       idy       = *indicesy++;
1664:       y[idy]    = x[0];
1665:       y[idy+1]  = x[1];
1666:       y[idy+2]  = x[2];
1667:       y[idy+3]  = x[3];
1668:       y[idy+4]  = x[4];
1669:       y[idy+5]  = x[5];
1670:       y[idy+6]  = x[6];
1671:       y[idy+7]  = x[7];
1672:       y[idy+8]  = x[8];
1673:       y[idy+9]  = x[9];
1674:       y[idy+10] = x[10];
1675:       x        += 11;
1676:     }
1677:     break;
1678:   case ADD_VALUES:
1679:   case ADD_ALL_VALUES:
1680:     for (i=0; i<n; i++) {
1681:       idy        = *indicesy++;
1682:       y[idy]    += x[0];
1683:       y[idy+1]  += x[1];
1684:       y[idy+2]  += x[2];
1685:       y[idy+3]  += x[3];
1686:       y[idy+4]  += x[4];
1687:       y[idy+5]  += x[5];
1688:       y[idy+6]  += x[6];
1689:       y[idy+7]  += x[7];
1690:       y[idy+8]  += x[8];
1691:       y[idy+9]  += x[9];
1692:       y[idy+10] += x[10];
1693:       x         += 11;
1694:     }
1695:     break;
1696: #if !defined(PETSC_USE_COMPLEX)
1697:   case MAX_VALUES:
1698:     for (i=0; i<n; i++) {
1699:       idy       = *indicesy++;
1700:       y[idy]    = PetscMax(y[idy],x[0]);
1701:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1702:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1703:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1704:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1705:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1706:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1707:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1708:       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1709:       y[idy+9]  = PetscMax(y[idy+9],x[9]);
1710:       y[idy+10] = PetscMax(y[idy+10],x[10]);
1711:       x        += 11;
1712:     }
1713: #else
1714:   case MAX_VALUES:
1715: #endif
1716:   case NOT_SET_VALUES:
1717:     break;
1718:   default:
1719:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1720:   }
1721:   return(0);
1722: }

1724: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1725: {
1726:   PetscInt i,idx,idy;

1729:   switch (addv) {
1730:   case INSERT_VALUES:
1731:   case INSERT_ALL_VALUES:
1732:     for (i=0; i<n; i++) {
1733:       idx       = *indicesx++;
1734:       idy       = *indicesy++;
1735:       y[idy]    = x[idx];
1736:       y[idy+1]  = x[idx+1];
1737:       y[idy+2]  = x[idx+2];
1738:       y[idy+3]  = x[idx+3];
1739:       y[idy+4]  = x[idx+4];
1740:       y[idy+5]  = x[idx+5];
1741:       y[idy+6]  = x[idx+6];
1742:       y[idy+7]  = x[idx+7];
1743:       y[idy+8]  = x[idx+8];
1744:       y[idy+9]  = x[idx+9];
1745:       y[idy+10] = x[idx+10];
1746:     }
1747:     break;
1748:   case ADD_VALUES:
1749:   case ADD_ALL_VALUES:
1750:     for (i=0; i<n; i++) {
1751:       idx        = *indicesx++;
1752:       idy        = *indicesy++;
1753:       y[idy]    += x[idx];
1754:       y[idy+1]  += x[idx+1];
1755:       y[idy+2]  += x[idx+2];
1756:       y[idy+3]  += x[idx+3];
1757:       y[idy+4]  += x[idx+4];
1758:       y[idy+5]  += x[idx+5];
1759:       y[idy+6]  += x[idx+6];
1760:       y[idy+7]  += x[idx+7];
1761:       y[idy+8]  += x[idx+8];
1762:       y[idy+9]  += x[idx+9];
1763:       y[idy+10] += x[idx+10];
1764:     }
1765:     break;
1766: #if !defined(PETSC_USE_COMPLEX)
1767:   case MAX_VALUES:
1768:     for (i=0; i<n; i++) {
1769:       idx       = *indicesx++;
1770:       idy       = *indicesy++;
1771:       y[idy]    = PetscMax(y[idy],x[idx]);
1772:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1773:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1774:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1775:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1776:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1777:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1778:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1779:       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1780:       y[idy+9]  = PetscMax(y[idy+9],x[idx+9]);
1781:       y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1782:     }
1783: #else
1784:   case MAX_VALUES:
1785: #endif
1786:   case NOT_SET_VALUES:
1787:     break;
1788:   default:
1789:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1790:   }
1791:   return(0);
1792: }

1794: /* ----------------------------------------------------------------------------------------------- */
1795: PETSC_STATIC_INLINE void Pack_MPI1_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1796: {
1797:   PetscInt i,idx;

1799:   for (i=0; i<n; i++) {
1800:     idx   = *indicesx++;
1801:     y[0]  = x[idx];
1802:     y[1]  = x[idx+1];
1803:     y[2]  = x[idx+2];
1804:     y[3]  = x[idx+3];
1805:     y[4]  = x[idx+4];
1806:     y[5]  = x[idx+5];
1807:     y[6]  = x[idx+6];
1808:     y[7]  = x[idx+7];
1809:     y[8]  = x[idx+8];
1810:     y[9]  = x[idx+9];
1811:     y[10] = x[idx+10];
1812:     y[11] = x[idx+11];
1813:     y    += 12;
1814:   }
1815: }

1817: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_12(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1818: {
1819:   PetscInt i,idy;

1822:   switch (addv) {
1823:   case INSERT_VALUES:
1824:   case INSERT_ALL_VALUES:
1825:     for (i=0; i<n; i++) {
1826:       idy       = *indicesy++;
1827:       y[idy]    = x[0];
1828:       y[idy+1]  = x[1];
1829:       y[idy+2]  = x[2];
1830:       y[idy+3]  = x[3];
1831:       y[idy+4]  = x[4];
1832:       y[idy+5]  = x[5];
1833:       y[idy+6]  = x[6];
1834:       y[idy+7]  = x[7];
1835:       y[idy+8]  = x[8];
1836:       y[idy+9]  = x[9];
1837:       y[idy+10] = x[10];
1838:       y[idy+11] = x[11];
1839:       x        += 12;
1840:     }
1841:     break;
1842:   case ADD_VALUES:
1843:   case ADD_ALL_VALUES:
1844:     for (i=0; i<n; i++) {
1845:       idy        = *indicesy++;
1846:       y[idy]    += x[0];
1847:       y[idy+1]  += x[1];
1848:       y[idy+2]  += x[2];
1849:       y[idy+3]  += x[3];
1850:       y[idy+4]  += x[4];
1851:       y[idy+5]  += x[5];
1852:       y[idy+6]  += x[6];
1853:       y[idy+7]  += x[7];
1854:       y[idy+8]  += x[8];
1855:       y[idy+9]  += x[9];
1856:       y[idy+10] += x[10];
1857:       y[idy+11] += x[11];
1858:       x         += 12;
1859:     }
1860:     break;
1861: #if !defined(PETSC_USE_COMPLEX)
1862:   case MAX_VALUES:
1863:     for (i=0; i<n; i++) {
1864:       idy       = *indicesy++;
1865:       y[idy]    = PetscMax(y[idy],x[0]);
1866:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1867:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1868:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1869:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1870:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1871:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1872:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1873:       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1874:       y[idy+9]  = PetscMax(y[idy+9],x[9]);
1875:       y[idy+10] = PetscMax(y[idy+10],x[10]);
1876:       y[idy+11] = PetscMax(y[idy+11],x[11]);
1877:       x        += 12;
1878:     }
1879: #else
1880:   case MAX_VALUES:
1881: #endif
1882:   case NOT_SET_VALUES:
1883:     break;
1884:   default:
1885:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1886:   }
1887:   return(0);
1888: }

1890: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1891: {
1892:   PetscInt i,idx,idy;

1895:   switch (addv) {
1896:   case INSERT_VALUES:
1897:   case INSERT_ALL_VALUES:
1898:     for (i=0; i<n; i++) {
1899:       idx       = *indicesx++;
1900:       idy       = *indicesy++;
1901:       y[idy]    = x[idx];
1902:       y[idy+1]  = x[idx+1];
1903:       y[idy+2]  = x[idx+2];
1904:       y[idy+3]  = x[idx+3];
1905:       y[idy+4]  = x[idx+4];
1906:       y[idy+5]  = x[idx+5];
1907:       y[idy+6]  = x[idx+6];
1908:       y[idy+7]  = x[idx+7];
1909:       y[idy+8]  = x[idx+8];
1910:       y[idy+9]  = x[idx+9];
1911:       y[idy+10] = x[idx+10];
1912:       y[idy+11] = x[idx+11];
1913:     }
1914:     break;
1915:   case ADD_VALUES:
1916:   case ADD_ALL_VALUES:
1917:     for (i=0; i<n; i++) {
1918:       idx        = *indicesx++;
1919:       idy        = *indicesy++;
1920:       y[idy]    += x[idx];
1921:       y[idy+1]  += x[idx+1];
1922:       y[idy+2]  += x[idx+2];
1923:       y[idy+3]  += x[idx+3];
1924:       y[idy+4]  += x[idx+4];
1925:       y[idy+5]  += x[idx+5];
1926:       y[idy+6]  += x[idx+6];
1927:       y[idy+7]  += x[idx+7];
1928:       y[idy+8]  += x[idx+8];
1929:       y[idy+9]  += x[idx+9];
1930:       y[idy+10] += x[idx+10];
1931:       y[idy+11] += x[idx+11];
1932:     }
1933:     break;
1934: #if !defined(PETSC_USE_COMPLEX)
1935:   case MAX_VALUES:
1936:     for (i=0; i<n; i++) {
1937:       idx       = *indicesx++;
1938:       idy       = *indicesy++;
1939:       y[idy]    = PetscMax(y[idy],x[idx]);
1940:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1941:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1942:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1943:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1944:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1945:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1946:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1947:       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1948:       y[idy+9]  = PetscMax(y[idy+9],x[idx+9]);
1949:       y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1950:       y[idy+11] = PetscMax(y[idy+11],x[idx+11]);
1951:     }
1952: #else
1953:   case MAX_VALUES:
1954: #endif
1955:   case NOT_SET_VALUES:
1956:     break;
1957:   default:
1958:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1959:   }
1960:   return(0);
1961: }

1963: /* ----------------------------------------------------------------------------------------------- */
1964: PETSC_STATIC_INLINE void Pack_MPI1_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1965: {
1966:   PetscInt       i,idx;

1968:   for (i=0; i<n; i++) {
1969:     idx   = *indicesx++;
1970:     PetscMemcpy(y,x + idx,bs*sizeof(PetscScalar));
1971:     y    += bs;
1972:   }
1973: }

1975: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_bs(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1976: {
1977:   PetscInt i,idy,j;

1980:   switch (addv) {
1981:   case INSERT_VALUES:
1982:   case INSERT_ALL_VALUES:
1983:     for (i=0; i<n; i++) {
1984:       idy       = *indicesy++;
1985:       PetscMemcpy(y + idy,x,bs*sizeof(PetscScalar));
1986:       x        += bs;
1987:     }
1988:     break;
1989:   case ADD_VALUES:
1990:   case ADD_ALL_VALUES:
1991:     for (i=0; i<n; i++) {
1992:       idy        = *indicesy++;
1993:       for (j=0; j<bs; j++) y[idy+j] += x[j];
1994:       x         += bs;
1995:     }
1996:     break;
1997: #if !defined(PETSC_USE_COMPLEX)
1998:   case MAX_VALUES:
1999:     for (i=0; i<n; i++) {
2000:       idy = *indicesy++;
2001:       for (j=0; j<bs; j++) y[idy+j] = PetscMax(y[idy+j],x[j]);
2002:       x  += bs;
2003:     }
2004: #else
2005:   case MAX_VALUES:
2006: #endif
2007:   case NOT_SET_VALUES:
2008:     break;
2009:   default:
2010:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2011:   }
2012:   return(0);
2013: }

2015: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
2016: {
2017:   PetscInt i,idx,idy,j;

2020:   switch (addv) {
2021:   case INSERT_VALUES:
2022:   case INSERT_ALL_VALUES:
2023:     for (i=0; i<n; i++) {
2024:       idx       = *indicesx++;
2025:       idy       = *indicesy++;
2026:       PetscMemcpy(y + idy, x + idx,bs*sizeof(PetscScalar));
2027:     }
2028:     break;
2029:   case ADD_VALUES:
2030:   case ADD_ALL_VALUES:
2031:     for (i=0; i<n; i++) {
2032:       idx        = *indicesx++;
2033:       idy        = *indicesy++;
2034:       for (j=0; j<bs; j++) y[idy+j] += x[idx+j];
2035:     }
2036:     break;
2037: #if !defined(PETSC_USE_COMPLEX)
2038:   case MAX_VALUES:
2039:     for (i=0; i<n; i++) {
2040:       idx       = *indicesx++;
2041:       idy       = *indicesy++;
2042:       for (j=0; j<bs; j++) y[idy+j] = PetscMax(y[idy+j],x[idx+j]);
2043:     }
2044: #else
2045:   case MAX_VALUES:
2046: #endif
2047:   case NOT_SET_VALUES:
2048:     break;
2049:   default:
2050:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2051:   }
2052:   return(0);
2053: }
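/*
   The _bs triple above is the general fallback used when the block size has no
   hand-unrolled variant: the insert paths use PetscMemcpy() and the
   accumulating paths loop j over the block. A minimal usage sketch
   (hypothetical driver code, not part of this file):

     PetscScalar x[6] = {1,2,3,4,5,6},y[6] = {0,0,0,0,0,0};
     PetscInt    ix[1] = {0},iy[1] = {3};
     Scatter_MPI1_bs(1,ix,x,iy,y,INSERT_VALUES,3);   copies x[0..2] into y[3..5]
*/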

2055: /* Create the VecScatterBeginMPI1_<BS> and VecScatterEndMPI1_<BS> routines for our chosen block sizes */
2056: #define BS 1
2057: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
2058: #define BS 2
2059: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
2060: #define BS 3
2061: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
2062: #define BS 4
2063: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
2064: #define BS 5
2065: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
2066: #define BS 6
2067: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
2068: #define BS 7
2069: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
2070: #define BS 8
2071: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
2072: #define BS 9
2073: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
2074: #define BS 10
2075: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
2076: #define BS 11
2077: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
2078: #define BS 12
2079: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
2080: #define BS bs
2081: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
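/*
   Each inclusion of vpscat_mpi1.h above instantiates a VecScatterBeginMPI1_<BS>
   and VecScatterEndMPI1_<BS> pair for the block size named by the BS macro;
   the switch in VecScatterCreateCommon_PtoS_MPI1() below dispatches to them,
   and the final inclusion with BS defined to the runtime variable bs yields the
   generic VecScatterBeginMPI1_bs/VecScatterEndMPI1_bs pair. A hedged sketch of
   the token-pasting idiom such a template header typically relies on (the real
   header may differ in detail):

     #define PASTE2_(a,b) a ## b
     #define PASTE2(a,b)  PASTE2_(a,b)
     PetscErrorCode PASTE2(VecScatterBeginMPI1_,BS)(VecScatter,Vec,Vec,InsertMode,ScatterMode);
     #undef BS
*/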

2083: /* ==========================================================================================*/

2085: /*              create parallel to sequential scatter context                           */

2087: PetscErrorCode VecScatterCreateCommon_PtoS_MPI1(VecScatter_MPI_General*,VecScatter_MPI_General*,VecScatter);

2089: /*
2090:    bs indicates how many elements there are in each block. Normally this would be 1.

2092:    Contains a check that PetscMPIInt can handle the sizes needed.
2093: */
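/*
   For example, with bs = 2 the block indices inidx = {3,7} refer to the scalar
   entries {6,7} and {14,15} of xin, since the code below addresses entries as
   idx = bs*inidx[i] and ships bs consecutive scalars per block.
*/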
2094: PetscErrorCode VecScatterCreateLocal_PtoS_MPI1(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2095: {
2096:   VecScatter_MPI_General *from,*to;
2097:   PetscMPIInt            size,rank,imdex,tag,n;
2098:   PetscInt               *source = NULL,*owners = NULL,nxr;
2099:   PetscInt               *lowner = NULL,*start = NULL,lengthy,lengthx;
2100:   PetscMPIInt            *nprocs = NULL,nrecvs;
2101:   PetscInt               i,j,idx,nsends;
2102:   PetscMPIInt            *owner = NULL;
2103:   PetscInt               *starts = NULL,count,slen;
2104:   PetscInt               *rvalues,*svalues,base,*values,nprocslocal,recvtotal,*rsvalues;
2105:   PetscMPIInt            *onodes1,*olengths1;
2106:   MPI_Comm               comm;
2107:   MPI_Request            *send_waits = NULL,*recv_waits = NULL;
2108:   MPI_Status             recv_status,*send_status;
2109:   PetscErrorCode         ierr;

2112:   PetscObjectGetNewTag((PetscObject)ctx,&tag);
2113:   PetscObjectGetComm((PetscObject)xin,&comm);
2114:   MPI_Comm_rank(comm,&rank);
2115:   MPI_Comm_size(comm,&size);
2116:   owners = xin->map->range;
2117:   VecGetSize(yin,&lengthy);
2118:   VecGetSize(xin,&lengthx);

2120:   /*  first count number of contributors to each processor */
2121:   /*  owner[i]: owner of the ith inidx; nprocs[j]: number of inidx entries to be sent to the jth proc */
2122:   PetscMalloc2(size,&nprocs,nx,&owner);
2123:   PetscMemzero(nprocs,size*sizeof(PetscMPIInt));

2125:   j      = 0;
2126:   nsends = 0;
2127:   for (i=0; i<nx; i++) {
2128:     idx = bs*inidx[i];
2129:     if (idx < owners[j]) j = 0;
2130:     for (; j<size; j++) {
2131:       if (idx < owners[j+1]) {
2132:         if (!nprocs[j]++) nsends++;
2133:         owner[i] = j;
2134:         break;
2135:       }
2136:     }
2137:     if (j == size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"%D-th block entry %D not owned by any process, upper bound %D",i,idx,owners[size]);
2138:   }

2140:   nprocslocal  = nprocs[rank];
2141:   nprocs[rank] = 0;
2142:   if (nprocslocal) nsends--;
2143:   /* inform other processors of number of messages and max length */
2144:   PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2145:   PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2146:   PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2147:   recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
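  /*
     Rendezvous step: PetscGatherNumberOfMessages() tells this process how many
     messages it will receive, and PetscGatherMessageLengths() fills onodes1[]
     with the sending ranks and olengths1[] with the message lengths, so the
     receives below can be posted with exactly the right sizes and sources.
  */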

2149:   /* post receives:   */
2150:   PetscMalloc3(recvtotal,&rvalues,nrecvs,&source,nrecvs,&recv_waits);
2151:   count = 0;
2152:   for (i=0; i<nrecvs; i++) {
2153:     MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2154:     count += olengths1[i];
2155:   }

2157:   /* do sends:
2158:      starts[i] gives the starting index in svalues for stuff going to
2159:      the ith processor
2160:   */
2161:   nxr = 0;
2162:   for (i=0; i<nx; i++) {
2163:     if (owner[i] != rank) nxr++;
2164:   }
2165:   PetscMalloc3(nxr,&svalues,nsends,&send_waits,size+1,&starts);

2167:   starts[0]  = 0;
2168:   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2169:   for (i=0; i<nx; i++) {
2170:     if (owner[i] != rank) svalues[starts[owner[i]]++] = bs*inidx[i];
2171:   }
2172:   starts[0] = 0;
2173:   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2174:   count = 0;
2175:   for (i=0; i<size; i++) {
2176:     if (nprocs[i]) {
2177:       MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);
2178:     }
2179:   }

2181:   /*  wait on receives */
2182:   count = nrecvs;
2183:   slen  = 0;
2184:   while (count) {
2185:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2186:     /* tally how much was received; rvalues is unpacked below once all receives complete */
2187:     MPI_Get_count(&recv_status,MPIU_INT,&n);
2188:     slen += n;
2189:     count--;
2190:   }

2192:   if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);

2194:   /* allocate entire send scatter context */
2195:   PetscNewLog(ctx,&to);
2196:   to->n = nrecvs;

2198:   PetscMalloc1(nrecvs,&to->requests);
2199:   PetscMalloc4(bs*slen,&to->values,slen,&to->indices,nrecvs+1,&to->starts,nrecvs,&to->procs);
2200:   PetscMalloc2(PetscMax(to->n,nsends),&to->sstatus,PetscMax(to->n,nsends),&to->rstatus);

2202:   ctx->todata   = (void*)to;
2203:   to->starts[0] = 0;

2205:   if (nrecvs) {
2206:     /* move the data into the send scatter */
2207:     base     = owners[rank];
2208:     rsvalues = rvalues;
2209:     for (i=0; i<nrecvs; i++) {
2210:       to->starts[i+1] = to->starts[i] + olengths1[i];
2211:       to->procs[i]    = onodes1[i];
2212:       values = rsvalues;
2213:       rsvalues += olengths1[i];
2214:       for (j=0; j<olengths1[i]; j++) to->indices[to->starts[i] + j] = values[j] - base;
2215:     }
2216:   }
2217:   PetscFree(olengths1);
2218:   PetscFree(onodes1);
2219:   PetscFree3(rvalues,source,recv_waits);

2221:   /* allocate entire receive scatter context */
2222:   PetscNewLog(ctx,&from);
2223:   from->n = nsends;

2225:   PetscMalloc1(nsends,&from->requests);
2226:   PetscMalloc4((ny-nprocslocal)*bs,&from->values,ny-nprocslocal,&from->indices,nsends+1,&from->starts,from->n,&from->procs);
2227:   ctx->fromdata = (void*)from;

2229:   /* move data into receive scatter */
2230:   PetscMalloc2(size,&lowner,nsends+1,&start);
2231:   count = 0; from->starts[0] = start[0] = 0;
2232:   for (i=0; i<size; i++) {
2233:     if (nprocs[i]) {
2234:       lowner[i]            = count;
2235:       from->procs[count++] = i;
2236:       from->starts[count]  = start[count] = start[count-1] + nprocs[i];
2237:     }
2238:   }

2240:   for (i=0; i<nx; i++) {
2241:     if (owner[i] != rank) {
2242:       from->indices[start[lowner[owner[i]]]++] = bs*inidy[i];
2243:       if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2244:     }
2245:   }
2246:   PetscFree2(lowner,start);
2247:   PetscFree2(nprocs,owner);

2249:   /* wait on sends */
2250:   if (nsends) {
2251:     PetscMalloc1(nsends,&send_status);
2252:     MPI_Waitall(nsends,send_waits,send_status);
2253:     PetscFree(send_status);
2254:   }
2255:   PetscFree3(svalues,send_waits,starts);

2257:   if (nprocslocal) {
2258:     PetscInt nt = from->local.n = to->local.n = nprocslocal;
2259:     /* we have a scatter to ourselves */
2260:     PetscMalloc1(nt,&to->local.vslots);
2261:     PetscMalloc1(nt,&from->local.vslots);
2262:     nt   = 0;
2263:     for (i=0; i<nx; i++) {
2264:       idx = bs*inidx[i];
2265:       if (idx >= owners[rank] && idx < owners[rank+1]) {
2266:         to->local.vslots[nt]     = idx - owners[rank];
2267:         from->local.vslots[nt++] = bs*inidy[i];
2268:         if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2269:       }
2270:     }
2271:     PetscLogObjectMemory((PetscObject)ctx,2*nt*sizeof(PetscInt));
2272:   } else {
2273:     from->local.n      = 0;
2274:     from->local.vslots = 0;
2275:     to->local.n        = 0;
2276:     to->local.vslots   = 0;
2277:   }

2279:   from->local.nonmatching_computed = PETSC_FALSE;
2280:   from->local.n_nonmatching        = 0;
2281:   from->local.slots_nonmatching    = 0;
2282:   to->local.nonmatching_computed   = PETSC_FALSE;
2283:   to->local.n_nonmatching          = 0;
2284:   to->local.slots_nonmatching      = 0;

2286:   from->format = VEC_SCATTER_MPI_GENERAL;
2287:   to->format   = VEC_SCATTER_MPI_GENERAL;
2288:   from->bs     = bs;
2289:   to->bs       = bs;

2291:   VecScatterCreateCommon_PtoS_MPI1(from,to,ctx);
2292:   return(0);
2293: }

2295: /*
2296:    bs indicates how many elements there are in each block. Normally this would be 1.
2297: */
2298: PetscErrorCode VecScatterCreateCommon_PtoS_MPI1(VecScatter_MPI_General *from,VecScatter_MPI_General *to,VecScatter ctx)
2299: {
2300:   MPI_Comm       comm;
2301:   PetscMPIInt    tag  = ((PetscObject)ctx)->tag, tagr;
2302:   PetscInt       bs   = to->bs;
2303:   PetscMPIInt    size;
2304:   PetscInt       i, n;

2308:   PetscObjectGetComm((PetscObject)ctx,&comm);
2309:   PetscObjectGetNewTag((PetscObject)ctx,&tagr);
2310:   ctx->ops->destroy = VecScatterDestroy_PtoP_MPI1;

2312:   ctx->reproduce = PETSC_FALSE;
2313:   to->sendfirst  = PETSC_FALSE;
2314:   PetscOptionsGetBool(NULL,NULL,"-vecscatter_reproduce",&ctx->reproduce,NULL);
2315:   PetscOptionsGetBool(NULL,NULL,"-vecscatter_sendfirst",&to->sendfirst,NULL);
2316:   from->sendfirst = to->sendfirst;

2318:   MPI_Comm_size(comm,&size);
2319:   /* check if the receives are ALL going into contiguous locations; if so can skip indexing */
2320:   to->contiq = PETSC_FALSE;
2321:   n = from->starts[from->n];
2322:   from->contiq = PETSC_TRUE;
2323:   for (i=1; i<n; i++) {
2324:     if (from->indices[i] != from->indices[i-1] + bs) {
2325:       from->contiq = PETSC_FALSE;
2326:       break;
2327:     }
2328:   }
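  /*
     from->contiq records whether every received block lands immediately after
     the previous one; the MPI_Alltoallw branch below exploits this by using
     plain byte displacements instead of building an indexed datatype per
     sender. (to->contiq is set to PETSC_FALSE unconditionally; the send side is
     not optimized this way.)
  */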

2330:   to->use_alltoallv = PETSC_FALSE;
2331:   PetscOptionsGetBool(NULL,NULL,"-vecscatter_alltoall",&to->use_alltoallv,NULL);
2332:   from->use_alltoallv = to->use_alltoallv;
2333:   if (from->use_alltoallv) PetscInfo(ctx,"Using MPI_Alltoallv() for scatter\n");
2334: #if defined(PETSC_HAVE_MPI_ALLTOALLW)  && !defined(PETSC_USE_64BIT_INDICES)
2335:   if (to->use_alltoallv) {
2336:     to->use_alltoallw = PETSC_FALSE;
2337:     PetscOptionsGetBool(NULL,NULL,"-vecscatter_nopack",&to->use_alltoallw,NULL);
2338:   }
2339:   from->use_alltoallw = to->use_alltoallw;
2340:   if (from->use_alltoallw) PetscInfo(ctx,"Using MPI_Alltoallw() for scatter\n");
2341: #endif

2343: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
2344:   to->use_window = PETSC_FALSE;
2345:   PetscOptionsGetBool(NULL,NULL,"-vecscatter_window",&to->use_window,NULL);
2346:   from->use_window = to->use_window;
2347: #endif
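  /*
     Three communication strategies follow, chosen by the options read above:
     collective MPI_Alltoallv/MPI_Alltoallw (-vecscatter_alltoall, optionally
     with -vecscatter_nopack), one-sided MPI windows (-vecscatter_window, when
     the feature is available), and the default persistent point-to-point
     messages. For example, a run might request the collective path with

       mpiexec -n 4 ./app -vecscatter_alltoall

     where ./app stands for any program using this scatter.
  */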

2349:   if (to->use_alltoallv) {

2351:     PetscMalloc2(size,&to->counts,size,&to->displs);
2352:     PetscMemzero(to->counts,size*sizeof(PetscMPIInt));
2353:     for (i=0; i<to->n; i++) to->counts[to->procs[i]] = bs*(to->starts[i+1] - to->starts[i]);

2355:     to->displs[0] = 0;
2356:     for (i=1; i<size; i++) to->displs[i] = to->displs[i-1] + to->counts[i-1];

2358:     PetscMalloc2(size,&from->counts,size,&from->displs);
2359:     PetscMemzero(from->counts,size*sizeof(PetscMPIInt));
2360:     for (i=0; i<from->n; i++) from->counts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
2361:     from->displs[0] = 0;
2362:     for (i=1; i<size; i++) from->displs[i] = from->displs[i-1] + from->counts[i-1];

2364: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
2365:     if (to->use_alltoallw) {
2366:       PetscMPIInt mpibs, mpilen;

2368:       ctx->packtogether = PETSC_FALSE;
2369:       PetscMPIIntCast(bs,&mpibs);
2370:       PetscMalloc3(size,&to->wcounts,size,&to->wdispls,size,&to->types);
2371:       PetscMemzero(to->wcounts,size*sizeof(PetscMPIInt));
2372:       PetscMemzero(to->wdispls,size*sizeof(PetscMPIInt));
2373:       for (i=0; i<size; i++) to->types[i] = MPIU_SCALAR;

2375:       for (i=0; i<to->n; i++) {
2376:         to->wcounts[to->procs[i]] = 1;
2377:         PetscMPIIntCast(to->starts[i+1]-to->starts[i],&mpilen);
2378:         MPI_Type_create_indexed_block(mpilen,mpibs,to->indices+to->starts[i],MPIU_SCALAR,to->types+to->procs[i]);
2379:         MPI_Type_commit(to->types+to->procs[i]);
2380:       }
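      /*
         Each destination rank gets a wcounts entry of 1 and a displacement of
         0: the set of local entries to send to it is encoded entirely in the
         indexed-block datatype just committed, so MPI_Alltoallw() can read them
         from the vector in place with no packing buffer (this is what
         -vecscatter_nopack requests).
      */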
2381:       PetscMalloc3(size,&from->wcounts,size,&from->wdispls,size,&from->types);
2382:       PetscMemzero(from->wcounts,size*sizeof(PetscMPIInt));
2383:       PetscMemzero(from->wdispls,size*sizeof(PetscMPIInt));
2384:       for (i=0; i<size; i++) from->types[i] = MPIU_SCALAR;

2386:       if (from->contiq) {
2387:         PetscInfo(ctx,"Scattered vector entries are stored contiguously, taking advantage of this with -vecscatter_alltoall\n");
2388:         for (i=0; i<from->n; i++) from->wcounts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);

2390:         if (from->n) from->wdispls[from->procs[0]] = sizeof(PetscScalar)*from->indices[0];
2391:         for (i=1; i<from->n; i++) from->wdispls[from->procs[i]] = from->wdispls[from->procs[i-1]] + sizeof(PetscScalar)*from->wcounts[from->procs[i-1]];

2393:       } else {
2394:         for (i=0; i<from->n; i++) {
2395:           from->wcounts[from->procs[i]] = 1;
2396:           PetscMPIIntCast(from->starts[i+1]-from->starts[i],&mpilen);
2397:           MPI_Type_create_indexed_block(mpilen,mpibs,from->indices+from->starts[i],MPIU_SCALAR,from->types+from->procs[i]);
2398:           MPI_Type_commit(from->types+from->procs[i]);
2399:         }
2400:       }
2401:     } else ctx->ops->copy = VecScatterCopy_PtoP_AllToAll_MPI1;

2403: #else
2404:     to->use_alltoallw   = PETSC_FALSE;
2405:     from->use_alltoallw = PETSC_FALSE;
2406:     ctx->ops->copy      = VecScatterCopy_PtoP_AllToAll_MPI1;
2407: #endif
2408: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
2409:   } else if (to->use_window) {
2410:     PetscMPIInt temptag,winsize;
2411:     MPI_Request *request;
2412:     MPI_Status  *status;

2414:     PetscObjectGetNewTag((PetscObject)ctx,&temptag);
2415:     winsize = (to->n ? to->starts[to->n] : 0)*bs*sizeof(PetscScalar);
2416:     MPI_Win_create(to->values ? to->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&to->window);
2417:     PetscMalloc1(to->n,&to->winstarts);
2418:     PetscMalloc2(to->n,&request,to->n,&status);
2419:     for (i=0; i<to->n; i++) {
2420:       MPI_Irecv(to->winstarts+i,1,MPIU_INT,to->procs[i],temptag,comm,request+i);
2421:     }
2422:     for (i=0; i<from->n; i++) {
2423:       MPI_Send(from->starts+i,1,MPIU_INT,from->procs[i],temptag,comm);
2424:     }
2425:     MPI_Waitall(to->n,request,status);
2426:     PetscFree2(request,status);

2428:     winsize = (from->n ? from->starts[from->n] : 0)*bs*sizeof(PetscScalar);
2429:     MPI_Win_create(from->values ? from->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&from->window);
2430:     PetscMalloc1(from->n,&from->winstarts);
2431:     PetscMalloc2(from->n,&request,from->n,&status);
2432:     for (i=0; i<from->n; i++) {
2433:       MPI_Irecv(from->winstarts+i,1,MPIU_INT,from->procs[i],temptag,comm,request+i);
2434:     }
2435:     for (i=0; i<to->n; i++) {
2436:       MPI_Send(to->starts+i,1,MPIU_INT,to->procs[i],temptag,comm);
2437:     }
2438:     MPI_Waitall(from->n,request,status);
2439:     PetscFree2(request,status);
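    /*
       The two exchanges above distribute to->winstarts[] and from->winstarts[]:
       each process learns the offset of its data inside the remote window, so
       the window-based Begin/End routines can presumably target their one-sided
       transfers at the correct displacement.
    */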
2440: #endif
2441:   } else {
2442:     PetscBool   use_rsend = PETSC_FALSE, use_ssend = PETSC_FALSE;
2443:     PetscInt    *sstarts  = to->starts,  *rstarts = from->starts;
2444:     PetscMPIInt *sprocs   = to->procs,   *rprocs  = from->procs;
2445:     MPI_Request *swaits   = to->requests,*rwaits  = from->requests;
2446:     MPI_Request *rev_swaits,*rev_rwaits;
2447:     PetscScalar *Ssvalues = to->values, *Srvalues = from->values;

2449:     /* allocate additional wait variables for the "reverse" scatter */
2450:     PetscMalloc1(to->n,&rev_rwaits);
2451:     PetscMalloc1(from->n,&rev_swaits);
2452:     to->rev_requests   = rev_rwaits;
2453:     from->rev_requests = rev_swaits;

2455:     /* Register the receives that you will use later (sends for scatter reverse) */
2456:     PetscOptionsGetBool(NULL,NULL,"-vecscatter_rsend",&use_rsend,NULL);
2457:     PetscOptionsGetBool(NULL,NULL,"-vecscatter_ssend",&use_ssend,NULL);
2458:     if (use_rsend) {
2459:       PetscInfo(ctx,"Using VecScatter ready receiver mode\n");
2460:       to->use_readyreceiver   = PETSC_TRUE;
2461:       from->use_readyreceiver = PETSC_TRUE;
2462:     } else {
2463:       to->use_readyreceiver   = PETSC_FALSE;
2464:       from->use_readyreceiver = PETSC_FALSE;
2465:     }
2466:     if (use_ssend) {
2467:       PetscInfo(ctx,"Using VecScatter Ssend mode\n");
2468:     }

2470:     for (i=0; i<from->n; i++) {
2471:       if (use_rsend) {
2472:         MPI_Rsend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2473:       } else if (use_ssend) {
2474:         MPI_Ssend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2475:       } else {
2476:         MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2477:       }
2478:     }

2480:     for (i=0; i<to->n; i++) {
2481:       if (use_rsend) {
2482:         MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2483:       } else if (use_ssend) {
2484:         MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2485:       } else {
2486:         MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2487:       }
2488:     }
2489:     /* Register receives for scatter and reverse */
2490:     for (i=0; i<from->n; i++) {
2491:       MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
2492:     }
2493:     for (i=0; i<to->n; i++) {
2494:       MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tagr,comm,rev_rwaits+i);
2495:     }
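    /*
       All of the sends and receives above are persistent requests: the
       MPI_Send_init()/MPI_Rsend_init()/MPI_Ssend_init() and MPI_Recv_init()
       calls only register the message parameters, and no data moves until the
       requests are started (for example with MPI_Startall()) in the scatter
       Begin/End routines, so the setup cost is paid once per context rather
       than once per scatter.
    */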
2496:     if (use_rsend) {
2497:       if (to->n)   {MPI_Startall_irecv(to->starts[to->n]*to->bs,to->n,to->rev_requests);}
2498:       if (from->n) {MPI_Startall_irecv(from->starts[from->n]*from->bs,from->n,from->requests);}
2499:       MPI_Barrier(comm);
2500:     }
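    /*
       MPI_Rsend() requires the matching receive to already be posted when the
       send starts, so ready-receiver mode pre-starts all receive requests here
       and uses the barrier to guarantee that every process has done so before
       any scatter can begin.
    */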

2502:     ctx->ops->copy = VecScatterCopy_PtoP_X_MPI1;
2503:   }
2504:   PetscInfo1(ctx,"Using blocksize %D scatter\n",bs);

2506: #if defined(PETSC_USE_DEBUG)
2507:   MPIU_Allreduce(&bs,&i,1,MPIU_INT,MPI_MIN,PetscObjectComm((PetscObject)ctx));
2508:   MPIU_Allreduce(&bs,&n,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)ctx));
2509:   if (bs!=i || bs!=n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Block size %D != %D or %D",bs,i,n);
2510: #endif

2512:   switch (bs) {
2513:   case 12:
2514:     ctx->ops->begin = VecScatterBeginMPI1_12;
2515:     ctx->ops->end   = VecScatterEndMPI1_12;
2516:     break;
2517:   case 11:
2518:     ctx->ops->begin = VecScatterBeginMPI1_11;
2519:     ctx->ops->end   = VecScatterEndMPI1_11;
2520:     break;
2521:   case 10:
2522:     ctx->ops->begin = VecScatterBeginMPI1_10;
2523:     ctx->ops->end   = VecScatterEndMPI1_10;
2524:     break;
2525:   case 9:
2526:     ctx->ops->begin = VecScatterBeginMPI1_9;
2527:     ctx->ops->end   = VecScatterEndMPI1_9;
2528:     break;
2529:   case 8:
2530:     ctx->ops->begin = VecScatterBeginMPI1_8;
2531:     ctx->ops->end   = VecScatterEndMPI1_8;
2532:     break;
2533:   case 7:
2534:     ctx->ops->begin = VecScatterBeginMPI1_7;
2535:     ctx->ops->end   = VecScatterEndMPI1_7;
2536:     break;
2537:   case 6:
2538:     ctx->ops->begin = VecScatterBeginMPI1_6;
2539:     ctx->ops->end   = VecScatterEndMPI1_6;
2540:     break;
2541:   case 5:
2542:     ctx->ops->begin = VecScatterBeginMPI1_5;
2543:     ctx->ops->end   = VecScatterEndMPI1_5;
2544:     break;
2545:   case 4:
2546:     ctx->ops->begin = VecScatterBeginMPI1_4;
2547:     ctx->ops->end   = VecScatterEndMPI1_4;
2548:     break;
2549:   case 3:
2550:     ctx->ops->begin = VecScatterBeginMPI1_3;
2551:     ctx->ops->end   = VecScatterEndMPI1_3;
2552:     break;
2553:   case 2:
2554:     ctx->ops->begin = VecScatterBeginMPI1_2;
2555:     ctx->ops->end   = VecScatterEndMPI1_2;
2556:     break;
2557:   case 1:
2558:     ctx->ops->begin = VecScatterBeginMPI1_1;
2559:     ctx->ops->end   = VecScatterEndMPI1_1;
2560:     break;
2561:   default:
2562:     ctx->ops->begin = VecScatterBeginMPI1_bs;
2563:     ctx->ops->end   = VecScatterEndMPI1_bs;

2565:   }
2566:   ctx->ops->view = VecScatterView_MPI_MPI1;
2567:   /* Check if the local scatter is actually a copy; important special case */
2568:   if (to->local.n) {
2569:     VecScatterLocalOptimizeCopy_Private_MPI1(ctx,&to->local,&from->local,bs);
2570:   }
2571:   return(0);
2572: }



2576: /* ------------------------------------------------------------------------------------*/
2577: /*
2578:          Scatter from local Seq vectors to a parallel vector.
2579:          Reverses the order of the arguments, calls VecScatterCreateLocal_PtoS_MPI1(),
2580:          then reverses the result by swapping the to/from contexts and the
         forward/reverse request arrays, so the forward StoP scatter runs as the
         reverse of the PtoS scatter just built.
2581: */
2582: PetscErrorCode VecScatterCreateLocal_StoP_MPI1(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2583: {
2584:   PetscErrorCode         ierr;
2585:   MPI_Request            *waits;
2586:   VecScatter_MPI_General *to,*from;

2589:   VecScatterCreateLocal_PtoS_MPI1(ny,inidy,nx,inidx,yin,xin,bs,ctx);
2590:   to            = (VecScatter_MPI_General*)ctx->fromdata;
2591:   from          = (VecScatter_MPI_General*)ctx->todata;
2592:   ctx->todata   = (void*)to;
2593:   ctx->fromdata = (void*)from;
2594:   /* these two are special; they are ALWAYS stored in the to struct */
2595:   to->sstatus   = from->sstatus;
2596:   to->rstatus   = from->rstatus;

2598:   from->sstatus = 0;
2599:   from->rstatus = 0;

2601:   waits              = from->rev_requests;
2602:   from->rev_requests = from->requests;
2603:   from->requests     = waits;
2604:   waits              = to->rev_requests;
2605:   to->rev_requests   = to->requests;
2606:   to->requests       = waits;
2607:   return(0);
2608: }

2610: /* ---------------------------------------------------------------------------------*/
2611: PetscErrorCode VecScatterCreateLocal_PtoP_MPI1(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2612: {
2614:   PetscMPIInt    size,rank,tag,imdex,n;
2615:   PetscInt       *owners = xin->map->range;
2616:   PetscMPIInt    *nprocs = NULL;
2617:   PetscInt       i,j,idx,nsends,*local_inidx = NULL,*local_inidy = NULL;
2618:   PetscMPIInt    *owner   = NULL;
2619:   PetscInt       *starts  = NULL,count,slen;
2620:   PetscInt       *rvalues = NULL,*svalues = NULL,base,*values = NULL,*rsvalues,recvtotal,lastidx;
2621:   PetscMPIInt    *onodes1,*olengths1,nrecvs;
2622:   MPI_Comm       comm;
2623:   MPI_Request    *send_waits = NULL,*recv_waits = NULL;
2624:   MPI_Status     recv_status,*send_status = NULL;
2625:   PetscBool      duplicate = PETSC_FALSE;
2626: #if defined(PETSC_USE_DEBUG)
2627:   PetscBool      found = PETSC_FALSE;
2628: #endif

2631:   PetscObjectGetNewTag((PetscObject)ctx,&tag);
2632:   PetscObjectGetComm((PetscObject)xin,&comm);
2633:   MPI_Comm_size(comm,&size);
2634:   MPI_Comm_rank(comm,&rank);
2635:   if (size == 1) {
2636:     VecScatterCreateLocal_StoP_MPI1(nx,inidx,ny,inidy,xin,yin,bs,ctx);
2637:     return(0);
2638:   }

2640:   /*
2641:      Each processor ships off its inidx[j] and inidy[j] to the appropriate processor.
2642:      The processors then call VecScatterCreateLocal_StoP_MPI1() with the redistributed indices.
2643:   */
2644:   /*  first count number of contributors to each processor */
2645:   PetscMalloc3(size,&nprocs,nx,&owner,(size+1),&starts);
2646:   PetscMemzero(nprocs,size*sizeof(PetscMPIInt));

2648:   lastidx = -1;
2649:   j       = 0;
2650:   for (i=0; i<nx; i++) {
2651:     /* if indices are NOT locally sorted, need to start search at the beginning */
2652:     if (lastidx > (idx = bs*inidx[i])) j = 0;
2653:     lastidx = idx;
2654:     for (; j<size; j++) {
2655:       if (idx >= owners[j] && idx < owners[j+1]) {
2656:         nprocs[j]++;
2657:         owner[i] = j;
2658: #if defined(PETSC_USE_DEBUG)
2659:         found = PETSC_TRUE;
2660: #endif
2661:         break;
2662:       }
2663:     }
2664: #if defined(PETSC_USE_DEBUG)
2665:     if (!found) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2666:     found = PETSC_FALSE;
2667: #endif
2668:   }
2669:   nsends = 0;
2670:   for (i=0; i<size; i++) nsends += (nprocs[i] > 0);

2672:   /* inform other processors of number of messages and max length */
2673:   PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2674:   PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2675:   PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2676:   recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];

2678:   /* post receives:   */
2679:   PetscMalloc5(2*recvtotal,&rvalues,2*nx,&svalues,nrecvs,&recv_waits,nsends,&send_waits,nsends,&send_status);

2681:   count = 0;
2682:   for (i=0; i<nrecvs; i++) {
2683:     MPI_Irecv((rvalues+2*count),2*olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2684:     count += olengths1[i];
2685:   }
2686:   PetscFree(onodes1);

2688:   /* do sends:
2689:      starts[i] gives the starting index in svalues for stuff going to
2690:      the ith processor
2691:   */
2692:   starts[0]= 0;
2693:   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2694:   for (i=0; i<nx; i++) {
2695:     svalues[2*starts[owner[i]]]       = bs*inidx[i];
2696:     svalues[1 + 2*starts[owner[i]]++] = bs*inidy[i];
2697:   }

2699:   starts[0] = 0;
2700:   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2701:   count = 0;
2702:   for (i=0; i<size; i++) {
2703:     if (nprocs[i]) {
2704:       MPI_Isend(svalues+2*starts[i],2*nprocs[i],MPIU_INT,i,tag,comm,send_waits+count);
2705:       count++;
2706:     }
2707:   }
2708:   PetscFree3(nprocs,owner,starts);

2710:   /*  wait on receives */
2711:   count = nrecvs;
2712:   slen  = 0;
2713:   while (count) {
2714:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2715:     /* tally how much was received (each entry is an index pair); rvalues is unpacked below once all receives complete */
2716:     MPI_Get_count(&recv_status,MPIU_INT,&n);
2717:     slen += n/2;
2718:     count--;
2719:   }
2720:   if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);

2722:   PetscMalloc2(slen,&local_inidx,slen,&local_inidy);
2723:   base     = owners[rank];
2724:   count    = 0;
2725:   rsvalues = rvalues;
2726:   for (i=0; i<nrecvs; i++) {
2727:     values    = rsvalues;
2728:     rsvalues += 2*olengths1[i];
2729:     for (j=0; j<olengths1[i]; j++) {
2730:       local_inidx[count]   = values[2*j] - base;
2731:       local_inidy[count++] = values[2*j+1];
2732:     }
2733:   }
2734:   PetscFree(olengths1);

2736:   /* wait on sends */
2737:   if (nsends) {MPI_Waitall(nsends,send_waits,send_status);}
2738:   PetscFree5(rvalues,svalues,recv_waits,send_waits,send_status);

2740:   /*
2741:      should sort and remove duplicates from local_inidx,local_inidy
2742:   */

2744: #if defined(do_it_slow)
2745:   /* sort on the from index */
2746:   PetscSortIntWithArray(slen,local_inidx,local_inidy);
2747:   start = 0;
2748:   while (start < slen) {
2749:     count = start+1;
2750:     last  = local_inidx[start];
2751:     while (count < slen && last == local_inidx[count]) count++;
2752:     if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
2753:       /* sort on to index */
2754:       PetscSortInt(count-start,local_inidy+start);
2755:     }
2756:     /* remove duplicates; not most efficient way, but probably good enough */
2757:     i = start;
2758:     while (i < count-1) {
2759:       if (local_inidy[i] != local_inidy[i+1]) i++;
2760:       else { /* found a duplicate */
2761:         duplicate = PETSC_TRUE;
2762:         for (j=i; j<slen-1; j++) {
2763:           local_inidx[j] = local_inidx[j+1];
2764:           local_inidy[j] = local_inidy[j+1];
2765:         }
2766:         slen--;
2767:         count--;
2768:       }
2769:     }
2770:     start = count;
2771:   }
2772: #endif
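  /*
     With do_it_slow undefined, the sort/deduplication block above is compiled
     out (and appears to have bit-rotted: it references start and last, which
     are not declared in this function), so duplicate always remains PETSC_FALSE
     and the message below is never printed.
  */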
2773:   if (duplicate) {
2774:     PetscInfo(ctx,"Duplicate from to indices passed in VecScatterCreate(), they are ignored\n");
2775:   }
2776:   VecScatterCreateLocal_StoP_MPI1(slen,local_inidx,slen,local_inidy,xin,yin,bs,ctx);
2777:   PetscFree2(local_inidx,local_inidy);
2778:   return(0);
2779: }

2781: /*@
2782:   PetscSFCreateFromZero - Create a PetscSF that maps a Vec from sequential (all entries stored on rank 0) to distributed

2784:   Input Parameters:
+ comm - The communicator for the new PetscSF
2785: - gv - A distributed Vec

2787:   Output Parameter:
2788: . sf - The SF created mapping a sequential Vec to gv

2790:   Level: developer

2792: .seealso: DMPlexDistributedToSequential()
2793: @*/
2794: PetscErrorCode PetscSFCreateFromZero(MPI_Comm comm, Vec gv, PetscSF *sf)
2795: {
2796:   PetscSFNode   *remotenodes;
2797:   PetscInt      *localnodes;
2798:   PetscInt       N, n, start, numroots, l;
2799:   PetscMPIInt    rank;

2803:   PetscSFCreate(comm, sf);
2804:   VecGetSize(gv, &N);
2805:   VecGetLocalSize(gv, &n);
2806:   VecGetOwnershipRange(gv, &start, NULL);
2807:   MPI_Comm_rank(comm, &rank);
2808:   PetscMalloc1(n, &localnodes);
2809:   PetscMalloc1(n, &remotenodes);
2810:   if (!rank) numroots = N;
2811:   else       numroots = 0;
2812:   for (l = 0; l < n; ++l) {
2813:     localnodes[l]        = l;
2814:     remotenodes[l].rank  = 0;
2815:     remotenodes[l].index = l+start;
2816:   }
2817:   PetscSFSetGraph(*sf, numroots, n, localnodes, PETSC_OWN_POINTER, remotenodes, PETSC_OWN_POINTER);
2818:   return(0);
2819: }
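/*
   A minimal usage sketch for PetscSFCreateFromZero() (hypothetical driver code,
   not part of this file): broadcast the entries of a sequential vector seqv
   held on rank 0 into the distributed vector gv.

     PetscSF            sf;
     const PetscScalar *rootdata = NULL;
     PetscScalar       *leafdata;
     PetscSFCreateFromZero(comm,gv,&sf);
     if (!rank) {VecGetArrayRead(seqv,&rootdata);}
     VecGetArray(gv,&leafdata);
     PetscSFBcastBegin(sf,MPIU_SCALAR,rootdata,leafdata);
     PetscSFBcastEnd(sf,MPIU_SCALAR,rootdata,leafdata);
     VecRestoreArray(gv,&leafdata);
     if (!rank) {VecRestoreArrayRead(seqv,&rootdata);}
     PetscSFDestroy(&sf);
*/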