Actual source code: vpscat_mpi1.c

petsc-3.13.6 2020-09-29
Report Typos and Errors

  2: /*
  3:     Defines parallel vector scatters using MPI1.
  4: */

  6:  #include <../src/vec/vec/impls/dvecimpl.h>
  7:  #include <../src/vec/vec/impls/mpi/pvecimpl.h>
  8:  #include <petsc/private/vecscatterimpl.h>


 11: PetscErrorCode VecScatterView_MPI_MPI1(VecScatter ctx,PetscViewer viewer)
 12: {
 13:   VecScatter_MPI_General *to  =(VecScatter_MPI_General*)ctx->todata;
 14:   VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
 15:   PetscErrorCode         ierr;
 16:   PetscInt               i;
 17:   PetscMPIInt            rank;
 18:   PetscViewerFormat      format;
 19:   PetscBool              iascii;

 22:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
 23:   if (iascii) {
 24:     MPI_Comm_rank(PetscObjectComm((PetscObject)ctx),&rank);
 25:     PetscViewerGetFormat(viewer,&format);
 26:     if (format ==  PETSC_VIEWER_ASCII_INFO) {
 27:       PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;

 29:       MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 30:       MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 31:       itmp = to->starts[to->n+1];
 32:       MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 33:       itmp = from->starts[from->n+1];
 34:       MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 35:       MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)ctx));

 37:       PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
 38:       PetscViewerASCIIPrintf(viewer,"  Maximum number sends %D\n",nsend_max);
 39:       PetscViewerASCIIPrintf(viewer,"  Maximum number receives %D\n",nrecv_max);
 40:       PetscViewerASCIIPrintf(viewer,"  Maximum data sent %D\n",(int)(lensend_max*to->bs*sizeof(PetscScalar)));
 41:       PetscViewerASCIIPrintf(viewer,"  Maximum data received %D\n",(int)(lenrecv_max*to->bs*sizeof(PetscScalar)));
 42:       PetscViewerASCIIPrintf(viewer,"  Total data sent %D\n",(int)(alldata*to->bs*sizeof(PetscScalar)));

 44:     } else {
 45:       PetscViewerASCIIPushSynchronized(viewer);
 46:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);
 47:       if (to->n) {
 48:         for (i=0; i<to->n; i++) {
 49:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d]   %D length = %D to whom %d\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
 50:           if (to->memcpy_plan.optimized[i]) { PetscViewerASCIISynchronizedPrintf(viewer,"  is optimized with %D memcpy's in Pack\n",to->memcpy_plan.copy_offsets[i+1]-to->memcpy_plan.copy_offsets[i]); }
 51:         }
 52:         PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote sends (in order by process sent to)\n");
 53:         for (i=0; i<to->starts[to->n]; i++) {
 54:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,to->indices[i]);
 55:         }
 56:       }

 58:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);
 59:       if (from->n) {
 60:         for (i=0; i<from->n; i++) {
 61:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %d\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
 62:           if (from->memcpy_plan.optimized[i]) { PetscViewerASCIISynchronizedPrintf(viewer,"  is optimized with %D memcpy's in Unpack\n",to->memcpy_plan.copy_offsets[i+1]-to->memcpy_plan.copy_offsets[i]); }
 63:         }

 65:         PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote receives (in order by process received from)\n");
 66:         for (i=0; i<from->starts[from->n]; i++) {
 67:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,from->indices[i]);
 68:         }
 69:       }
 70:       if (to->local.n) {
 71:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Indices for local part of scatter\n",rank);
 72:         if (to->local.memcpy_plan.optimized[0]) {
 73:           PetscViewerASCIIPrintf(viewer,"Local part of the scatter is made of %D copies\n",to->local.memcpy_plan.copy_offsets[1]);
 74:         }
 75:         for (i=0; i<to->local.n; i++) {  /* the to and from have the opposite meaning from what you would expect */
 76:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] From %D to %D \n",rank,to->local.vslots[i],from->local.vslots[i]);
 77:         }
 78:       }

 80:       PetscViewerFlush(viewer);
 81:       PetscViewerASCIIPopSynchronized(viewer);
 82:     }
 83:   }
 84:   return(0);
 85: }

 87: /* -------------------------------------------------------------------------------------*/
 88: PetscErrorCode VecScatterDestroy_PtoP_MPI1(VecScatter ctx)
 89: {
 90:   VecScatter_MPI_General *to   = (VecScatter_MPI_General*)ctx->todata;
 91:   VecScatter_MPI_General *from = (VecScatter_MPI_General*)ctx->fromdata;
 92:   PetscErrorCode         ierr;
 93:   PetscInt               i;

 96:   /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
 97:   if (to->requests) {
 98:     for (i=0; i<to->n; i++) {
 99:       MPI_Request_free(to->requests + i);
100:     }
101:   }
102:   if (to->rev_requests) {
103:     for (i=0; i<to->n; i++) {
104:       MPI_Request_free(to->rev_requests + i);
105:     }
106:   }
107:   if (from->requests) {
108:     for (i=0; i<from->n; i++) {
109:       MPI_Request_free(from->requests + i);
110:     }
111:   }

113:   if (from->rev_requests) {
114:     for (i=0; i<from->n; i++) {
115:       MPI_Request_free(from->rev_requests + i);
116:     }
117:   }

119:   PetscFree(to->local.vslots);
120:   PetscFree(from->local.vslots);
121:   PetscFree(to->local.slots_nonmatching);
122:   PetscFree(from->local.slots_nonmatching);
123:   PetscFree(to->rev_requests);
124:   PetscFree(from->rev_requests);
125:   PetscFree(to->requests);
126:   PetscFree(from->requests);
127:   PetscFree4(to->values,to->indices,to->starts,to->procs);
128:   PetscFree2(to->sstatus,to->rstatus);
129:   PetscFree4(from->values,from->indices,from->starts,from->procs);
130:   VecScatterMemcpyPlanDestroy_PtoP(to,from);
131:   PetscFree(from);
132:   PetscFree(to);
133:   return(0);
134: }

136: /* --------------------------------------------------------------------------------------*/

138: PetscErrorCode VecScatterCopy_PtoP_X_MPI1(VecScatter in,VecScatter out)
139: {
140:   VecScatter_MPI_General *in_to   = (VecScatter_MPI_General*)in->todata;
141:   VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
142:   PetscErrorCode         ierr;
143:   PetscInt               ny,bs = in_from->bs;

146:   out->ops->begin   = in->ops->begin;
147:   out->ops->end     = in->ops->end;
148:   out->ops->copy    = in->ops->copy;
149:   out->ops->destroy = in->ops->destroy;
150:   out->ops->view    = in->ops->view;

152:   /* allocate entire send scatter context */
153:   PetscNewLog(out,&out_to);
154:   PetscNewLog(out,&out_from);

156:   ny                = in_to->starts[in_to->n];
157:   out_to->n         = in_to->n;
158:   out_to->format    = in_to->format;

160:   PetscMalloc1(out_to->n,&out_to->requests);
161:   PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
162:   PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
163:   PetscArraycpy(out_to->indices,in_to->indices,ny);
164:   PetscArraycpy(out_to->starts,in_to->starts,out_to->n+1);
165:   PetscArraycpy(out_to->procs,in_to->procs,out_to->n);

167:   out->todata                        = (void*)out_to;
168:   out_to->local.n                    = in_to->local.n;
169:   out_to->local.nonmatching_computed = PETSC_FALSE;
170:   out_to->local.n_nonmatching        = 0;
171:   out_to->local.slots_nonmatching    = 0;
172:   if (in_to->local.n) {
173:     PetscMalloc1(in_to->local.n,&out_to->local.vslots);
174:     PetscMalloc1(in_from->local.n,&out_from->local.vslots);
175:     PetscArraycpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n);
176:     PetscArraycpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n);
177:   } else {
178:     out_to->local.vslots   = 0;
179:     out_from->local.vslots = 0;
180:   }

182:   /* allocate entire receive context */
183:   out_from->format    = in_from->format;
184:   ny                  = in_from->starts[in_from->n];
185:   out_from->n         = in_from->n;

187:   PetscMalloc1(out_from->n,&out_from->requests);
188:   PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
189:   PetscArraycpy(out_from->indices,in_from->indices,ny);
190:   PetscArraycpy(out_from->starts,in_from->starts,out_from->n+1);
191:   PetscArraycpy(out_from->procs,in_from->procs,out_from->n);

193:   out->fromdata                        = (void*)out_from;
194:   out_from->local.n                    = in_from->local.n;
195:   out_from->local.nonmatching_computed = PETSC_FALSE;
196:   out_from->local.n_nonmatching        = 0;
197:   out_from->local.slots_nonmatching    = 0;

199:   /*
200:       set up the request arrays for use with isend_init() and irecv_init()
201:   */
202:   {
203:     PetscMPIInt tag;
204:     MPI_Comm    comm;
205:     PetscInt    *sstarts = out_to->starts,  *rstarts = out_from->starts;
206:     PetscMPIInt *sprocs  = out_to->procs,   *rprocs  = out_from->procs;
207:     PetscInt    i;
208:     MPI_Request *swaits   = out_to->requests,*rwaits  = out_from->requests;
209:     MPI_Request *rev_swaits,*rev_rwaits;
210:     PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;

212:     PetscMalloc1(in_to->n,&out_to->rev_requests);
213:     PetscMalloc1(in_from->n,&out_from->rev_requests);

215:     rev_rwaits = out_to->rev_requests;
216:     rev_swaits = out_from->rev_requests;

218:     out_from->bs = out_to->bs = bs;
219:     tag          = ((PetscObject)out)->tag;
220:     PetscObjectGetComm((PetscObject)out,&comm);

222:     /* Register the receives that you will use later (sends for scatter reverse) */
223:     for (i=0; i<out_from->n; i++) {
224:       MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
225:       MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
226:     }
227:     for (i=0; i<out_to->n; i++) {
228:       MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
229:       /* Register receives for scatter reverse */
230:       MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
231:     }
232:   }

234:   VecScatterMemcpyPlanCopy_PtoP(in_to,in_from,out_to,out_from);
235:   return(0);
236: }

238: PetscErrorCode VecScatterCopy_PtoP_AllToAll_MPI1(VecScatter in,VecScatter out)
239: {
240:   VecScatter_MPI_General *in_to   = (VecScatter_MPI_General*)in->todata;
241:   VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
242:   PetscErrorCode         ierr;
243:   PetscInt               ny,bs = in_from->bs;
244:   PetscMPIInt            size;

247:   MPI_Comm_size(PetscObjectComm((PetscObject)in),&size);

249:   out->ops->begin     = in->ops->begin;
250:   out->ops->end       = in->ops->end;
251:   out->ops->copy      = in->ops->copy;
252:   out->ops->destroy   = in->ops->destroy;
253:   out->ops->view      = in->ops->view;

255:   /* allocate entire send scatter context */
256:   PetscNewLog(out,&out_to);
257:   PetscNewLog(out,&out_from);

259:   ny                = in_to->starts[in_to->n];
260:   out_to->n         = in_to->n;
261:   out_to->format    = in_to->format;

263:   PetscMalloc1(out_to->n,&out_to->requests);
264:   PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
265:   PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
266:   PetscArraycpy(out_to->indices,in_to->indices,ny);
267:   PetscArraycpy(out_to->starts,in_to->starts,out_to->n+1);
268:   PetscArraycpy(out_to->procs,in_to->procs,out_to->n);

270:   out->todata                        = (void*)out_to;
271:   out_to->local.n                    = in_to->local.n;
272:   out_to->local.nonmatching_computed = PETSC_FALSE;
273:   out_to->local.n_nonmatching        = 0;
274:   out_to->local.slots_nonmatching    = 0;
275:   if (in_to->local.n) {
276:     PetscMalloc1(in_to->local.n,&out_to->local.vslots);
277:     PetscMalloc1(in_from->local.n,&out_from->local.vslots);
278:     PetscArraycpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n);
279:     PetscArraycpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n);
280:   } else {
281:     out_to->local.vslots   = 0;
282:     out_from->local.vslots = 0;
283:   }

285:   /* allocate entire receive context */
286:   out_from->format    = in_from->format;
287:   ny                  = in_from->starts[in_from->n];
288:   out_from->n         = in_from->n;

290:   PetscMalloc1(out_from->n,&out_from->requests);
291:   PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
292:   PetscArraycpy(out_from->indices,in_from->indices,ny);
293:   PetscArraycpy(out_from->starts,in_from->starts,out_from->n+1);
294:   PetscArraycpy(out_from->procs,in_from->procs,out_from->n);

296:   out->fromdata                        = (void*)out_from;
297:   out_from->local.n                    = in_from->local.n;
298:   out_from->local.nonmatching_computed = PETSC_FALSE;
299:   out_from->local.n_nonmatching        = 0;
300:   out_from->local.slots_nonmatching    = 0;

302:   VecScatterMemcpyPlanCopy_PtoP(in_to,in_from,out_to,out_from);
303:   return(0);
304: }

306: /* Optimize a parallel vector to parallel vector vecscatter with memory copies */
307: PetscErrorCode VecScatterMemcpyPlanCreate_PtoP(VecScatter_MPI_General *to,VecScatter_MPI_General *from)
308: {

312:   VecScatterMemcpyPlanCreate_Index(to->n,to->starts,to->indices,to->bs,&to->memcpy_plan);
313:   VecScatterMemcpyPlanCreate_Index(from->n,from->starts,from->indices,to->bs,&from->memcpy_plan);
314:   VecScatterMemcpyPlanCreate_SGToSG(to->bs,&to->local,&from->local);
315:   return(0);
316: }

318: PetscErrorCode VecScatterMemcpyPlanCopy_PtoP(const VecScatter_MPI_General *in_to,const VecScatter_MPI_General *in_from,VecScatter_MPI_General *out_to,VecScatter_MPI_General *out_from)
319: {

323:   VecScatterMemcpyPlanCopy(&in_to->memcpy_plan,&out_to->memcpy_plan);
324:   VecScatterMemcpyPlanCopy(&in_from->memcpy_plan,&out_from->memcpy_plan);
325:   VecScatterMemcpyPlanCopy(&in_to->local.memcpy_plan,&out_to->local.memcpy_plan);
326:   VecScatterMemcpyPlanCopy(&in_from->local.memcpy_plan,&out_from->local.memcpy_plan);
327:   return(0);
328: }

330: PetscErrorCode VecScatterMemcpyPlanDestroy_PtoP(VecScatter_MPI_General *to,VecScatter_MPI_General *from)
331: {

335:   VecScatterMemcpyPlanDestroy(&to->memcpy_plan);
336:   VecScatterMemcpyPlanDestroy(&from->memcpy_plan);
337:   VecScatterMemcpyPlanDestroy(&to->local.memcpy_plan);
338:   VecScatterMemcpyPlanDestroy(&from->local.memcpy_plan);
339:   return(0);
340: }

342: /* --------------------------------------------------------------------------------------------------
343:     Packs and unpacks the message data into send or from receive buffers.

345:     These could be generated automatically.

347:     Fortran kernels etc. could be used.
348: */
349: PETSC_STATIC_INLINE void Pack_MPI1_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
350: {
351:   PetscInt i;
352:   for (i=0; i<n; i++) y[i] = x[indicesx[i]];
353: }

355: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_1(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
356: {
357:   PetscInt i;

360:   switch (addv) {
361:   case INSERT_VALUES:
362:   case INSERT_ALL_VALUES:
363:     for (i=0; i<n; i++) y[indicesy[i]] = x[i];
364:     break;
365:   case ADD_VALUES:
366:   case ADD_ALL_VALUES:
367:     for (i=0; i<n; i++) y[indicesy[i]] += x[i];
368:     break;
369: #if !defined(PETSC_USE_COMPLEX)
370:   case MAX_VALUES:
371:     for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[i]);
372: #else
373:   case MAX_VALUES:
374: #endif
375:   case NOT_SET_VALUES:
376:     break;
377:   default:
378:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
379:   }
380:   return(0);
381: }

383: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
384: {
385:   PetscInt i;

388:   switch (addv) {
389:   case INSERT_VALUES:
390:   case INSERT_ALL_VALUES:
391:     for (i=0; i<n; i++) y[indicesy[i]] = x[indicesx[i]];
392:     break;
393:   case ADD_VALUES:
394:   case ADD_ALL_VALUES:
395:     for (i=0; i<n; i++) y[indicesy[i]] += x[indicesx[i]];
396:     break;
397: #if !defined(PETSC_USE_COMPLEX)
398:   case MAX_VALUES:
399:     for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[indicesx[i]]);
400: #else
401:   case MAX_VALUES:
402: #endif
403:   case NOT_SET_VALUES:
404:     break;
405:   default:
406:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
407:   }
408:   return(0);
409: }

411: /* ----------------------------------------------------------------------------------------------- */
412: PETSC_STATIC_INLINE void Pack_MPI1_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
413: {
414:   PetscInt i,idx;

416:   for (i=0; i<n; i++) {
417:     idx  = *indicesx++;
418:     y[0] = x[idx];
419:     y[1] = x[idx+1];
420:     y   += 2;
421:   }
422: }

424: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_2(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
425: {
426:   PetscInt i,idy;

429:   switch (addv) {
430:   case INSERT_VALUES:
431:   case INSERT_ALL_VALUES:
432:     for (i=0; i<n; i++) {
433:       idy      = *indicesy++;
434:       y[idy]   = x[0];
435:       y[idy+1] = x[1];
436:       x       += 2;
437:     }
438:     break;
439:   case ADD_VALUES:
440:   case ADD_ALL_VALUES:
441:     for (i=0; i<n; i++) {
442:       idy       = *indicesy++;
443:       y[idy]   += x[0];
444:       y[idy+1] += x[1];
445:       x        += 2;
446:     }
447:     break;
448: #if !defined(PETSC_USE_COMPLEX)
449:   case MAX_VALUES:
450:     for (i=0; i<n; i++) {
451:       idy      = *indicesy++;
452:       y[idy]   = PetscMax(y[idy],x[0]);
453:       y[idy+1] = PetscMax(y[idy+1],x[1]);
454:       x       += 2;
455:     }
456: #else
457:   case MAX_VALUES:
458: #endif
459:   case NOT_SET_VALUES:
460:     break;
461:   default:
462:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
463:   }
464:   return(0);
465: }

467: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
468: {
469:   PetscInt i,idx,idy;

472:   switch (addv) {
473:   case INSERT_VALUES:
474:   case INSERT_ALL_VALUES:
475:     for (i=0; i<n; i++) {
476:       idx      = *indicesx++;
477:       idy      = *indicesy++;
478:       y[idy]   = x[idx];
479:       y[idy+1] = x[idx+1];
480:     }
481:     break;
482:   case ADD_VALUES:
483:   case ADD_ALL_VALUES:
484:     for (i=0; i<n; i++) {
485:       idx       = *indicesx++;
486:       idy       = *indicesy++;
487:       y[idy]   += x[idx];
488:       y[idy+1] += x[idx+1];
489:     }
490:     break;
491: #if !defined(PETSC_USE_COMPLEX)
492:   case MAX_VALUES:
493:     for (i=0; i<n; i++) {
494:       idx      = *indicesx++;
495:       idy      = *indicesy++;
496:       y[idy]   = PetscMax(y[idy],x[idx]);
497:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
498:     }
499: #else
500:   case MAX_VALUES:
501: #endif
502:   case NOT_SET_VALUES:
503:     break;
504:   default:
505:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
506:   }
507:   return(0);
508: }
509: /* ----------------------------------------------------------------------------------------------- */
510: PETSC_STATIC_INLINE void Pack_MPI1_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
511: {
512:   PetscInt i,idx;

514:   for (i=0; i<n; i++) {
515:     idx  = *indicesx++;
516:     y[0] = x[idx];
517:     y[1] = x[idx+1];
518:     y[2] = x[idx+2];
519:     y   += 3;
520:   }
521: }
522: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_3(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
523: {
524:   PetscInt i,idy;

527:   switch (addv) {
528:   case INSERT_VALUES:
529:   case INSERT_ALL_VALUES:
530:     for (i=0; i<n; i++) {
531:       idy      = *indicesy++;
532:       y[idy]   = x[0];
533:       y[idy+1] = x[1];
534:       y[idy+2] = x[2];
535:       x       += 3;
536:     }
537:     break;
538:   case ADD_VALUES:
539:   case ADD_ALL_VALUES:
540:     for (i=0; i<n; i++) {
541:       idy       = *indicesy++;
542:       y[idy]   += x[0];
543:       y[idy+1] += x[1];
544:       y[idy+2] += x[2];
545:       x        += 3;
546:     }
547:     break;
548: #if !defined(PETSC_USE_COMPLEX)
549:   case MAX_VALUES:
550:     for (i=0; i<n; i++) {
551:       idy      = *indicesy++;
552:       y[idy]   = PetscMax(y[idy],x[0]);
553:       y[idy+1] = PetscMax(y[idy+1],x[1]);
554:       y[idy+2] = PetscMax(y[idy+2],x[2]);
555:       x       += 3;
556:     }
557: #else
558:   case MAX_VALUES:
559: #endif
560:   case NOT_SET_VALUES:
561:     break;
562:   default:
563:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
564:   }
565:   return(0);
566: }

568: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
569: {
570:   PetscInt i,idx,idy;

573:   switch (addv) {
574:   case INSERT_VALUES:
575:   case INSERT_ALL_VALUES:
576:     for (i=0; i<n; i++) {
577:       idx      = *indicesx++;
578:       idy      = *indicesy++;
579:       y[idy]   = x[idx];
580:       y[idy+1] = x[idx+1];
581:       y[idy+2] = x[idx+2];
582:     }
583:     break;
584:   case ADD_VALUES:
585:   case ADD_ALL_VALUES:
586:     for (i=0; i<n; i++) {
587:       idx       = *indicesx++;
588:       idy       = *indicesy++;
589:       y[idy]   += x[idx];
590:       y[idy+1] += x[idx+1];
591:       y[idy+2] += x[idx+2];
592:     }
593:     break;
594: #if !defined(PETSC_USE_COMPLEX)
595:   case MAX_VALUES:
596:     for (i=0; i<n; i++) {
597:       idx      = *indicesx++;
598:       idy      = *indicesy++;
599:       y[idy]   = PetscMax(y[idy],x[idx]);
600:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
601:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
602:     }
603: #else
604:   case MAX_VALUES:
605: #endif
606:   case NOT_SET_VALUES:
607:     break;
608:   default:
609:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
610:   }
611:   return(0);
612: }
613: /* ----------------------------------------------------------------------------------------------- */
614: PETSC_STATIC_INLINE void Pack_MPI1_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
615: {
616:   PetscInt i,idx;

618:   for (i=0; i<n; i++) {
619:     idx  = *indicesx++;
620:     y[0] = x[idx];
621:     y[1] = x[idx+1];
622:     y[2] = x[idx+2];
623:     y[3] = x[idx+3];
624:     y   += 4;
625:   }
626: }
627: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_4(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
628: {
629:   PetscInt i,idy;

632:   switch (addv) {
633:   case INSERT_VALUES:
634:   case INSERT_ALL_VALUES:
635:     for (i=0; i<n; i++) {
636:       idy      = *indicesy++;
637:       y[idy]   = x[0];
638:       y[idy+1] = x[1];
639:       y[idy+2] = x[2];
640:       y[idy+3] = x[3];
641:       x       += 4;
642:     }
643:     break;
644:   case ADD_VALUES:
645:   case ADD_ALL_VALUES:
646:     for (i=0; i<n; i++) {
647:       idy       = *indicesy++;
648:       y[idy]   += x[0];
649:       y[idy+1] += x[1];
650:       y[idy+2] += x[2];
651:       y[idy+3] += x[3];
652:       x        += 4;
653:     }
654:     break;
655: #if !defined(PETSC_USE_COMPLEX)
656:   case MAX_VALUES:
657:     for (i=0; i<n; i++) {
658:       idy      = *indicesy++;
659:       y[idy]   = PetscMax(y[idy],x[0]);
660:       y[idy+1] = PetscMax(y[idy+1],x[1]);
661:       y[idy+2] = PetscMax(y[idy+2],x[2]);
662:       y[idy+3] = PetscMax(y[idy+3],x[3]);
663:       x       += 4;
664:     }
665: #else
666:   case MAX_VALUES:
667: #endif
668:   case NOT_SET_VALUES:
669:     break;
670:   default:
671:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
672:   }
673:   return(0);
674: }

676: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
677: {
678:   PetscInt i,idx,idy;

681:   switch (addv) {
682:   case INSERT_VALUES:
683:   case INSERT_ALL_VALUES:
684:     for (i=0; i<n; i++) {
685:       idx      = *indicesx++;
686:       idy      = *indicesy++;
687:       y[idy]   = x[idx];
688:       y[idy+1] = x[idx+1];
689:       y[idy+2] = x[idx+2];
690:       y[idy+3] = x[idx+3];
691:     }
692:     break;
693:   case ADD_VALUES:
694:   case ADD_ALL_VALUES:
695:     for (i=0; i<n; i++) {
696:       idx       = *indicesx++;
697:       idy       = *indicesy++;
698:       y[idy]   += x[idx];
699:       y[idy+1] += x[idx+1];
700:       y[idy+2] += x[idx+2];
701:       y[idy+3] += x[idx+3];
702:     }
703:     break;
704: #if !defined(PETSC_USE_COMPLEX)
705:   case MAX_VALUES:
706:     for (i=0; i<n; i++) {
707:       idx      = *indicesx++;
708:       idy      = *indicesy++;
709:       y[idy]   = PetscMax(y[idy],x[idx]);
710:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
711:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
712:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
713:     }
714: #else
715:   case MAX_VALUES:
716: #endif
717:   case NOT_SET_VALUES:
718:     break;
719:   default:
720:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
721:   }
722:   return(0);
723: }
724: /* ----------------------------------------------------------------------------------------------- */
725: PETSC_STATIC_INLINE void Pack_MPI1_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
726: {
727:   PetscInt i,idx;

729:   for (i=0; i<n; i++) {
730:     idx  = *indicesx++;
731:     y[0] = x[idx];
732:     y[1] = x[idx+1];
733:     y[2] = x[idx+2];
734:     y[3] = x[idx+3];
735:     y[4] = x[idx+4];
736:     y   += 5;
737:   }
738: }

740: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_5(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
741: {
742:   PetscInt i,idy;

745:   switch (addv) {
746:   case INSERT_VALUES:
747:   case INSERT_ALL_VALUES:
748:     for (i=0; i<n; i++) {
749:       idy      = *indicesy++;
750:       y[idy]   = x[0];
751:       y[idy+1] = x[1];
752:       y[idy+2] = x[2];
753:       y[idy+3] = x[3];
754:       y[idy+4] = x[4];
755:       x       += 5;
756:     }
757:     break;
758:   case ADD_VALUES:
759:   case ADD_ALL_VALUES:
760:     for (i=0; i<n; i++) {
761:       idy       = *indicesy++;
762:       y[idy]   += x[0];
763:       y[idy+1] += x[1];
764:       y[idy+2] += x[2];
765:       y[idy+3] += x[3];
766:       y[idy+4] += x[4];
767:       x        += 5;
768:     }
769:     break;
770: #if !defined(PETSC_USE_COMPLEX)
771:   case MAX_VALUES:
772:     for (i=0; i<n; i++) {
773:       idy      = *indicesy++;
774:       y[idy]   = PetscMax(y[idy],x[0]);
775:       y[idy+1] = PetscMax(y[idy+1],x[1]);
776:       y[idy+2] = PetscMax(y[idy+2],x[2]);
777:       y[idy+3] = PetscMax(y[idy+3],x[3]);
778:       y[idy+4] = PetscMax(y[idy+4],x[4]);
779:       x       += 5;
780:     }
781: #else
782:   case MAX_VALUES:
783: #endif
784:   case NOT_SET_VALUES:
785:     break;
786:   default:
787:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
788:   }
789:   return(0);
790: }

792: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
793: {
794:   PetscInt i,idx,idy;

797:   switch (addv) {
798:   case INSERT_VALUES:
799:   case INSERT_ALL_VALUES:
800:     for (i=0; i<n; i++) {
801:       idx      = *indicesx++;
802:       idy      = *indicesy++;
803:       y[idy]   = x[idx];
804:       y[idy+1] = x[idx+1];
805:       y[idy+2] = x[idx+2];
806:       y[idy+3] = x[idx+3];
807:       y[idy+4] = x[idx+4];
808:     }
809:     break;
810:   case ADD_VALUES:
811:   case ADD_ALL_VALUES:
812:     for (i=0; i<n; i++) {
813:       idx       = *indicesx++;
814:       idy       = *indicesy++;
815:       y[idy]   += x[idx];
816:       y[idy+1] += x[idx+1];
817:       y[idy+2] += x[idx+2];
818:       y[idy+3] += x[idx+3];
819:       y[idy+4] += x[idx+4];
820:     }
821:     break;
822: #if !defined(PETSC_USE_COMPLEX)
823:   case MAX_VALUES:
824:     for (i=0; i<n; i++) {
825:       idx      = *indicesx++;
826:       idy      = *indicesy++;
827:       y[idy]   = PetscMax(y[idy],x[idx]);
828:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
829:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
830:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
831:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
832:     }
833: #else
834:   case MAX_VALUES:
835: #endif
836:   case NOT_SET_VALUES:
837:     break;
838:   default:
839:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
840:   }
841:   return(0);
842: }
843: /* ----------------------------------------------------------------------------------------------- */
844: PETSC_STATIC_INLINE void Pack_MPI1_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
845: {
846:   PetscInt i,idx;

848:   for (i=0; i<n; i++) {
849:     idx  = *indicesx++;
850:     y[0] = x[idx];
851:     y[1] = x[idx+1];
852:     y[2] = x[idx+2];
853:     y[3] = x[idx+3];
854:     y[4] = x[idx+4];
855:     y[5] = x[idx+5];
856:     y   += 6;
857:   }
858: }

860: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_6(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
861: {
862:   PetscInt i,idy;

865:   switch (addv) {
866:   case INSERT_VALUES:
867:   case INSERT_ALL_VALUES:
868:     for (i=0; i<n; i++) {
869:       idy      = *indicesy++;
870:       y[idy]   = x[0];
871:       y[idy+1] = x[1];
872:       y[idy+2] = x[2];
873:       y[idy+3] = x[3];
874:       y[idy+4] = x[4];
875:       y[idy+5] = x[5];
876:       x       += 6;
877:     }
878:     break;
879:   case ADD_VALUES:
880:   case ADD_ALL_VALUES:
881:     for (i=0; i<n; i++) {
882:       idy       = *indicesy++;
883:       y[idy]   += x[0];
884:       y[idy+1] += x[1];
885:       y[idy+2] += x[2];
886:       y[idy+3] += x[3];
887:       y[idy+4] += x[4];
888:       y[idy+5] += x[5];
889:       x        += 6;
890:     }
891:     break;
892: #if !defined(PETSC_USE_COMPLEX)
893:   case MAX_VALUES:
894:     for (i=0; i<n; i++) {
895:       idy      = *indicesy++;
896:       y[idy]   = PetscMax(y[idy],x[0]);
897:       y[idy+1] = PetscMax(y[idy+1],x[1]);
898:       y[idy+2] = PetscMax(y[idy+2],x[2]);
899:       y[idy+3] = PetscMax(y[idy+3],x[3]);
900:       y[idy+4] = PetscMax(y[idy+4],x[4]);
901:       y[idy+5] = PetscMax(y[idy+5],x[5]);
902:       x       += 6;
903:     }
904: #else
905:   case MAX_VALUES:
906: #endif
907:   case NOT_SET_VALUES:
908:     break;
909:   default:
910:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
911:   }
912:   return(0);
913: }

915: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
916: {
917:   PetscInt i,idx,idy;

920:   switch (addv) {
921:   case INSERT_VALUES:
922:   case INSERT_ALL_VALUES:
923:     for (i=0; i<n; i++) {
924:       idx      = *indicesx++;
925:       idy      = *indicesy++;
926:       y[idy]   = x[idx];
927:       y[idy+1] = x[idx+1];
928:       y[idy+2] = x[idx+2];
929:       y[idy+3] = x[idx+3];
930:       y[idy+4] = x[idx+4];
931:       y[idy+5] = x[idx+5];
932:     }
933:     break;
934:   case ADD_VALUES:
935:   case ADD_ALL_VALUES:
936:     for (i=0; i<n; i++) {
937:       idx       = *indicesx++;
938:       idy       = *indicesy++;
939:       y[idy]   += x[idx];
940:       y[idy+1] += x[idx+1];
941:       y[idy+2] += x[idx+2];
942:       y[idy+3] += x[idx+3];
943:       y[idy+4] += x[idx+4];
944:       y[idy+5] += x[idx+5];
945:     }
946:     break;
947: #if !defined(PETSC_USE_COMPLEX)
948:   case MAX_VALUES:
949:     for (i=0; i<n; i++) {
950:       idx      = *indicesx++;
951:       idy      = *indicesy++;
952:       y[idy]   = PetscMax(y[idy],x[idx]);
953:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
954:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
955:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
956:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
957:       y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
958:     }
959: #else
960:   case MAX_VALUES:
961: #endif
962:   case NOT_SET_VALUES:
963:     break;
964:   default:
965:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
966:   }
967:   return(0);
968: }
969: /* ----------------------------------------------------------------------------------------------- */
970: PETSC_STATIC_INLINE void Pack_MPI1_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
971: {
972:   PetscInt i,idx;

974:   for (i=0; i<n; i++) {
975:     idx  = *indicesx++;
976:     y[0] = x[idx];
977:     y[1] = x[idx+1];
978:     y[2] = x[idx+2];
979:     y[3] = x[idx+3];
980:     y[4] = x[idx+4];
981:     y[5] = x[idx+5];
982:     y[6] = x[idx+6];
983:     y   += 7;
984:   }
985: }

987: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_7(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
988: {
989:   PetscInt i,idy;

992:   switch (addv) {
993:   case INSERT_VALUES:
994:   case INSERT_ALL_VALUES:
995:     for (i=0; i<n; i++) {
996:       idy      = *indicesy++;
997:       y[idy]   = x[0];
998:       y[idy+1] = x[1];
999:       y[idy+2] = x[2];
1000:       y[idy+3] = x[3];
1001:       y[idy+4] = x[4];
1002:       y[idy+5] = x[5];
1003:       y[idy+6] = x[6];
1004:       x       += 7;
1005:     }
1006:     break;
1007:   case ADD_VALUES:
1008:   case ADD_ALL_VALUES:
1009:     for (i=0; i<n; i++) {
1010:       idy       = *indicesy++;
1011:       y[idy]   += x[0];
1012:       y[idy+1] += x[1];
1013:       y[idy+2] += x[2];
1014:       y[idy+3] += x[3];
1015:       y[idy+4] += x[4];
1016:       y[idy+5] += x[5];
1017:       y[idy+6] += x[6];
1018:       x        += 7;
1019:     }
1020:     break;
1021: #if !defined(PETSC_USE_COMPLEX)
1022:   case MAX_VALUES:
1023:     for (i=0; i<n; i++) {
1024:       idy      = *indicesy++;
1025:       y[idy]   = PetscMax(y[idy],x[0]);
1026:       y[idy+1] = PetscMax(y[idy+1],x[1]);
1027:       y[idy+2] = PetscMax(y[idy+2],x[2]);
1028:       y[idy+3] = PetscMax(y[idy+3],x[3]);
1029:       y[idy+4] = PetscMax(y[idy+4],x[4]);
1030:       y[idy+5] = PetscMax(y[idy+5],x[5]);
1031:       y[idy+6] = PetscMax(y[idy+6],x[6]);
1032:       x       += 7;
1033:     }
1034: #else
1035:   case MAX_VALUES:
1036: #endif
1037:   case NOT_SET_VALUES:
1038:     break;
1039:   default:
1040:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1041:   }
1042:   return(0);
1043: }

1045: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1046: {
1047:   PetscInt i,idx,idy;

1050:   switch (addv) {
1051:   case INSERT_VALUES:
1052:   case INSERT_ALL_VALUES:
1053:     for (i=0; i<n; i++) {
1054:       idx      = *indicesx++;
1055:       idy      = *indicesy++;
1056:       y[idy]   = x[idx];
1057:       y[idy+1] = x[idx+1];
1058:       y[idy+2] = x[idx+2];
1059:       y[idy+3] = x[idx+3];
1060:       y[idy+4] = x[idx+4];
1061:       y[idy+5] = x[idx+5];
1062:       y[idy+6] = x[idx+6];
1063:     }
1064:     break;
1065:   case ADD_VALUES:
1066:   case ADD_ALL_VALUES:
1067:     for (i=0; i<n; i++) {
1068:       idx       = *indicesx++;
1069:       idy       = *indicesy++;
1070:       y[idy]   += x[idx];
1071:       y[idy+1] += x[idx+1];
1072:       y[idy+2] += x[idx+2];
1073:       y[idy+3] += x[idx+3];
1074:       y[idy+4] += x[idx+4];
1075:       y[idy+5] += x[idx+5];
1076:       y[idy+6] += x[idx+6];
1077:     }
1078:     break;
1079: #if !defined(PETSC_USE_COMPLEX)
1080:   case MAX_VALUES:
1081:     for (i=0; i<n; i++) {
1082:       idx      = *indicesx++;
1083:       idy      = *indicesy++;
1084:       y[idy]   = PetscMax(y[idy],x[idx]);
1085:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1086:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1087:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1088:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1089:       y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1090:       y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1091:     }
1092: #else
1093:   case MAX_VALUES:
1094: #endif
1095:   case NOT_SET_VALUES:
1096:     break;
1097:   default:
1098:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1099:   }
1100:   return(0);
1101: }
1102: /* ----------------------------------------------------------------------------------------------- */
1103: PETSC_STATIC_INLINE void Pack_MPI1_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1104: {
1105:   PetscInt i,idx;

1107:   for (i=0; i<n; i++) {
1108:     idx  = *indicesx++;
1109:     y[0] = x[idx];
1110:     y[1] = x[idx+1];
1111:     y[2] = x[idx+2];
1112:     y[3] = x[idx+3];
1113:     y[4] = x[idx+4];
1114:     y[5] = x[idx+5];
1115:     y[6] = x[idx+6];
1116:     y[7] = x[idx+7];
1117:     y   += 8;
1118:   }
1119: }

1121: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_8(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1122: {
1123:   PetscInt i,idy;

1126:   switch (addv) {
1127:   case INSERT_VALUES:
1128:   case INSERT_ALL_VALUES:
1129:     for (i=0; i<n; i++) {
1130:       idy      = *indicesy++;
1131:       y[idy]   = x[0];
1132:       y[idy+1] = x[1];
1133:       y[idy+2] = x[2];
1134:       y[idy+3] = x[3];
1135:       y[idy+4] = x[4];
1136:       y[idy+5] = x[5];
1137:       y[idy+6] = x[6];
1138:       y[idy+7] = x[7];
1139:       x       += 8;
1140:     }
1141:     break;
1142:   case ADD_VALUES:
1143:   case ADD_ALL_VALUES:
1144:     for (i=0; i<n; i++) {
1145:       idy       = *indicesy++;
1146:       y[idy]   += x[0];
1147:       y[idy+1] += x[1];
1148:       y[idy+2] += x[2];
1149:       y[idy+3] += x[3];
1150:       y[idy+4] += x[4];
1151:       y[idy+5] += x[5];
1152:       y[idy+6] += x[6];
1153:       y[idy+7] += x[7];
1154:       x        += 8;
1155:     }
1156:     break;
1157: #if !defined(PETSC_USE_COMPLEX)
1158:   case MAX_VALUES:
1159:     for (i=0; i<n; i++) {
1160:       idy      = *indicesy++;
1161:       y[idy]   = PetscMax(y[idy],x[0]);
1162:       y[idy+1] = PetscMax(y[idy+1],x[1]);
1163:       y[idy+2] = PetscMax(y[idy+2],x[2]);
1164:       y[idy+3] = PetscMax(y[idy+3],x[3]);
1165:       y[idy+4] = PetscMax(y[idy+4],x[4]);
1166:       y[idy+5] = PetscMax(y[idy+5],x[5]);
1167:       y[idy+6] = PetscMax(y[idy+6],x[6]);
1168:       y[idy+7] = PetscMax(y[idy+7],x[7]);
1169:       x       += 8;
1170:     }
1171: #else
1172:   case MAX_VALUES:
1173: #endif
1174:   case NOT_SET_VALUES:
1175:     break;
1176:   default:
1177:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1178:   }
1179:   return(0);
1180: }

1182: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1183: {
1184:   PetscInt i,idx,idy;

1187:   switch (addv) {
1188:   case INSERT_VALUES:
1189:   case INSERT_ALL_VALUES:
1190:     for (i=0; i<n; i++) {
1191:       idx      = *indicesx++;
1192:       idy      = *indicesy++;
1193:       y[idy]   = x[idx];
1194:       y[idy+1] = x[idx+1];
1195:       y[idy+2] = x[idx+2];
1196:       y[idy+3] = x[idx+3];
1197:       y[idy+4] = x[idx+4];
1198:       y[idy+5] = x[idx+5];
1199:       y[idy+6] = x[idx+6];
1200:       y[idy+7] = x[idx+7];
1201:     }
1202:     break;
1203:   case ADD_VALUES:
1204:   case ADD_ALL_VALUES:
1205:     for (i=0; i<n; i++) {
1206:       idx       = *indicesx++;
1207:       idy       = *indicesy++;
1208:       y[idy]   += x[idx];
1209:       y[idy+1] += x[idx+1];
1210:       y[idy+2] += x[idx+2];
1211:       y[idy+3] += x[idx+3];
1212:       y[idy+4] += x[idx+4];
1213:       y[idy+5] += x[idx+5];
1214:       y[idy+6] += x[idx+6];
1215:       y[idy+7] += x[idx+7];
1216:     }
1217:     break;
1218: #if !defined(PETSC_USE_COMPLEX)
1219:   case MAX_VALUES:
1220:     for (i=0; i<n; i++) {
1221:       idx      = *indicesx++;
1222:       idy      = *indicesy++;
1223:       y[idy]   = PetscMax(y[idy],x[idx]);
1224:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1225:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1226:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1227:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1228:       y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1229:       y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1230:       y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1231:     }
1232: #else
1233:   case MAX_VALUES:
1234: #endif
1235:   case NOT_SET_VALUES:
1236:     break;
1237:   default:
1238:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1239:   }
1240:   return(0);
1241: }

1243: PETSC_STATIC_INLINE void Pack_MPI1_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1244: {
1245:   PetscInt i,idx;

1247:   for (i=0; i<n; i++) {
1248:     idx   = *indicesx++;
1249:     y[0]  = x[idx];
1250:     y[1]  = x[idx+1];
1251:     y[2]  = x[idx+2];
1252:     y[3]  = x[idx+3];
1253:     y[4]  = x[idx+4];
1254:     y[5]  = x[idx+5];
1255:     y[6]  = x[idx+6];
1256:     y[7]  = x[idx+7];
1257:     y[8]  = x[idx+8];
1258:     y    += 9;
1259:   }
1260: }

1262: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_9(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1263: {
1264:   PetscInt i,idy;

1267:   switch (addv) {
1268:   case INSERT_VALUES:
1269:   case INSERT_ALL_VALUES:
1270:     for (i=0; i<n; i++) {
1271:       idy       = *indicesy++;
1272:       y[idy]    = x[0];
1273:       y[idy+1]  = x[1];
1274:       y[idy+2]  = x[2];
1275:       y[idy+3]  = x[3];
1276:       y[idy+4]  = x[4];
1277:       y[idy+5]  = x[5];
1278:       y[idy+6]  = x[6];
1279:       y[idy+7]  = x[7];
1280:       y[idy+8]  = x[8];
1281:       x        += 9;
1282:     }
1283:     break;
1284:   case ADD_VALUES:
1285:   case ADD_ALL_VALUES:
1286:     for (i=0; i<n; i++) {
1287:       idy        = *indicesy++;
1288:       y[idy]    += x[0];
1289:       y[idy+1]  += x[1];
1290:       y[idy+2]  += x[2];
1291:       y[idy+3]  += x[3];
1292:       y[idy+4]  += x[4];
1293:       y[idy+5]  += x[5];
1294:       y[idy+6]  += x[6];
1295:       y[idy+7]  += x[7];
1296:       y[idy+8]  += x[8];
1297:       x         += 9;
1298:     }
1299:     break;
1300: #if !defined(PETSC_USE_COMPLEX)
1301:   case MAX_VALUES:
1302:     for (i=0; i<n; i++) {
1303:       idy       = *indicesy++;
1304:       y[idy]    = PetscMax(y[idy],x[0]);
1305:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1306:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1307:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1308:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1309:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1310:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1311:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1312:       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1313:       x        += 9;
1314:     }
1315: #else
1316:   case MAX_VALUES:
1317: #endif
1318:   case NOT_SET_VALUES:
1319:     break;
1320:   default:
1321:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1322:   }
1323:   return(0);
1324: }

1326: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1327: {
1328:   PetscInt i,idx,idy;

1331:   switch (addv) {
1332:   case INSERT_VALUES:
1333:   case INSERT_ALL_VALUES:
1334:     for (i=0; i<n; i++) {
1335:       idx       = *indicesx++;
1336:       idy       = *indicesy++;
1337:       y[idy]    = x[idx];
1338:       y[idy+1]  = x[idx+1];
1339:       y[idy+2]  = x[idx+2];
1340:       y[idy+3]  = x[idx+3];
1341:       y[idy+4]  = x[idx+4];
1342:       y[idy+5]  = x[idx+5];
1343:       y[idy+6]  = x[idx+6];
1344:       y[idy+7]  = x[idx+7];
1345:       y[idy+8]  = x[idx+8];
1346:     }
1347:     break;
1348:   case ADD_VALUES:
1349:   case ADD_ALL_VALUES:
1350:     for (i=0; i<n; i++) {
1351:       idx        = *indicesx++;
1352:       idy        = *indicesy++;
1353:       y[idy]    += x[idx];
1354:       y[idy+1]  += x[idx+1];
1355:       y[idy+2]  += x[idx+2];
1356:       y[idy+3]  += x[idx+3];
1357:       y[idy+4]  += x[idx+4];
1358:       y[idy+5]  += x[idx+5];
1359:       y[idy+6]  += x[idx+6];
1360:       y[idy+7]  += x[idx+7];
1361:       y[idy+8]  += x[idx+8];
1362:     }
1363:     break;
1364: #if !defined(PETSC_USE_COMPLEX)
1365:   case MAX_VALUES:
1366:     for (i=0; i<n; i++) {
1367:       idx       = *indicesx++;
1368:       idy       = *indicesy++;
1369:       y[idy]    = PetscMax(y[idy],x[idx]);
1370:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1371:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1372:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1373:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1374:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1375:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1376:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1377:       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1378:     }
1379: #else
1380:   case MAX_VALUES:
1381: #endif
1382:   case NOT_SET_VALUES:
1383:     break;
1384:   default:
1385:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1386:   }
1387:   return(0);
1388: }

1390: PETSC_STATIC_INLINE void Pack_MPI1_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1391: {
1392:   PetscInt i,idx;

1394:   for (i=0; i<n; i++) {
1395:     idx   = *indicesx++;
1396:     y[0]  = x[idx];
1397:     y[1]  = x[idx+1];
1398:     y[2]  = x[idx+2];
1399:     y[3]  = x[idx+3];
1400:     y[4]  = x[idx+4];
1401:     y[5]  = x[idx+5];
1402:     y[6]  = x[idx+6];
1403:     y[7]  = x[idx+7];
1404:     y[8]  = x[idx+8];
1405:     y[9]  = x[idx+9];
1406:     y    += 10;
1407:   }
1408: }

1410: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_10(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1411: {
1412:   PetscInt i,idy;

1415:   switch (addv) {
1416:   case INSERT_VALUES:
1417:   case INSERT_ALL_VALUES:
1418:     for (i=0; i<n; i++) {
1419:       idy       = *indicesy++;
1420:       y[idy]    = x[0];
1421:       y[idy+1]  = x[1];
1422:       y[idy+2]  = x[2];
1423:       y[idy+3]  = x[3];
1424:       y[idy+4]  = x[4];
1425:       y[idy+5]  = x[5];
1426:       y[idy+6]  = x[6];
1427:       y[idy+7]  = x[7];
1428:       y[idy+8]  = x[8];
1429:       y[idy+9]  = x[9];
1430:       x        += 10;
1431:     }
1432:     break;
1433:   case ADD_VALUES:
1434:   case ADD_ALL_VALUES:
1435:     for (i=0; i<n; i++) {
1436:       idy        = *indicesy++;
1437:       y[idy]    += x[0];
1438:       y[idy+1]  += x[1];
1439:       y[idy+2]  += x[2];
1440:       y[idy+3]  += x[3];
1441:       y[idy+4]  += x[4];
1442:       y[idy+5]  += x[5];
1443:       y[idy+6]  += x[6];
1444:       y[idy+7]  += x[7];
1445:       y[idy+8]  += x[8];
1446:       y[idy+9]  += x[9];
1447:       x         += 10;
1448:     }
1449:     break;
1450: #if !defined(PETSC_USE_COMPLEX)
1451:   case MAX_VALUES:
1452:     for (i=0; i<n; i++) {
1453:       idy       = *indicesy++;
1454:       y[idy]    = PetscMax(y[idy],x[0]);
1455:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1456:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1457:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1458:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1459:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1460:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1461:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1462:       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1463:       y[idy+9]  = PetscMax(y[idy+9],x[9]);
1464:       x        += 10;
1465:     }
1466: #else
1467:   case MAX_VALUES:
1468: #endif
1469:   case NOT_SET_VALUES:
1470:     break;
1471:   default:
1472:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1473:   }
1474:   return(0);
1475: }

1477: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1478: {
1479:   PetscInt i,idx,idy;

1482:   switch (addv) {
1483:   case INSERT_VALUES:
1484:   case INSERT_ALL_VALUES:
1485:     for (i=0; i<n; i++) {
1486:       idx       = *indicesx++;
1487:       idy       = *indicesy++;
1488:       y[idy]    = x[idx];
1489:       y[idy+1]  = x[idx+1];
1490:       y[idy+2]  = x[idx+2];
1491:       y[idy+3]  = x[idx+3];
1492:       y[idy+4]  = x[idx+4];
1493:       y[idy+5]  = x[idx+5];
1494:       y[idy+6]  = x[idx+6];
1495:       y[idy+7]  = x[idx+7];
1496:       y[idy+8]  = x[idx+8];
1497:       y[idy+9]  = x[idx+9];
1498:     }
1499:     break;
1500:   case ADD_VALUES:
1501:   case ADD_ALL_VALUES:
1502:     for (i=0; i<n; i++) {
1503:       idx        = *indicesx++;
1504:       idy        = *indicesy++;
1505:       y[idy]    += x[idx];
1506:       y[idy+1]  += x[idx+1];
1507:       y[idy+2]  += x[idx+2];
1508:       y[idy+3]  += x[idx+3];
1509:       y[idy+4]  += x[idx+4];
1510:       y[idy+5]  += x[idx+5];
1511:       y[idy+6]  += x[idx+6];
1512:       y[idy+7]  += x[idx+7];
1513:       y[idy+8]  += x[idx+8];
1514:       y[idy+9]  += x[idx+9];
1515:     }
1516:     break;
1517: #if !defined(PETSC_USE_COMPLEX)
1518:   case MAX_VALUES:
1519:     for (i=0; i<n; i++) {
1520:       idx       = *indicesx++;
1521:       idy       = *indicesy++;
1522:       y[idy]    = PetscMax(y[idy],x[idx]);
1523:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1524:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1525:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1526:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1527:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1528:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1529:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1530:       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1531:       y[idy+9]  = PetscMax(y[idy+9],x[idx+9]);
1532:     }
1533: #else
1534:   case MAX_VALUES:
1535: #endif
1536:   case NOT_SET_VALUES:
1537:     break;
1538:   default:
1539:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1540:   }
1541:   return(0);
1542: }

1544: PETSC_STATIC_INLINE void Pack_MPI1_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1545: {
1546:   PetscInt i,idx;

1548:   for (i=0; i<n; i++) {
1549:     idx   = *indicesx++;
1550:     y[0]  = x[idx];
1551:     y[1]  = x[idx+1];
1552:     y[2]  = x[idx+2];
1553:     y[3]  = x[idx+3];
1554:     y[4]  = x[idx+4];
1555:     y[5]  = x[idx+5];
1556:     y[6]  = x[idx+6];
1557:     y[7]  = x[idx+7];
1558:     y[8]  = x[idx+8];
1559:     y[9]  = x[idx+9];
1560:     y[10] = x[idx+10];
1561:     y    += 11;
1562:   }
1563: }

1565: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_11(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1566: {
1567:   PetscInt i,idy;

1570:   switch (addv) {
1571:   case INSERT_VALUES:
1572:   case INSERT_ALL_VALUES:
1573:     for (i=0; i<n; i++) {
1574:       idy       = *indicesy++;
1575:       y[idy]    = x[0];
1576:       y[idy+1]  = x[1];
1577:       y[idy+2]  = x[2];
1578:       y[idy+3]  = x[3];
1579:       y[idy+4]  = x[4];
1580:       y[idy+5]  = x[5];
1581:       y[idy+6]  = x[6];
1582:       y[idy+7]  = x[7];
1583:       y[idy+8]  = x[8];
1584:       y[idy+9]  = x[9];
1585:       y[idy+10] = x[10];
1586:       x        += 11;
1587:     }
1588:     break;
1589:   case ADD_VALUES:
1590:   case ADD_ALL_VALUES:
1591:     for (i=0; i<n; i++) {
1592:       idy        = *indicesy++;
1593:       y[idy]    += x[0];
1594:       y[idy+1]  += x[1];
1595:       y[idy+2]  += x[2];
1596:       y[idy+3]  += x[3];
1597:       y[idy+4]  += x[4];
1598:       y[idy+5]  += x[5];
1599:       y[idy+6]  += x[6];
1600:       y[idy+7]  += x[7];
1601:       y[idy+8]  += x[8];
1602:       y[idy+9]  += x[9];
1603:       y[idy+10] += x[10];
1604:       x         += 11;
1605:     }
1606:     break;
1607: #if !defined(PETSC_USE_COMPLEX)
1608:   case MAX_VALUES:
1609:     for (i=0; i<n; i++) {
1610:       idy       = *indicesy++;
1611:       y[idy]    = PetscMax(y[idy],x[0]);
1612:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1613:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1614:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1615:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1616:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1617:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1618:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1619:       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1620:       y[idy+9]  = PetscMax(y[idy+9],x[9]);
1621:       y[idy+10] = PetscMax(y[idy+10],x[10]);
1622:       x        += 11;
1623:     }
1624: #else
1625:   case MAX_VALUES:
1626: #endif
1627:   case NOT_SET_VALUES:
1628:     break;
1629:   default:
1630:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1631:   }
1632:   return(0);
1633: }

1635: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1636: {
1637:   PetscInt i,idx,idy;

1640:   switch (addv) {
1641:   case INSERT_VALUES:
1642:   case INSERT_ALL_VALUES:
1643:     for (i=0; i<n; i++) {
1644:       idx       = *indicesx++;
1645:       idy       = *indicesy++;
1646:       y[idy]    = x[idx];
1647:       y[idy+1]  = x[idx+1];
1648:       y[idy+2]  = x[idx+2];
1649:       y[idy+3]  = x[idx+3];
1650:       y[idy+4]  = x[idx+4];
1651:       y[idy+5]  = x[idx+5];
1652:       y[idy+6]  = x[idx+6];
1653:       y[idy+7]  = x[idx+7];
1654:       y[idy+8]  = x[idx+8];
1655:       y[idy+9]  = x[idx+9];
1656:       y[idy+10] = x[idx+10];
1657:     }
1658:     break;
1659:   case ADD_VALUES:
1660:   case ADD_ALL_VALUES:
1661:     for (i=0; i<n; i++) {
1662:       idx        = *indicesx++;
1663:       idy        = *indicesy++;
1664:       y[idy]    += x[idx];
1665:       y[idy+1]  += x[idx+1];
1666:       y[idy+2]  += x[idx+2];
1667:       y[idy+3]  += x[idx+3];
1668:       y[idy+4]  += x[idx+4];
1669:       y[idy+5]  += x[idx+5];
1670:       y[idy+6]  += x[idx+6];
1671:       y[idy+7]  += x[idx+7];
1672:       y[idy+8]  += x[idx+8];
1673:       y[idy+9]  += x[idx+9];
1674:       y[idy+10] += x[idx+10];
1675:     }
1676:     break;
1677: #if !defined(PETSC_USE_COMPLEX)
1678:   case MAX_VALUES:
1679:     for (i=0; i<n; i++) {
1680:       idx       = *indicesx++;
1681:       idy       = *indicesy++;
1682:       y[idy]    = PetscMax(y[idy],x[idx]);
1683:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1684:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1685:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1686:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1687:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1688:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1689:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1690:       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1691:       y[idy+9]  = PetscMax(y[idy+9],x[idx+9]);
1692:       y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1693:     }
1694: #else
1695:   case MAX_VALUES:
1696: #endif
1697:   case NOT_SET_VALUES:
1698:     break;
1699:   default:
1700:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1701:   }
1702:   return(0);
1703: }

1705: /* ----------------------------------------------------------------------------------------------- */
1706: PETSC_STATIC_INLINE void Pack_MPI1_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1707: {
1708:   PetscInt i,idx;

1710:   for (i=0; i<n; i++) {
1711:     idx   = *indicesx++;
1712:     y[0]  = x[idx];
1713:     y[1]  = x[idx+1];
1714:     y[2]  = x[idx+2];
1715:     y[3]  = x[idx+3];
1716:     y[4]  = x[idx+4];
1717:     y[5]  = x[idx+5];
1718:     y[6]  = x[idx+6];
1719:     y[7]  = x[idx+7];
1720:     y[8]  = x[idx+8];
1721:     y[9]  = x[idx+9];
1722:     y[10] = x[idx+10];
1723:     y[11] = x[idx+11];
1724:     y    += 12;
1725:   }
1726: }

1728: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_12(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1729: {
1730:   PetscInt i,idy;

1733:   switch (addv) {
1734:   case INSERT_VALUES:
1735:   case INSERT_ALL_VALUES:
1736:     for (i=0; i<n; i++) {
1737:       idy       = *indicesy++;
1738:       y[idy]    = x[0];
1739:       y[idy+1]  = x[1];
1740:       y[idy+2]  = x[2];
1741:       y[idy+3]  = x[3];
1742:       y[idy+4]  = x[4];
1743:       y[idy+5]  = x[5];
1744:       y[idy+6]  = x[6];
1745:       y[idy+7]  = x[7];
1746:       y[idy+8]  = x[8];
1747:       y[idy+9]  = x[9];
1748:       y[idy+10] = x[10];
1749:       y[idy+11] = x[11];
1750:       x        += 12;
1751:     }
1752:     break;
1753:   case ADD_VALUES:
1754:   case ADD_ALL_VALUES:
1755:     for (i=0; i<n; i++) {
1756:       idy        = *indicesy++;
1757:       y[idy]    += x[0];
1758:       y[idy+1]  += x[1];
1759:       y[idy+2]  += x[2];
1760:       y[idy+3]  += x[3];
1761:       y[idy+4]  += x[4];
1762:       y[idy+5]  += x[5];
1763:       y[idy+6]  += x[6];
1764:       y[idy+7]  += x[7];
1765:       y[idy+8]  += x[8];
1766:       y[idy+9]  += x[9];
1767:       y[idy+10] += x[10];
1768:       y[idy+11] += x[11];
1769:       x         += 12;
1770:     }
1771:     break;
1772: #if !defined(PETSC_USE_COMPLEX)
1773:   case MAX_VALUES:
1774:     for (i=0; i<n; i++) {
1775:       idy       = *indicesy++;
1776:       y[idy]    = PetscMax(y[idy],x[0]);
1777:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1778:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1779:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1780:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1781:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1782:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1783:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1784:       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1785:       y[idy+9]  = PetscMax(y[idy+9],x[9]);
1786:       y[idy+10] = PetscMax(y[idy+10],x[10]);
1787:       y[idy+11] = PetscMax(y[idy+11],x[11]);
1788:       x        += 12;
1789:     }
1790: #else
1791:   case MAX_VALUES:
1792: #endif
1793:   case NOT_SET_VALUES:
1794:     break;
1795:   default:
1796:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1797:   }
1798:   return(0);
1799: }

1801: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1802: {
1803:   PetscInt i,idx,idy;

1806:   switch (addv) {
1807:   case INSERT_VALUES:
1808:   case INSERT_ALL_VALUES:
1809:     for (i=0; i<n; i++) {
1810:       idx       = *indicesx++;
1811:       idy       = *indicesy++;
1812:       y[idy]    = x[idx];
1813:       y[idy+1]  = x[idx+1];
1814:       y[idy+2]  = x[idx+2];
1815:       y[idy+3]  = x[idx+3];
1816:       y[idy+4]  = x[idx+4];
1817:       y[idy+5]  = x[idx+5];
1818:       y[idy+6]  = x[idx+6];
1819:       y[idy+7]  = x[idx+7];
1820:       y[idy+8]  = x[idx+8];
1821:       y[idy+9]  = x[idx+9];
1822:       y[idy+10] = x[idx+10];
1823:       y[idy+11] = x[idx+11];
1824:     }
1825:     break;
1826:   case ADD_VALUES:
1827:   case ADD_ALL_VALUES:
1828:     for (i=0; i<n; i++) {
1829:       idx        = *indicesx++;
1830:       idy        = *indicesy++;
1831:       y[idy]    += x[idx];
1832:       y[idy+1]  += x[idx+1];
1833:       y[idy+2]  += x[idx+2];
1834:       y[idy+3]  += x[idx+3];
1835:       y[idy+4]  += x[idx+4];
1836:       y[idy+5]  += x[idx+5];
1837:       y[idy+6]  += x[idx+6];
1838:       y[idy+7]  += x[idx+7];
1839:       y[idy+8]  += x[idx+8];
1840:       y[idy+9]  += x[idx+9];
1841:       y[idy+10] += x[idx+10];
1842:       y[idy+11] += x[idx+11];
1843:     }
1844:     break;
1845: #if !defined(PETSC_USE_COMPLEX)
1846:   case MAX_VALUES:
1847:     for (i=0; i<n; i++) {
1848:       idx       = *indicesx++;
1849:       idy       = *indicesy++;
1850:       y[idy]    = PetscMax(y[idy],x[idx]);
1851:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1852:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1853:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1854:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1855:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1856:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1857:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1858:       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1859:       y[idy+9]  = PetscMax(y[idy+9],x[idx+9]);
1860:       y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1861:       y[idy+11] = PetscMax(y[idy+11],x[idx+11]);
1862:     }
1863: #else
1864:   case MAX_VALUES:
1865: #endif
1866:   case NOT_SET_VALUES:
1867:     break;
1868:   default:
1869:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1870:   }
1871:   return(0);
1872: }

1874: /* ----------------------------------------------------------------------------------------------- */
1875: PETSC_STATIC_INLINE void Pack_MPI1_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1876: {
1877:   PetscInt       i,idx;

1880:   for (i=0; i<n; i++) {
1881:     idx   = *indicesx++;
1882:     PetscArraycpy(y,x + idx,bs);CHKERRV(ierr);
1883:     y    += bs;
1884:   }
1885: }

1887: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_bs(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1888: {
1889:   PetscInt i,idy,j;

1893:   switch (addv) {
1894:   case INSERT_VALUES:
1895:   case INSERT_ALL_VALUES:
1896:     for (i=0; i<n; i++) {
1897:       idy       = *indicesy++;
1898:       PetscArraycpy(y + idy,x,bs);
1899:       x        += bs;
1900:     }
1901:     break;
1902:   case ADD_VALUES:
1903:   case ADD_ALL_VALUES:
1904:     for (i=0; i<n; i++) {
1905:       idy        = *indicesy++;
1906:       for (j=0; j<bs; j++) y[idy+j] += x[j];
1907:       x         += bs;
1908:     }
1909:     break;
1910: #if !defined(PETSC_USE_COMPLEX)
1911:   case MAX_VALUES:
1912:     for (i=0; i<n; i++) {
1913:       idy = *indicesy++;
1914:       for (j=0; j<bs; j++) y[idy+j] = PetscMax(y[idy+j],x[j]);
1915:       x  += bs;
1916:     }
1917: #else
1918:   case MAX_VALUES:
1919: #endif
1920:   case NOT_SET_VALUES:
1921:     break;
1922:   default:
1923:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1924:   }
1925:   return(0);
1926: }

1928: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1929: {
1930:   PetscInt i,idx,idy,j;

1934:   switch (addv) {
1935:   case INSERT_VALUES:
1936:   case INSERT_ALL_VALUES:
1937:     for (i=0; i<n; i++) {
1938:       idx       = *indicesx++;
1939:       idy       = *indicesy++;
1940:       PetscArraycpy(y + idy, x + idx,bs);
1941:     }
1942:     break;
1943:   case ADD_VALUES:
1944:   case ADD_ALL_VALUES:
1945:     for (i=0; i<n; i++) {
1946:       idx        = *indicesx++;
1947:       idy        = *indicesy++;
1948:       for (j=0; j<bs; j++ )  y[idy+j] += x[idx+j];
1949:     }
1950:     break;
1951: #if !defined(PETSC_USE_COMPLEX)
1952:   case MAX_VALUES:
1953:     for (i=0; i<n; i++) {
1954:       idx       = *indicesx++;
1955:       idy       = *indicesy++;
1956:       for (j=0; j<bs; j++ )  y[idy+j] = PetscMax(y[idy+j],x[idx+j]);
1957:     }
1958: #else
1959:   case MAX_VALUES:
1960: #endif
1961:   case NOT_SET_VALUES:
1962:     break;
1963:   default:
1964:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1965:   }
1966:   return(0);
1967: }

1969: /* Create the VecScatterBegin/End_P for our chosen block sizes */
/*
   The same header is included once per value of BS: the fixed sizes 1..12 and
   finally BS = bs for the general run-time block size.
   Presumably each inclusion defines the VecScatterBeginMPI1_<BS>/VecScatterEndMPI1_<BS>
   pair that VecScatterCreateCommon_PtoS_MPI1() installs in ctx->ops - TODO confirm in the header.
   NOTE(review): BS is redefined here without an #undef, so the header is presumably
   responsible for undefining it at its end - confirm.
*/
1970: #define BS 1
1971:  #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1972: #define BS 2
1973:  #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1974: #define BS 3
1975:  #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1976: #define BS 4
1977:  #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1978: #define BS 5
1979:  #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1980: #define BS 6
1981:  #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1982: #define BS 7
1983:  #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1984: #define BS 8
1985:  #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1986: #define BS 9
1987:  #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1988: #define BS 10
1989:  #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1990: #define BS 11
1991:  #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1992: #define BS 12
1993:  #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1994: #define BS bs
1995:  #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>

1997: /* ==========================================================================================*/

1999: /*              create parallel to sequential scatter context                           */

2001: PetscErrorCode VecScatterCreateCommon_PtoS_MPI1(VecScatter_MPI_General*,VecScatter_MPI_General*,VecScatter);

2003: /*
2004:    bs indicates how many elements there are in each block. Normally this would be 1.

2006:    contains check that PetscMPIInt can handle the sizes needed

   VecScatterCreateLocal_PtoS_MPI1 - fills ctx->todata and ctx->fromdata for a scatter
   described by nx index pairs (bs*inidx[i] in xin  ->  bs*inidy[i] in yin).

   Outline of what the code below does:
     1. Finds, from xin->map->range, which rank owns every bs*inidx[i].
     2. Sends the indices owned by other ranks to their owners (MPI_Isend/MPI_Irecv
        on a fresh tag) so that each owner learns what it has to ship.
     3. Builds "to"   (what this rank sends: received indices minus owners[rank]) and
               "from" (where incoming data lands: bs*inidy[i], grouped by owning rank).
     4. Entries owned by this rank itself become the to->local/from->local slot lists.
     5. Hands both structs to VecScatterCreateCommon_PtoS_MPI1().

   ny is only used to size the from->values/from->indices arrays (ny-nprocslocal entries).

   NOTE(review): ierr is declared but no return code is captured or checked in this
   listing - confirm against the original source.
2007: */
2008: PetscErrorCode VecScatterCreateLocal_PtoS_MPI1(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2009: {
2010:   VecScatter_MPI_General *from,*to;
2011:   PetscMPIInt            size,rank,imdex,tag,n;
2012:   PetscInt               *source = NULL,*owners = NULL,nxr;
2013:   PetscInt               *lowner = NULL,*start = NULL,lengthy,lengthx;
2014:   PetscMPIInt            *nprocs = NULL,nrecvs;
2015:   PetscInt               i,j,idx,nsends;
2016:   PetscMPIInt            *owner = NULL;
2017:   PetscInt               *starts = NULL,count,slen;
2018:   PetscInt               *rvalues,*svalues,base,*values,nprocslocal,recvtotal,*rsvalues;
2019:   PetscMPIInt            *onodes1,*olengths1;
2020:   MPI_Comm               comm;
2021:   MPI_Request            *send_waits = NULL,*recv_waits = NULL;
2022:   MPI_Status             recv_status,*send_status;
2023:   PetscErrorCode         ierr;

2026:   PetscObjectGetNewTag((PetscObject)ctx,&tag);
2027:   PetscObjectGetComm((PetscObject)xin,&comm);
2028:   MPI_Comm_rank(comm,&rank);
2029:   MPI_Comm_size(comm,&size);
        /* owners[j]..owners[j+1] is the index range of xin held by rank j */
2030:   owners = xin->map->range;
2031:   VecGetSize(yin,&lengthy);
        /* lengthx is fetched here but not read again in this function */
2032:   VecGetSize(xin,&lengthx);

2034:   /*  first count number of contributors to each processor */
2035:   /*  owner[i]: owner of ith inidx; nproc[j]: num of inidx to be sent to jth proc */
2036:   PetscMalloc2(size,&nprocs,nx,&owner);
2037:   PetscArrayzero(nprocs,size);

2039:   j      = 0;
2040:   nsends = 0;
2041:   for (i=0; i<nx; i++) {
2042:     idx = bs*inidx[i];
          /* the search continues from the previous owner; restart at rank 0 only when idx lies below it */
2043:     if (idx < owners[j]) j = 0;
2044:     for (; j<size; j++) {
2045:       if (idx < owners[j+1]) {
              /* first index destined for rank j: one more destination rank */
2046:         if (!nprocs[j]++) nsends++;
2047:         owner[i] = j;
2048:         break;
2049:       }
2050:     }
2051:     if (j == size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ith %D block entry %D not owned by any process, upper bound %D",i,idx,owners[size]);
2052:   }

        /* entries owned by this rank are handled as a local scatter, not as a message */
2054:   nprocslocal  = nprocs[rank];
2055:   nprocs[rank] = 0;
2056:   if (nprocslocal) nsends--;
2057:   /* inform other processors of number of messages and max length*/
        /* presumably: nrecvs = number of ranks that will message us, onodes1/olengths1 = their ranks and lengths - TODO confirm */
2058:   PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2059:   PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2060:   PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2061:   recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];

2063:   /* post receives:   */
        /* source is allocated here and freed below but not otherwise used in this function */
2064:   PetscMalloc3(recvtotal,&rvalues,nrecvs,&source,nrecvs,&recv_waits);
2065:   count = 0;
2066:   for (i=0; i<nrecvs; i++) {
          /* message i lands in rvalues at the running offset of the preceding message lengths */
2067:     MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2068:     count += olengths1[i];
2069:   }

2071:   /* do sends:
2072:      1) starts[i] gives the starting index in svalues for stuff going to
2073:      the ith processor
2074:   */
        /* nxr = number of entries owned by other ranks = length of svalues */
2075:   nxr = 0;
2076:   for (i=0; i<nx; i++) {
2077:     if (owner[i] != rank) nxr++;
2078:   }
2079:   PetscMalloc3(nxr,&svalues,nsends,&send_waits,size+1,&starts);

2081:   starts[0]  = 0;
2082:   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
        /* starts[] is used as a per-rank write cursor while filling svalues ... */
2083:   for (i=0; i<nx; i++) {
2084:     if (owner[i] != rank) svalues[starts[owner[i]]++] = bs*inidx[i];
2085:   }
        /* ... and is therefore rebuilt before being used as message offsets */
2086:   starts[0] = 0;
2087:   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2088:   count = 0;
2089:   for (i=0; i<size; i++) {
2090:     if (nprocs[i]) {
2091:       MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);
2092:     }
2093:   }

2095:   /*  wait on receives */
2096:   count = nrecvs;
2097:   slen  = 0;
2098:   while (count) {
2099:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2100:     /* unpack receives into our local space */
          /* only the received length is accumulated here; the data is consumed further below */
2101:     MPI_Get_count(&recv_status,MPIU_INT,&n);
2102:     slen += n;
2103:     count--;
2104:   }

2106:   if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not expected %D",slen,recvtotal);

2108:   /* allocate entire send scatter context */
2109:   PetscNewLog(ctx,&to);
2110:   to->n = nrecvs;

2112:   PetscMalloc1(nrecvs,&to->requests);
2113:   PetscMalloc4(bs*slen,&to->values,slen,&to->indices,nrecvs+1,&to->starts,nrecvs,&to->procs);
        /* both status arrays live in "to" and are sized for the larger of the two message counts */
2114:   PetscMalloc2(PetscMax(to->n,nsends),&to->sstatus,PetscMax(to->n,nsends),&to->rstatus);

2116:   ctx->todata   = (void*)to;
2117:   to->starts[0] = 0;

2119:   if (nrecvs) {
2120:     /* move the data into the send scatter */
          /* received indices are global; subtracting owners[rank] makes them offsets into the local part */
2121:     base     = owners[rank];
2122:     rsvalues = rvalues;
2123:     for (i=0; i<nrecvs; i++) {
2124:       to->starts[i+1] = to->starts[i] + olengths1[i];
2125:       to->procs[i]    = onodes1[i];
2126:       values = rsvalues;
2127:       rsvalues += olengths1[i];
2128:       for (j=0; j<olengths1[i]; j++) to->indices[to->starts[i] + j] = values[j] - base;
2129:     }
2130:   }
2131:   PetscFree(olengths1);
2132:   PetscFree(onodes1);
2133:   PetscFree3(rvalues,source,recv_waits);

2135:   /* allocate entire receive scatter context */
2136:   PetscNewLog(ctx,&from);
2137:   from->n = nsends;

2139:   PetscMalloc1(nsends,&from->requests);
2140:   PetscMalloc4((ny-nprocslocal)*bs,&from->values,ny-nprocslocal,&from->indices,nsends+1,&from->starts,from->n,&from->procs);
2141:   ctx->fromdata = (void*)from;

2143:   /* move data into receive scatter */
        /* lowner[r] = position of rank r in from->procs; start[] = per-position write cursor into from->indices */
2144:   PetscMalloc2(size,&lowner,nsends+1,&start);
2145:   count = 0; from->starts[0] = start[0] = 0;
2146:   for (i=0; i<size; i++) {
2147:     if (nprocs[i]) {
2148:       lowner[i]            = count;
2149:       from->procs[count++] = i;
2150:       from->starts[count]  = start[count] = start[count-1] + nprocs[i];
2151:     }
2152:   }

2154:   for (i=0; i<nx; i++) {
2155:     if (owner[i] != rank) {
2156:       from->indices[start[lowner[owner[i]]]++] = bs*inidy[i];
            /* the range check runs after the index has already been stored */
2157:       if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2158:     }
2159:   }
2160:   PetscFree2(lowner,start);
2161:   PetscFree2(nprocs,owner);

2163:   /* wait on sends */
        /* svalues must stay allocated until the sends have completed, hence the free after the wait */
2164:   if (nsends) {
2165:     PetscMalloc1(nsends,&send_status);
2166:     MPI_Waitall(nsends,send_waits,send_status);
2167:     PetscFree(send_status);
2168:   }
2169:   PetscFree3(svalues,send_waits,starts);

2171:   if (nprocslocal) {
2172:     PetscInt nt = from->local.n = to->local.n = nprocslocal;
2173:     /* we have a scatter to ourselves */
2174:     PetscMalloc1(nt,&to->local.vslots);
2175:     PetscMalloc1(nt,&from->local.vslots);
          /* nt is reused as the fill counter for the two slot arrays */
2176:     nt   = 0;
2177:     for (i=0; i<nx; i++) {
2178:       idx = bs*inidx[i];
2179:       if (idx >= owners[rank] && idx < owners[rank+1]) {
2180:         to->local.vslots[nt]     = idx - owners[rank];
2181:         from->local.vslots[nt++] = bs*inidy[i];
2182:         if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2183:       }
2184:     }
2185:     PetscLogObjectMemory((PetscObject)ctx,2*nt*sizeof(PetscInt));
2186:   } else {
2187:     from->local.n      = 0;
2188:     from->local.vslots = 0;
2189:     to->local.n        = 0;
2190:     to->local.vslots   = 0;
2191:   }

2193:   from->local.nonmatching_computed = PETSC_FALSE;
2194:   from->local.n_nonmatching        = 0;
2195:   from->local.slots_nonmatching    = 0;
2196:   to->local.nonmatching_computed   = PETSC_FALSE;
2197:   to->local.n_nonmatching          = 0;
2198:   to->local.slots_nonmatching      = 0;

2200:   from->format = VEC_SCATTER_MPI_GENERAL;
2201:   to->format   = VEC_SCATTER_MPI_GENERAL;
2202:   from->bs     = bs;
2203:   to->bs       = bs;

        /* creates the MPI requests and installs the ctx->ops function pointers */
2205:   VecScatterCreateCommon_PtoS_MPI1(from,to,ctx);
2206:   return(0);
2207: }

2209: /*
2210:    bs indicates how many elements there are in each block. Normally this would be 1.

   VecScatterCreateCommon_PtoS_MPI1 - finishes a scatter context whose "to" and "from"
   structs have already been filled (the block size is read from to->bs).

   It
     - installs the destroy, copy, begin, end and view operations on ctx,
     - marks from->contiq when all receive indices are consecutive blocks (stride bs),
     - creates the MPI requests for the forward scatter (on the tag ctx carried on entry)
       and for the reverse scatter (on a freshly obtained tag tagr),
     - calls VecScatterMemcpyPlanCreate_PtoP(to,from).

   NOTE(review): no return code is captured or checked in this listing - confirm
   against the original source.
2211: */
2212: PetscErrorCode VecScatterCreateCommon_PtoS_MPI1(VecScatter_MPI_General *from,VecScatter_MPI_General *to,VecScatter ctx)
2213: {
2214:   MPI_Comm       comm;
        /* tag is read before PetscObjectGetNewTag() below, so tag and tagr differ */
2215:   PetscMPIInt    tag  = ((PetscObject)ctx)->tag, tagr;
2216:   PetscInt       bs   = to->bs;
2217:   PetscMPIInt    size;
2218:   PetscInt       i, n;

2222:   PetscObjectGetComm((PetscObject)ctx,&comm);
2223:   PetscObjectGetNewTag((PetscObject)ctx,&tagr);
2224:   ctx->ops->destroy = VecScatterDestroy_PtoP_MPI1;

        /* size is queried here but not read again in this function */
2226:   MPI_Comm_size(comm,&size);
2227:   /* check if the receives are ALL going into contiguous locations; if so can skip indexing */
2228:   to->contiq = PETSC_FALSE;
        /* n = total number of blocks received over all messages */
2229:   n = from->starts[from->n];
2230:   from->contiq = PETSC_TRUE;
2231:   for (i=1; i<n; i++) {
2232:     if (from->indices[i] != from->indices[i-1] + bs) {
2233:       from->contiq = PETSC_FALSE;
2234:       break;
2235:     }
2236:   }

2238:   {
2239:     PetscInt    *sstarts  = to->starts,  *rstarts = from->starts;
2240:     PetscMPIInt *sprocs   = to->procs,   *rprocs  = from->procs;
2241:     MPI_Request *swaits   = to->requests,*rwaits  = from->requests;
2242:     MPI_Request *rev_swaits,*rev_rwaits;
2243:     PetscScalar *Ssvalues = to->values, *Srvalues = from->values;

2245:     /* allocate additional wait variables for the "reverse" scatter */
2246:     PetscMalloc1(to->n,&rev_rwaits);
2247:     PetscMalloc1(from->n,&rev_swaits);
2248:     to->rev_requests   = rev_rwaits;
2249:     from->rev_requests = rev_swaits;

          /* reverse scatter, send side: ships from->values back to the ranks in from->procs on tagr */
2251:     for (i=0; i<from->n; i++) {
2252:       MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2253:     }

          /* forward scatter, send side: ships to->values to the ranks in to->procs on tag */
2255:     for (i=0; i<to->n; i++) {
2256:       MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2257:     }
2258:     /* Register receives for scatter and reverse */
          /* forward receives use the same buffer segments of from->values as the reverse sends above */
2259:     for (i=0; i<from->n; i++) {
2260:       MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
2261:     }
          /* reverse receives use the same buffer segments of to->values as the forward sends above */
2262:     for (i=0; i<to->n; i++) {
2263:       MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tagr,comm,rev_rwaits+i);
2264:     }
2265:     ctx->ops->copy = VecScatterCopy_PtoP_X_MPI1;
2266:   }
2267:   PetscInfo1(ctx,"Using blocksize %D scatter\n",bs);

2269: #if defined(PETSC_USE_DEBUG)
        /* debug builds only: every rank of ctx's communicator must use the same bs; i and n are reused as min/max */
2270:   MPIU_Allreduce(&bs,&i,1,MPIU_INT,MPI_MIN,PetscObjectComm((PetscObject)ctx));
2271:   MPIU_Allreduce(&bs,&n,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)ctx));
2272:   if (bs!=i || bs!=n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Blocks size %D != %D or %D",bs,i,n);
2273: #endif

        /* block sizes 1..12 get a dedicated begin/end pair; every other value uses the generic _bs pair */
2275:   switch (bs) {
2276:   case 12:
2277:     ctx->ops->begin = VecScatterBeginMPI1_12;
2278:     ctx->ops->end   = VecScatterEndMPI1_12;
2279:     break;
2280:   case 11:
2281:     ctx->ops->begin = VecScatterBeginMPI1_11;
2282:     ctx->ops->end   = VecScatterEndMPI1_11;
2283:     break;
2284:   case 10:
2285:     ctx->ops->begin = VecScatterBeginMPI1_10;
2286:     ctx->ops->end   = VecScatterEndMPI1_10;
2287:     break;
2288:   case 9:
2289:     ctx->ops->begin = VecScatterBeginMPI1_9;
2290:     ctx->ops->end   = VecScatterEndMPI1_9;
2291:     break;
2292:   case 8:
2293:     ctx->ops->begin = VecScatterBeginMPI1_8;
2294:     ctx->ops->end   = VecScatterEndMPI1_8;
2295:     break;
2296:   case 7:
2297:     ctx->ops->begin = VecScatterBeginMPI1_7;
2298:     ctx->ops->end   = VecScatterEndMPI1_7;
2299:     break;
2300:   case 6:
2301:     ctx->ops->begin = VecScatterBeginMPI1_6;
2302:     ctx->ops->end   = VecScatterEndMPI1_6;
2303:     break;
2304:   case 5:
2305:     ctx->ops->begin = VecScatterBeginMPI1_5;
2306:     ctx->ops->end   = VecScatterEndMPI1_5;
2307:     break;
2308:   case 4:
2309:     ctx->ops->begin = VecScatterBeginMPI1_4;
2310:     ctx->ops->end   = VecScatterEndMPI1_4;
2311:     break;
2312:   case 3:
2313:     ctx->ops->begin = VecScatterBeginMPI1_3;
2314:     ctx->ops->end   = VecScatterEndMPI1_3;
2315:     break;
2316:   case 2:
2317:     ctx->ops->begin = VecScatterBeginMPI1_2;
2318:     ctx->ops->end   = VecScatterEndMPI1_2;
2319:     break;
2320:   case 1:
2321:     ctx->ops->begin = VecScatterBeginMPI1_1;
2322:     ctx->ops->end   = VecScatterEndMPI1_1;
2323:     break;
2324:   default:
2325:     ctx->ops->begin = VecScatterBeginMPI1_bs;
2326:     ctx->ops->end   = VecScatterEndMPI1_bs;

2328:   }
2329:   ctx->ops->view = VecScatterView_MPI_MPI1;
2330:   /* try to optimize PtoP vecscatter with memcpy's */
2331:   VecScatterMemcpyPlanCreate_PtoP(to,from);
2332:   return(0);
2333: }


2336: /* ------------------------------------------------------------------------------------*/
2337: /*
2338:          Scatter from local Seq vectors to a parallel vector.
2339:          Reverses the order of the arguments, calls VecScatterCreateLocal_PtoS() then
2340:          reverses the result.
2341: */
2342: PetscErrorCode VecScatterCreateLocal_StoP_MPI1(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2343: {
2344:   PetscErrorCode         ierr;
2345:   MPI_Request            *waits;
2346:   VecScatter_MPI_General *to,*from;

2349:   VecScatterCreateLocal_PtoS_MPI1(ny,inidy,nx,inidx,yin,xin,bs,ctx);
2350:   to            = (VecScatter_MPI_General*)ctx->fromdata;
2351:   from          = (VecScatter_MPI_General*)ctx->todata;
2352:   ctx->todata   = (void*)to;
2353:   ctx->fromdata = (void*)from;
2354:   /* these two are special, they are ALWAYS stored in to struct */
2355:   to->sstatus   = from->sstatus;
2356:   to->rstatus   = from->rstatus;

2358:   from->sstatus = 0;
2359:   from->rstatus = 0;

2361:   waits              = from->rev_requests;
2362:   from->rev_requests = from->requests;
2363:   from->requests     = waits;
2364:   waits              = to->rev_requests;
2365:   to->rev_requests   = to->requests;
2366:   to->requests       = waits;
2367:   return(0);
2368: }

2370: /* ---------------------------------------------------------------------------------*/
/*
   VecScatterCreateLocal_PtoP_MPI1 - sets up ctx for a scatter between two parallel vectors.

   With a single rank the work is passed straight to VecScatterCreateLocal_StoP_MPI1().
   Otherwise every rank sends each (bs*inidx[i], bs*inidy[i]) pair to the rank that owns
   bs*inidx[i] according to xin->map->range. The receiving rank turns the first entry
   into a local offset (minus owners[rank]) and then calls
   VecScatterCreateLocal_StoP_MPI1() on the collected pairs.

   ny is only forwarded in the single-rank case.

   NOTE(review): the pairs are scaled by bs before they are sent, and bs is passed on
   again to StoP/PtoS, which multiplies its indices by bs as well. That looks consistent
   only for bs == 1 - confirm against the callers.
   NOTE(review): no return code is captured or checked in this listing - confirm
   against the original source.
*/
2371: PetscErrorCode VecScatterCreateLocal_PtoP_MPI1(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2372: {
2374:   PetscMPIInt    size,rank,tag,imdex,n;
2375:   PetscInt       *owners = xin->map->range;
2376:   PetscMPIInt    *nprocs = NULL;
2377:   PetscInt       i,j,idx,nsends,*local_inidx = NULL,*local_inidy = NULL;
2378:   PetscMPIInt    *owner   = NULL;
2379:   PetscInt       *starts  = NULL,count,slen;
2380:   PetscInt       *rvalues = NULL,*svalues = NULL,base,*values = NULL,*rsvalues,recvtotal,lastidx;
2381:   PetscMPIInt    *onodes1,*olengths1,nrecvs;
2382:   MPI_Comm       comm;
2383:   MPI_Request    *send_waits = NULL,*recv_waits = NULL;
2384:   MPI_Status     recv_status,*send_status = NULL;
2385:   PetscBool      duplicate = PETSC_FALSE;
2386: #if defined(PETSC_USE_DEBUG)
2387:   PetscBool      found = PETSC_FALSE;
2388: #endif

2391:   PetscObjectGetNewTag((PetscObject)ctx,&tag);
2392:   PetscObjectGetComm((PetscObject)xin,&comm);
2393:   MPI_Comm_size(comm,&size);
2394:   MPI_Comm_rank(comm,&rank);
2395:   if (size == 1) {
2396:     VecScatterCreateLocal_StoP_MPI1(nx,inidx,ny,inidy,xin,yin,bs,ctx);
2397:     return(0);
2398:   }

2400:   /*
2401:      Each processor ships off its inidx[j] and inidy[j] to the appropriate processor
2402:      They then call the StoPScatterCreate()
2403:   */
2404:   /*  first count number of contributors to each processor */
2405:   PetscMalloc3(size,&nprocs,nx,&owner,(size+1),&starts);
2406:   PetscArrayzero(nprocs,size);

2408:   lastidx = -1;
2409:   j       = 0;
2410:   for (i=0; i<nx; i++) {
2411:     /* if indices are NOT locally sorted, need to start search at the beginning */
2412:     if (lastidx > (idx = bs*inidx[i])) j = 0;
2413:     lastidx = idx;
2414:     for (; j<size; j++) {
2415:       if (idx >= owners[j] && idx < owners[j+1]) {
2416:         nprocs[j]++;
2417:         owner[i] = j;
2418: #if defined(PETSC_USE_DEBUG)
2419:         found = PETSC_TRUE;
2420: #endif
2421:         break;
2422:       }
2423:     }
          /* the out-of-range check exists only in debug builds; otherwise owner[i] stays unset for such an index */
2424: #if defined(PETSC_USE_DEBUG)
2425:     if (!found) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2426:     found = PETSC_FALSE;
2427: #endif
2428:   }
        /* unlike the PtoS routine, this rank itself counts as a destination here */
2429:   nsends = 0;
2430:   for (i=0; i<size; i++) nsends += (nprocs[i] > 0);

2432:   /* inform other processors of number of messages and max length*/
        /* presumably: nrecvs = number of ranks that will message us, onodes1/olengths1 = their ranks and pair counts - TODO confirm */
2433:   PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2434:   PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2435:   PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2436:   recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];

2438:   /* post receives:   */
        /* every entry travels as an (x index, y index) pair, hence the factor 2 on the buffers */
2439:   PetscMalloc5(2*recvtotal,&rvalues,2*nx,&svalues,nrecvs,&recv_waits,nsends,&send_waits,nsends,&send_status);

2441:   count = 0;
2442:   for (i=0; i<nrecvs; i++) {
2443:     MPI_Irecv((rvalues+2*count),2*olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2444:     count += olengths1[i];
2445:   }
2446:   PetscFree(onodes1);

2448:   /* do sends:
2449:       1) starts[i] gives the starting index in svalues for stuff going to
2450:          the ith processor
2451:   */
2452:   starts[0]= 0;
2453:   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
        /* starts[] is used as a per-rank write cursor (in pairs) while interleaving the two indices */
2454:   for (i=0; i<nx; i++) {
2455:     svalues[2*starts[owner[i]]]       = bs*inidx[i];
2456:     svalues[1 + 2*starts[owner[i]]++] = bs*inidy[i];
2457:   }

        /* rebuild starts[] because the fill loop above advanced it */
2459:   starts[0] = 0;
2460:   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2461:   count = 0;
2462:   for (i=0; i<size; i++) {
2463:     if (nprocs[i]) {
2464:       MPI_Isend(svalues+2*starts[i],2*nprocs[i],MPIU_INT,i,tag,comm,send_waits+count);
2465:       count++;
2466:     }
2467:   }
2468:   PetscFree3(nprocs,owner,starts);

2470:   /*  wait on receives */
2471:   count = nrecvs;
2472:   slen  = 0;
2473:   while (count) {
2474:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2475:     /* unpack receives into our local space */
          /* n counts integers, i.e. two per pair */
2476:     MPI_Get_count(&recv_status,MPIU_INT,&n);
2477:     slen += n/2;
2478:     count--;
2479:   }
2480:   if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);

2482:   PetscMalloc2(slen,&local_inidx,slen,&local_inidy);
        /* de-interleave the pairs: x indices become offsets relative to owners[rank], y indices are kept as received */
2483:   base     = owners[rank];
2484:   count    = 0;
2485:   rsvalues = rvalues;
2486:   for (i=0; i<nrecvs; i++) {
2487:     values    = rsvalues;
2488:     rsvalues += 2*olengths1[i];
2489:     for (j=0; j<olengths1[i]; j++) {
2490:       local_inidx[count]   = values[2*j] - base;
2491:       local_inidy[count++] = values[2*j+1];
2492:     }
2493:   }
2494:   PetscFree(olengths1);

2496:   /* wait on sends */
2497:   if (nsends) {MPI_Waitall(nsends,send_waits,send_status);}
2498:   PetscFree5(rvalues,svalues,recv_waits,send_waits,send_status);

2500:   /*
2501:      should sort and remove duplicates from local_inidx,local_inidy
2502:   */

        /* NOTE(review): this block is compiled only when do_it_slow is defined; it uses start and last,
           which are not among the declarations visible above */
2504: #if defined(do_it_slow)
2505:   /* sort on the from index */
2506:   PetscSortIntWithArray(slen,local_inidx,local_inidy);
2507:   start = 0;
2508:   while (start < slen) {
2509:     count = start+1;
2510:     last  = local_inidx[start];
2511:     while (count < slen && last == local_inidx[count]) count++;
2512:     if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
2513:       /* sort on to index */
2514:       PetscSortInt(count-start,local_inidy+start);
2515:     }
2516:     /* remove duplicates; not most efficient way, but probably good enough */
2517:     i = start;
2518:     while (i < count-1) {
2519:       if (local_inidy[i] != local_inidy[i+1]) i++;
2520:       else { /* found a duplicate */
2521:         duplicate = PETSC_TRUE;
2522:         for (j=i; j<slen-1; j++) {
2523:           local_inidx[j] = local_inidx[j+1];
2524:           local_inidy[j] = local_inidy[j+1];
2525:         }
2526:         slen--;
2527:         count--;
2528:       }
2529:     }
2530:     start = count;
2531:   }
2532: #endif
        /* duplicate is only ever set inside the do_it_slow block above */
2533:   if (duplicate) {
2534:     PetscInfo(ctx,"Duplicate from to indices passed in VecScatterCreate(), they are ignored\n");
2535:   }
2536:   VecScatterCreateLocal_StoP_MPI1(slen,local_inidx,slen,local_inidy,xin,yin,bs,ctx);
2537:   PetscFree2(local_inidx,local_inidy);
2538:   return(0);
2539: }

2541: PetscErrorCode VecScatterSetUp_MPI1(VecScatter ctx)
2542: {

2546:   VecScatterSetUp_vectype_private(ctx,VecScatterCreateLocal_PtoS_MPI1,VecScatterCreateLocal_StoP_MPI1,VecScatterCreateLocal_PtoP_MPI1);
2547:   return(0);
2548: }

2550: PetscErrorCode VecScatterCreate_MPI1(VecScatter ctx)
2551: {
2552:   PetscErrorCode    ierr;

2555:   ctx->ops->setup = VecScatterSetUp_MPI1;
2556:   PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERMPI1);
2557:   PetscInfo(ctx,"Using MPI1 for vector scatter\n");
2558:   return(0);
2559: }