Actual source code: vpscat_mpi1.c

petsc-3.10.5 2019-03-28
Report Typos and Errors

  2: /*
  3:     Defines parallel vector scatters using MPI1.
  4: */

  6:  #include <../src/vec/vec/impls/dvecimpl.h>
  7:  #include <../src/vec/vec/impls/mpi/pvecimpl.h>
  8:  #include <petsc/private/vecscatterimpl.h>

 10: PetscErrorCode VecScatterView_MPI_MPI1(VecScatter ctx,PetscViewer viewer)
 11: {
 12:   VecScatter_MPI_General *to  =(VecScatter_MPI_General*)ctx->todata;
 13:   VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
 14:   PetscErrorCode         ierr;
 15:   PetscInt               i;
 16:   PetscMPIInt            rank;
 17:   PetscViewerFormat      format;
 18:   PetscBool              iascii;

 21:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
 22:   if (iascii) {
 23:     MPI_Comm_rank(PetscObjectComm((PetscObject)ctx),&rank);
 24:     PetscViewerGetFormat(viewer,&format);
 25:     if (format ==  PETSC_VIEWER_ASCII_INFO) {
 26:       PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;

 28:       MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 29:       MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 30:       itmp = to->starts[to->n+1];
 31:       MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 32:       itmp = from->starts[from->n+1];
 33:       MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
 34:       MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)ctx));

 36:       PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
 37:       PetscViewerASCIIPrintf(viewer,"  Maximum number sends %D\n",nsend_max);
 38:       PetscViewerASCIIPrintf(viewer,"  Maximum number receives %D\n",nrecv_max);
 39:       PetscViewerASCIIPrintf(viewer,"  Maximum data sent %D\n",(int)(lensend_max*to->bs*sizeof(PetscScalar)));
 40:       PetscViewerASCIIPrintf(viewer,"  Maximum data received %D\n",(int)(lenrecv_max*to->bs*sizeof(PetscScalar)));
 41:       PetscViewerASCIIPrintf(viewer,"  Total data sent %D\n",(int)(alldata*to->bs*sizeof(PetscScalar)));

 43:     } else {
 44:       PetscViewerASCIIPushSynchronized(viewer);
 45:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);
 46:       if (to->n) {
 47:         for (i=0; i<to->n; i++) {
 48:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d]   %D length = %D to whom %d\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
 49:           if (to->memcpy_plan.optimized[i]) { PetscViewerASCIISynchronizedPrintf(viewer,"  is optimized with %D memcpy's in Pack\n",to->memcpy_plan.copy_offsets[i+1]-to->memcpy_plan.copy_offsets[i]); }
 50:         }
 51:         PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote sends (in order by process sent to)\n");
 52:         for (i=0; i<to->starts[to->n]; i++) {
 53:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,to->indices[i]);
 54:         }
 55:       }

 57:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);
 58:       if (from->n) {
 59:         for (i=0; i<from->n; i++) {
 60:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %d\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
 61:           if (from->memcpy_plan.optimized[i]) { PetscViewerASCIISynchronizedPrintf(viewer,"  is optimized with %D memcpy's in Unpack\n",to->memcpy_plan.copy_offsets[i+1]-to->memcpy_plan.copy_offsets[i]); }
 62:         }

 64:         PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote receives (in order by process received from)\n");
 65:         for (i=0; i<from->starts[from->n]; i++) {
 66:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,from->indices[i]);
 67:         }
 68:       }
 69:       if (to->local.n) {
 70:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Indices for local part of scatter\n",rank);
 71:         if (to->local.memcpy_plan.optimized[0]) {
 72:           PetscViewerASCIIPrintf(viewer,"Local part of the scatter is made of %D copies\n",to->local.memcpy_plan.copy_offsets[1]);
 73:         }
 74:         for (i=0; i<to->local.n; i++) {  /* the to and from have the opposite meaning from what you would expect */
 75:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] From %D to %D \n",rank,to->local.vslots[i],from->local.vslots[i]);
 76:         }
 77:       }

 79:       PetscViewerFlush(viewer);
 80:       PetscViewerASCIIPopSynchronized(viewer);
 81:     }
 82:   }
 83:   return(0);
 84: }

 86: /* -------------------------------------------------------------------------------------*/
 87: PetscErrorCode VecScatterDestroy_PtoP_MPI1(VecScatter ctx)
 88: {
 89:   VecScatter_MPI_General *to   = (VecScatter_MPI_General*)ctx->todata;
 90:   VecScatter_MPI_General *from = (VecScatter_MPI_General*)ctx->fromdata;
 91:   PetscErrorCode         ierr;
 92:   PetscInt               i;

 95:   /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
 96:   if (to->requests) {
 97:     for (i=0; i<to->n; i++) {
 98:       MPI_Request_free(to->requests + i);
 99:     }
100:   }
101:   if (to->rev_requests) {
102:     for (i=0; i<to->n; i++) {
103:       MPI_Request_free(to->rev_requests + i);
104:     }
105:   }
106:   if (from->requests) {
107:     for (i=0; i<from->n; i++) {
108:       MPI_Request_free(from->requests + i);
109:     }
110:   }

112:   if (from->rev_requests) {
113:     for (i=0; i<from->n; i++) {
114:       MPI_Request_free(from->rev_requests + i);
115:     }
116:   }

118:   PetscFree(to->local.vslots);
119:   PetscFree(from->local.vslots);
120:   PetscFree(to->local.slots_nonmatching);
121:   PetscFree(from->local.slots_nonmatching);
122:   PetscFree(to->rev_requests);
123:   PetscFree(from->rev_requests);
124:   PetscFree(to->requests);
125:   PetscFree(from->requests);
126:   PetscFree4(to->values,to->indices,to->starts,to->procs);
127:   PetscFree2(to->sstatus,to->rstatus);
128:   PetscFree4(from->values,from->indices,from->starts,from->procs);
129:   VecScatterMemcpyPlanDestroy_PtoP(to,from);
130:   PetscFree(from);
131:   PetscFree(to);
132:   return(0);
133: }

135: /* --------------------------------------------------------------------------------------*/

137: PetscErrorCode VecScatterCopy_PtoP_X_MPI1(VecScatter in,VecScatter out)
138: {
139:   VecScatter_MPI_General *in_to   = (VecScatter_MPI_General*)in->todata;
140:   VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
141:   PetscErrorCode         ierr;
142:   PetscInt               ny,bs = in_from->bs;

145:   out->ops->begin   = in->ops->begin;
146:   out->ops->end     = in->ops->end;
147:   out->ops->copy    = in->ops->copy;
148:   out->ops->destroy = in->ops->destroy;
149:   out->ops->view    = in->ops->view;

151:   /* allocate entire send scatter context */
152:   PetscNewLog(out,&out_to);
153:   PetscNewLog(out,&out_from);

155:   ny                = in_to->starts[in_to->n];
156:   out_to->n         = in_to->n;
157:   out_to->format    = in_to->format;

159:   PetscMalloc1(out_to->n,&out_to->requests);
160:   PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
161:   PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
162:   PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
163:   PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
164:   PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));

166:   out->todata                        = (void*)out_to;
167:   out_to->local.n                    = in_to->local.n;
168:   out_to->local.nonmatching_computed = PETSC_FALSE;
169:   out_to->local.n_nonmatching        = 0;
170:   out_to->local.slots_nonmatching    = 0;
171:   if (in_to->local.n) {
172:     PetscMalloc1(in_to->local.n,&out_to->local.vslots);
173:     PetscMalloc1(in_from->local.n,&out_from->local.vslots);
174:     PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
175:     PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
176:   } else {
177:     out_to->local.vslots   = 0;
178:     out_from->local.vslots = 0;
179:   }

181:   /* allocate entire receive context */
182:   out_from->format    = in_from->format;
183:   ny                  = in_from->starts[in_from->n];
184:   out_from->n         = in_from->n;

186:   PetscMalloc1(out_from->n,&out_from->requests);
187:   PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
188:   PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
189:   PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
190:   PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));

192:   out->fromdata                        = (void*)out_from;
193:   out_from->local.n                    = in_from->local.n;
194:   out_from->local.nonmatching_computed = PETSC_FALSE;
195:   out_from->local.n_nonmatching        = 0;
196:   out_from->local.slots_nonmatching    = 0;

198:   /*
199:       set up the request arrays for use with isend_init() and irecv_init()
200:   */
201:   {
202:     PetscMPIInt tag;
203:     MPI_Comm    comm;
204:     PetscInt    *sstarts = out_to->starts,  *rstarts = out_from->starts;
205:     PetscMPIInt *sprocs  = out_to->procs,   *rprocs  = out_from->procs;
206:     PetscInt    i;
207:     MPI_Request *swaits   = out_to->requests,*rwaits  = out_from->requests;
208:     MPI_Request *rev_swaits,*rev_rwaits;
209:     PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;

211:     PetscMalloc1(in_to->n,&out_to->rev_requests);
212:     PetscMalloc1(in_from->n,&out_from->rev_requests);

214:     rev_rwaits = out_to->rev_requests;
215:     rev_swaits = out_from->rev_requests;

217:     out_from->bs = out_to->bs = bs;
218:     tag          = ((PetscObject)out)->tag;
219:     PetscObjectGetComm((PetscObject)out,&comm);

221:     /* Register the receives that you will use later (sends for scatter reverse) */
222:     for (i=0; i<out_from->n; i++) {
223:       MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
224:       MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
225:     }
226:     for (i=0; i<out_to->n; i++) {
227:       MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
228:       /* Register receives for scatter reverse */
229:       MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
230:     }
231:   }

233:   VecScatterMemcpyPlanCopy_PtoP(in_to,in_from,out_to,out_from);
234:   return(0);
235: }

237: PetscErrorCode VecScatterCopy_PtoP_AllToAll_MPI1(VecScatter in,VecScatter out)
238: {
239:   VecScatter_MPI_General *in_to   = (VecScatter_MPI_General*)in->todata;
240:   VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
241:   PetscErrorCode         ierr;
242:   PetscInt               ny,bs = in_from->bs;
243:   PetscMPIInt            size;

246:   MPI_Comm_size(PetscObjectComm((PetscObject)in),&size);

248:   out->ops->begin     = in->ops->begin;
249:   out->ops->end       = in->ops->end;
250:   out->ops->copy      = in->ops->copy;
251:   out->ops->destroy   = in->ops->destroy;
252:   out->ops->view      = in->ops->view;

254:   /* allocate entire send scatter context */
255:   PetscNewLog(out,&out_to);
256:   PetscNewLog(out,&out_from);

258:   ny                = in_to->starts[in_to->n];
259:   out_to->n         = in_to->n;
260:   out_to->format    = in_to->format;

262:   PetscMalloc1(out_to->n,&out_to->requests);
263:   PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
264:   PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
265:   PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
266:   PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
267:   PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));

269:   out->todata                        = (void*)out_to;
270:   out_to->local.n                    = in_to->local.n;
271:   out_to->local.nonmatching_computed = PETSC_FALSE;
272:   out_to->local.n_nonmatching        = 0;
273:   out_to->local.slots_nonmatching    = 0;
274:   if (in_to->local.n) {
275:     PetscMalloc1(in_to->local.n,&out_to->local.vslots);
276:     PetscMalloc1(in_from->local.n,&out_from->local.vslots);
277:     PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
278:     PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
279:   } else {
280:     out_to->local.vslots   = 0;
281:     out_from->local.vslots = 0;
282:   }

284:   /* allocate entire receive context */
285:   out_from->format    = in_from->format;
286:   ny                  = in_from->starts[in_from->n];
287:   out_from->n         = in_from->n;

289:   PetscMalloc1(out_from->n,&out_from->requests);
290:   PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
291:   PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
292:   PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
293:   PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));

295:   out->fromdata                        = (void*)out_from;
296:   out_from->local.n                    = in_from->local.n;
297:   out_from->local.nonmatching_computed = PETSC_FALSE;
298:   out_from->local.n_nonmatching        = 0;
299:   out_from->local.slots_nonmatching    = 0;

301:   VecScatterMemcpyPlanCopy_PtoP(in_to,in_from,out_to,out_from);
302:   return(0);
303: }

305: /* Optimize a parallel vector to parallel vector vecscatter with memory copies */
306: PetscErrorCode VecScatterMemcpyPlanCreate_PtoP(VecScatter_MPI_General *to,VecScatter_MPI_General *from)
307: {

311:   VecScatterMemcpyPlanCreate_Index(to->n,to->starts,to->indices,to->bs,&to->memcpy_plan);
312:   VecScatterMemcpyPlanCreate_Index(from->n,from->starts,from->indices,to->bs,&from->memcpy_plan);
313:   VecScatterMemcpyPlanCreate_SGToSG(to->bs,&to->local,&from->local);
314:   return(0);
315: }

317: PetscErrorCode VecScatterMemcpyPlanCopy_PtoP(const VecScatter_MPI_General *in_to,const VecScatter_MPI_General *in_from,VecScatter_MPI_General *out_to,VecScatter_MPI_General *out_from)
318: {

322:   VecScatterMemcpyPlanCopy(&in_to->memcpy_plan,&out_to->memcpy_plan);
323:   VecScatterMemcpyPlanCopy(&in_from->memcpy_plan,&out_from->memcpy_plan);
324:   VecScatterMemcpyPlanCopy(&in_to->local.memcpy_plan,&out_to->local.memcpy_plan);
325:   VecScatterMemcpyPlanCopy(&in_from->local.memcpy_plan,&out_from->local.memcpy_plan);
326:   return(0);
327: }

329: PetscErrorCode VecScatterMemcpyPlanDestroy_PtoP(VecScatter_MPI_General *to,VecScatter_MPI_General *from)
330: {

334:   VecScatterMemcpyPlanDestroy(&to->memcpy_plan);
335:   VecScatterMemcpyPlanDestroy(&from->memcpy_plan);
336:   VecScatterMemcpyPlanDestroy(&to->local.memcpy_plan);
337:   VecScatterMemcpyPlanDestroy(&from->local.memcpy_plan);
338:   return(0);
339: }

341: /* --------------------------------------------------------------------------------------------------
342:     Packs and unpacks the message data into send or from receive buffers.

344:     These could be generated automatically.

346:     Fortran kernels etc. could be used.
347: */
348: PETSC_STATIC_INLINE void Pack_MPI1_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
349: {
350:   PetscInt i;
351:   for (i=0; i<n; i++) y[i] = x[indicesx[i]];
352: }

354: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_1(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
355: {
356:   PetscInt i;

359:   switch (addv) {
360:   case INSERT_VALUES:
361:   case INSERT_ALL_VALUES:
362:     for (i=0; i<n; i++) y[indicesy[i]] = x[i];
363:     break;
364:   case ADD_VALUES:
365:   case ADD_ALL_VALUES:
366:     for (i=0; i<n; i++) y[indicesy[i]] += x[i];
367:     break;
368: #if !defined(PETSC_USE_COMPLEX)
369:   case MAX_VALUES:
370:     for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[i]);
371: #else
372:   case MAX_VALUES:
373: #endif
374:   case NOT_SET_VALUES:
375:     break;
376:   default:
377:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
378:   }
379:   return(0);
380: }

382: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
383: {
384:   PetscInt i;

387:   switch (addv) {
388:   case INSERT_VALUES:
389:   case INSERT_ALL_VALUES:
390:     for (i=0; i<n; i++) y[indicesy[i]] = x[indicesx[i]];
391:     break;
392:   case ADD_VALUES:
393:   case ADD_ALL_VALUES:
394:     for (i=0; i<n; i++) y[indicesy[i]] += x[indicesx[i]];
395:     break;
396: #if !defined(PETSC_USE_COMPLEX)
397:   case MAX_VALUES:
398:     for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[indicesx[i]]);
399: #else
400:   case MAX_VALUES:
401: #endif
402:   case NOT_SET_VALUES:
403:     break;
404:   default:
405:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
406:   }
407:   return(0);
408: }

410: /* ----------------------------------------------------------------------------------------------- */
411: PETSC_STATIC_INLINE void Pack_MPI1_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
412: {
413:   PetscInt i,idx;

415:   for (i=0; i<n; i++) {
416:     idx  = *indicesx++;
417:     y[0] = x[idx];
418:     y[1] = x[idx+1];
419:     y   += 2;
420:   }
421: }

423: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_2(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
424: {
425:   PetscInt i,idy;

428:   switch (addv) {
429:   case INSERT_VALUES:
430:   case INSERT_ALL_VALUES:
431:     for (i=0; i<n; i++) {
432:       idy      = *indicesy++;
433:       y[idy]   = x[0];
434:       y[idy+1] = x[1];
435:       x       += 2;
436:     }
437:     break;
438:   case ADD_VALUES:
439:   case ADD_ALL_VALUES:
440:     for (i=0; i<n; i++) {
441:       idy       = *indicesy++;
442:       y[idy]   += x[0];
443:       y[idy+1] += x[1];
444:       x        += 2;
445:     }
446:     break;
447: #if !defined(PETSC_USE_COMPLEX)
448:   case MAX_VALUES:
449:     for (i=0; i<n; i++) {
450:       idy      = *indicesy++;
451:       y[idy]   = PetscMax(y[idy],x[0]);
452:       y[idy+1] = PetscMax(y[idy+1],x[1]);
453:       x       += 2;
454:     }
455: #else
456:   case MAX_VALUES:
457: #endif
458:   case NOT_SET_VALUES:
459:     break;
460:   default:
461:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
462:   }
463:   return(0);
464: }

466: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
467: {
468:   PetscInt i,idx,idy;

471:   switch (addv) {
472:   case INSERT_VALUES:
473:   case INSERT_ALL_VALUES:
474:     for (i=0; i<n; i++) {
475:       idx      = *indicesx++;
476:       idy      = *indicesy++;
477:       y[idy]   = x[idx];
478:       y[idy+1] = x[idx+1];
479:     }
480:     break;
481:   case ADD_VALUES:
482:   case ADD_ALL_VALUES:
483:     for (i=0; i<n; i++) {
484:       idx       = *indicesx++;
485:       idy       = *indicesy++;
486:       y[idy]   += x[idx];
487:       y[idy+1] += x[idx+1];
488:     }
489:     break;
490: #if !defined(PETSC_USE_COMPLEX)
491:   case MAX_VALUES:
492:     for (i=0; i<n; i++) {
493:       idx      = *indicesx++;
494:       idy      = *indicesy++;
495:       y[idy]   = PetscMax(y[idy],x[idx]);
496:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
497:     }
498: #else
499:   case MAX_VALUES:
500: #endif
501:   case NOT_SET_VALUES:
502:     break;
503:   default:
504:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
505:   }
506:   return(0);
507: }
508: /* ----------------------------------------------------------------------------------------------- */
509: PETSC_STATIC_INLINE void Pack_MPI1_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
510: {
511:   PetscInt i,idx;

513:   for (i=0; i<n; i++) {
514:     idx  = *indicesx++;
515:     y[0] = x[idx];
516:     y[1] = x[idx+1];
517:     y[2] = x[idx+2];
518:     y   += 3;
519:   }
520: }
521: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_3(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
522: {
523:   PetscInt i,idy;

526:   switch (addv) {
527:   case INSERT_VALUES:
528:   case INSERT_ALL_VALUES:
529:     for (i=0; i<n; i++) {
530:       idy      = *indicesy++;
531:       y[idy]   = x[0];
532:       y[idy+1] = x[1];
533:       y[idy+2] = x[2];
534:       x       += 3;
535:     }
536:     break;
537:   case ADD_VALUES:
538:   case ADD_ALL_VALUES:
539:     for (i=0; i<n; i++) {
540:       idy       = *indicesy++;
541:       y[idy]   += x[0];
542:       y[idy+1] += x[1];
543:       y[idy+2] += x[2];
544:       x        += 3;
545:     }
546:     break;
547: #if !defined(PETSC_USE_COMPLEX)
548:   case MAX_VALUES:
549:     for (i=0; i<n; i++) {
550:       idy      = *indicesy++;
551:       y[idy]   = PetscMax(y[idy],x[0]);
552:       y[idy+1] = PetscMax(y[idy+1],x[1]);
553:       y[idy+2] = PetscMax(y[idy+2],x[2]);
554:       x       += 3;
555:     }
556: #else
557:   case MAX_VALUES:
558: #endif
559:   case NOT_SET_VALUES:
560:     break;
561:   default:
562:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
563:   }
564:   return(0);
565: }

567: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
568: {
569:   PetscInt i,idx,idy;

572:   switch (addv) {
573:   case INSERT_VALUES:
574:   case INSERT_ALL_VALUES:
575:     for (i=0; i<n; i++) {
576:       idx      = *indicesx++;
577:       idy      = *indicesy++;
578:       y[idy]   = x[idx];
579:       y[idy+1] = x[idx+1];
580:       y[idy+2] = x[idx+2];
581:     }
582:     break;
583:   case ADD_VALUES:
584:   case ADD_ALL_VALUES:
585:     for (i=0; i<n; i++) {
586:       idx       = *indicesx++;
587:       idy       = *indicesy++;
588:       y[idy]   += x[idx];
589:       y[idy+1] += x[idx+1];
590:       y[idy+2] += x[idx+2];
591:     }
592:     break;
593: #if !defined(PETSC_USE_COMPLEX)
594:   case MAX_VALUES:
595:     for (i=0; i<n; i++) {
596:       idx      = *indicesx++;
597:       idy      = *indicesy++;
598:       y[idy]   = PetscMax(y[idy],x[idx]);
599:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
600:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
601:     }
602: #else
603:   case MAX_VALUES:
604: #endif
605:   case NOT_SET_VALUES:
606:     break;
607:   default:
608:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
609:   }
610:   return(0);
611: }
612: /* ----------------------------------------------------------------------------------------------- */
613: PETSC_STATIC_INLINE void Pack_MPI1_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
614: {
615:   PetscInt i,idx;

617:   for (i=0; i<n; i++) {
618:     idx  = *indicesx++;
619:     y[0] = x[idx];
620:     y[1] = x[idx+1];
621:     y[2] = x[idx+2];
622:     y[3] = x[idx+3];
623:     y   += 4;
624:   }
625: }
626: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_4(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
627: {
628:   PetscInt i,idy;

631:   switch (addv) {
632:   case INSERT_VALUES:
633:   case INSERT_ALL_VALUES:
634:     for (i=0; i<n; i++) {
635:       idy      = *indicesy++;
636:       y[idy]   = x[0];
637:       y[idy+1] = x[1];
638:       y[idy+2] = x[2];
639:       y[idy+3] = x[3];
640:       x       += 4;
641:     }
642:     break;
643:   case ADD_VALUES:
644:   case ADD_ALL_VALUES:
645:     for (i=0; i<n; i++) {
646:       idy       = *indicesy++;
647:       y[idy]   += x[0];
648:       y[idy+1] += x[1];
649:       y[idy+2] += x[2];
650:       y[idy+3] += x[3];
651:       x        += 4;
652:     }
653:     break;
654: #if !defined(PETSC_USE_COMPLEX)
655:   case MAX_VALUES:
656:     for (i=0; i<n; i++) {
657:       idy      = *indicesy++;
658:       y[idy]   = PetscMax(y[idy],x[0]);
659:       y[idy+1] = PetscMax(y[idy+1],x[1]);
660:       y[idy+2] = PetscMax(y[idy+2],x[2]);
661:       y[idy+3] = PetscMax(y[idy+3],x[3]);
662:       x       += 4;
663:     }
664: #else
665:   case MAX_VALUES:
666: #endif
667:   case NOT_SET_VALUES:
668:     break;
669:   default:
670:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
671:   }
672:   return(0);
673: }

675: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
676: {
677:   PetscInt i,idx,idy;

680:   switch (addv) {
681:   case INSERT_VALUES:
682:   case INSERT_ALL_VALUES:
683:     for (i=0; i<n; i++) {
684:       idx      = *indicesx++;
685:       idy      = *indicesy++;
686:       y[idy]   = x[idx];
687:       y[idy+1] = x[idx+1];
688:       y[idy+2] = x[idx+2];
689:       y[idy+3] = x[idx+3];
690:     }
691:     break;
692:   case ADD_VALUES:
693:   case ADD_ALL_VALUES:
694:     for (i=0; i<n; i++) {
695:       idx       = *indicesx++;
696:       idy       = *indicesy++;
697:       y[idy]   += x[idx];
698:       y[idy+1] += x[idx+1];
699:       y[idy+2] += x[idx+2];
700:       y[idy+3] += x[idx+3];
701:     }
702:     break;
703: #if !defined(PETSC_USE_COMPLEX)
704:   case MAX_VALUES:
705:     for (i=0; i<n; i++) {
706:       idx      = *indicesx++;
707:       idy      = *indicesy++;
708:       y[idy]   = PetscMax(y[idy],x[idx]);
709:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
710:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
711:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
712:     }
713: #else
714:   case MAX_VALUES:
715: #endif
716:   case NOT_SET_VALUES:
717:     break;
718:   default:
719:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
720:   }
721:   return(0);
722: }
723: /* ----------------------------------------------------------------------------------------------- */
724: PETSC_STATIC_INLINE void Pack_MPI1_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
725: {
726:   PetscInt i,idx;

728:   for (i=0; i<n; i++) {
729:     idx  = *indicesx++;
730:     y[0] = x[idx];
731:     y[1] = x[idx+1];
732:     y[2] = x[idx+2];
733:     y[3] = x[idx+3];
734:     y[4] = x[idx+4];
735:     y   += 5;
736:   }
737: }

739: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_5(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
740: {
741:   PetscInt i,idy;

744:   switch (addv) {
745:   case INSERT_VALUES:
746:   case INSERT_ALL_VALUES:
747:     for (i=0; i<n; i++) {
748:       idy      = *indicesy++;
749:       y[idy]   = x[0];
750:       y[idy+1] = x[1];
751:       y[idy+2] = x[2];
752:       y[idy+3] = x[3];
753:       y[idy+4] = x[4];
754:       x       += 5;
755:     }
756:     break;
757:   case ADD_VALUES:
758:   case ADD_ALL_VALUES:
759:     for (i=0; i<n; i++) {
760:       idy       = *indicesy++;
761:       y[idy]   += x[0];
762:       y[idy+1] += x[1];
763:       y[idy+2] += x[2];
764:       y[idy+3] += x[3];
765:       y[idy+4] += x[4];
766:       x        += 5;
767:     }
768:     break;
769: #if !defined(PETSC_USE_COMPLEX)
770:   case MAX_VALUES:
771:     for (i=0; i<n; i++) {
772:       idy      = *indicesy++;
773:       y[idy]   = PetscMax(y[idy],x[0]);
774:       y[idy+1] = PetscMax(y[idy+1],x[1]);
775:       y[idy+2] = PetscMax(y[idy+2],x[2]);
776:       y[idy+3] = PetscMax(y[idy+3],x[3]);
777:       y[idy+4] = PetscMax(y[idy+4],x[4]);
778:       x       += 5;
779:     }
780: #else
781:   case MAX_VALUES:
782: #endif
783:   case NOT_SET_VALUES:
784:     break;
785:   default:
786:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
787:   }
788:   return(0);
789: }

791: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
792: {
793:   PetscInt i,idx,idy;

796:   switch (addv) {
797:   case INSERT_VALUES:
798:   case INSERT_ALL_VALUES:
799:     for (i=0; i<n; i++) {
800:       idx      = *indicesx++;
801:       idy      = *indicesy++;
802:       y[idy]   = x[idx];
803:       y[idy+1] = x[idx+1];
804:       y[idy+2] = x[idx+2];
805:       y[idy+3] = x[idx+3];
806:       y[idy+4] = x[idx+4];
807:     }
808:     break;
809:   case ADD_VALUES:
810:   case ADD_ALL_VALUES:
811:     for (i=0; i<n; i++) {
812:       idx       = *indicesx++;
813:       idy       = *indicesy++;
814:       y[idy]   += x[idx];
815:       y[idy+1] += x[idx+1];
816:       y[idy+2] += x[idx+2];
817:       y[idy+3] += x[idx+3];
818:       y[idy+4] += x[idx+4];
819:     }
820:     break;
821: #if !defined(PETSC_USE_COMPLEX)
822:   case MAX_VALUES:
823:     for (i=0; i<n; i++) {
824:       idx      = *indicesx++;
825:       idy      = *indicesy++;
826:       y[idy]   = PetscMax(y[idy],x[idx]);
827:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
828:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
829:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
830:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
831:     }
832: #else
833:   case MAX_VALUES:
834: #endif
835:   case NOT_SET_VALUES:
836:     break;
837:   default:
838:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
839:   }
840:   return(0);
841: }
842: /* ----------------------------------------------------------------------------------------------- */
843: PETSC_STATIC_INLINE void Pack_MPI1_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
844: {
845:   PetscInt i,idx;

847:   for (i=0; i<n; i++) {
848:     idx  = *indicesx++;
849:     y[0] = x[idx];
850:     y[1] = x[idx+1];
851:     y[2] = x[idx+2];
852:     y[3] = x[idx+3];
853:     y[4] = x[idx+4];
854:     y[5] = x[idx+5];
855:     y   += 6;
856:   }
857: }

859: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_6(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
860: {
861:   PetscInt i,idy;

864:   switch (addv) {
865:   case INSERT_VALUES:
866:   case INSERT_ALL_VALUES:
867:     for (i=0; i<n; i++) {
868:       idy      = *indicesy++;
869:       y[idy]   = x[0];
870:       y[idy+1] = x[1];
871:       y[idy+2] = x[2];
872:       y[idy+3] = x[3];
873:       y[idy+4] = x[4];
874:       y[idy+5] = x[5];
875:       x       += 6;
876:     }
877:     break;
878:   case ADD_VALUES:
879:   case ADD_ALL_VALUES:
880:     for (i=0; i<n; i++) {
881:       idy       = *indicesy++;
882:       y[idy]   += x[0];
883:       y[idy+1] += x[1];
884:       y[idy+2] += x[2];
885:       y[idy+3] += x[3];
886:       y[idy+4] += x[4];
887:       y[idy+5] += x[5];
888:       x        += 6;
889:     }
890:     break;
891: #if !defined(PETSC_USE_COMPLEX)
892:   case MAX_VALUES:
893:     for (i=0; i<n; i++) {
894:       idy      = *indicesy++;
895:       y[idy]   = PetscMax(y[idy],x[0]);
896:       y[idy+1] = PetscMax(y[idy+1],x[1]);
897:       y[idy+2] = PetscMax(y[idy+2],x[2]);
898:       y[idy+3] = PetscMax(y[idy+3],x[3]);
899:       y[idy+4] = PetscMax(y[idy+4],x[4]);
900:       y[idy+5] = PetscMax(y[idy+5],x[5]);
901:       x       += 6;
902:     }
903: #else
904:   case MAX_VALUES:
905: #endif
906:   case NOT_SET_VALUES:
907:     break;
908:   default:
909:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
910:   }
911:   return(0);
912: }

914: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
915: {
916:   PetscInt i,idx,idy;

919:   switch (addv) {
920:   case INSERT_VALUES:
921:   case INSERT_ALL_VALUES:
922:     for (i=0; i<n; i++) {
923:       idx      = *indicesx++;
924:       idy      = *indicesy++;
925:       y[idy]   = x[idx];
926:       y[idy+1] = x[idx+1];
927:       y[idy+2] = x[idx+2];
928:       y[idy+3] = x[idx+3];
929:       y[idy+4] = x[idx+4];
930:       y[idy+5] = x[idx+5];
931:     }
932:     break;
933:   case ADD_VALUES:
934:   case ADD_ALL_VALUES:
935:     for (i=0; i<n; i++) {
936:       idx       = *indicesx++;
937:       idy       = *indicesy++;
938:       y[idy]   += x[idx];
939:       y[idy+1] += x[idx+1];
940:       y[idy+2] += x[idx+2];
941:       y[idy+3] += x[idx+3];
942:       y[idy+4] += x[idx+4];
943:       y[idy+5] += x[idx+5];
944:     }
945:     break;
946: #if !defined(PETSC_USE_COMPLEX)
947:   case MAX_VALUES:
948:     for (i=0; i<n; i++) {
949:       idx      = *indicesx++;
950:       idy      = *indicesy++;
951:       y[idy]   = PetscMax(y[idy],x[idx]);
952:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
953:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
954:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
955:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
956:       y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
957:     }
958: #else
959:   case MAX_VALUES:
960: #endif
961:   case NOT_SET_VALUES:
962:     break;
963:   default:
964:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
965:   }
966:   return(0);
967: }
968: /* ----------------------------------------------------------------------------------------------- */
969: PETSC_STATIC_INLINE void Pack_MPI1_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
970: {
971:   PetscInt i,idx;

973:   for (i=0; i<n; i++) {
974:     idx  = *indicesx++;
975:     y[0] = x[idx];
976:     y[1] = x[idx+1];
977:     y[2] = x[idx+2];
978:     y[3] = x[idx+3];
979:     y[4] = x[idx+4];
980:     y[5] = x[idx+5];
981:     y[6] = x[idx+6];
982:     y   += 7;
983:   }
984: }

986: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_7(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
987: {
988:   PetscInt i,idy;

991:   switch (addv) {
992:   case INSERT_VALUES:
993:   case INSERT_ALL_VALUES:
994:     for (i=0; i<n; i++) {
995:       idy      = *indicesy++;
996:       y[idy]   = x[0];
997:       y[idy+1] = x[1];
998:       y[idy+2] = x[2];
999:       y[idy+3] = x[3];
1000:       y[idy+4] = x[4];
1001:       y[idy+5] = x[5];
1002:       y[idy+6] = x[6];
1003:       x       += 7;
1004:     }
1005:     break;
1006:   case ADD_VALUES:
1007:   case ADD_ALL_VALUES:
1008:     for (i=0; i<n; i++) {
1009:       idy       = *indicesy++;
1010:       y[idy]   += x[0];
1011:       y[idy+1] += x[1];
1012:       y[idy+2] += x[2];
1013:       y[idy+3] += x[3];
1014:       y[idy+4] += x[4];
1015:       y[idy+5] += x[5];
1016:       y[idy+6] += x[6];
1017:       x        += 7;
1018:     }
1019:     break;
1020: #if !defined(PETSC_USE_COMPLEX)
1021:   case MAX_VALUES:
1022:     for (i=0; i<n; i++) {
1023:       idy      = *indicesy++;
1024:       y[idy]   = PetscMax(y[idy],x[0]);
1025:       y[idy+1] = PetscMax(y[idy+1],x[1]);
1026:       y[idy+2] = PetscMax(y[idy+2],x[2]);
1027:       y[idy+3] = PetscMax(y[idy+3],x[3]);
1028:       y[idy+4] = PetscMax(y[idy+4],x[4]);
1029:       y[idy+5] = PetscMax(y[idy+5],x[5]);
1030:       y[idy+6] = PetscMax(y[idy+6],x[6]);
1031:       x       += 7;
1032:     }
1033: #else
1034:   case MAX_VALUES:
1035: #endif
1036:   case NOT_SET_VALUES:
1037:     break;
1038:   default:
1039:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1040:   }
1041:   return(0);
1042: }

1044: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1045: {
1046:   PetscInt i,idx,idy;

1049:   switch (addv) {
1050:   case INSERT_VALUES:
1051:   case INSERT_ALL_VALUES:
1052:     for (i=0; i<n; i++) {
1053:       idx      = *indicesx++;
1054:       idy      = *indicesy++;
1055:       y[idy]   = x[idx];
1056:       y[idy+1] = x[idx+1];
1057:       y[idy+2] = x[idx+2];
1058:       y[idy+3] = x[idx+3];
1059:       y[idy+4] = x[idx+4];
1060:       y[idy+5] = x[idx+5];
1061:       y[idy+6] = x[idx+6];
1062:     }
1063:     break;
1064:   case ADD_VALUES:
1065:   case ADD_ALL_VALUES:
1066:     for (i=0; i<n; i++) {
1067:       idx       = *indicesx++;
1068:       idy       = *indicesy++;
1069:       y[idy]   += x[idx];
1070:       y[idy+1] += x[idx+1];
1071:       y[idy+2] += x[idx+2];
1072:       y[idy+3] += x[idx+3];
1073:       y[idy+4] += x[idx+4];
1074:       y[idy+5] += x[idx+5];
1075:       y[idy+6] += x[idx+6];
1076:     }
1077:     break;
1078: #if !defined(PETSC_USE_COMPLEX)
1079:   case MAX_VALUES:
1080:     for (i=0; i<n; i++) {
1081:       idx      = *indicesx++;
1082:       idy      = *indicesy++;
1083:       y[idy]   = PetscMax(y[idy],x[idx]);
1084:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1085:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1086:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1087:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1088:       y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1089:       y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1090:     }
1091: #else
1092:   case MAX_VALUES:
1093: #endif
1094:   case NOT_SET_VALUES:
1095:     break;
1096:   default:
1097:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1098:   }
1099:   return(0);
1100: }
1101: /* ----------------------------------------------------------------------------------------------- */
1102: PETSC_STATIC_INLINE void Pack_MPI1_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1103: {
1104:   PetscInt i,idx;

1106:   for (i=0; i<n; i++) {
1107:     idx  = *indicesx++;
1108:     y[0] = x[idx];
1109:     y[1] = x[idx+1];
1110:     y[2] = x[idx+2];
1111:     y[3] = x[idx+3];
1112:     y[4] = x[idx+4];
1113:     y[5] = x[idx+5];
1114:     y[6] = x[idx+6];
1115:     y[7] = x[idx+7];
1116:     y   += 8;
1117:   }
1118: }

1120: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_8(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1121: {
1122:   PetscInt i,idy;

1125:   switch (addv) {
1126:   case INSERT_VALUES:
1127:   case INSERT_ALL_VALUES:
1128:     for (i=0; i<n; i++) {
1129:       idy      = *indicesy++;
1130:       y[idy]   = x[0];
1131:       y[idy+1] = x[1];
1132:       y[idy+2] = x[2];
1133:       y[idy+3] = x[3];
1134:       y[idy+4] = x[4];
1135:       y[idy+5] = x[5];
1136:       y[idy+6] = x[6];
1137:       y[idy+7] = x[7];
1138:       x       += 8;
1139:     }
1140:     break;
1141:   case ADD_VALUES:
1142:   case ADD_ALL_VALUES:
1143:     for (i=0; i<n; i++) {
1144:       idy       = *indicesy++;
1145:       y[idy]   += x[0];
1146:       y[idy+1] += x[1];
1147:       y[idy+2] += x[2];
1148:       y[idy+3] += x[3];
1149:       y[idy+4] += x[4];
1150:       y[idy+5] += x[5];
1151:       y[idy+6] += x[6];
1152:       y[idy+7] += x[7];
1153:       x        += 8;
1154:     }
1155:     break;
1156: #if !defined(PETSC_USE_COMPLEX)
1157:   case MAX_VALUES:
1158:     for (i=0; i<n; i++) {
1159:       idy      = *indicesy++;
1160:       y[idy]   = PetscMax(y[idy],x[0]);
1161:       y[idy+1] = PetscMax(y[idy+1],x[1]);
1162:       y[idy+2] = PetscMax(y[idy+2],x[2]);
1163:       y[idy+3] = PetscMax(y[idy+3],x[3]);
1164:       y[idy+4] = PetscMax(y[idy+4],x[4]);
1165:       y[idy+5] = PetscMax(y[idy+5],x[5]);
1166:       y[idy+6] = PetscMax(y[idy+6],x[6]);
1167:       y[idy+7] = PetscMax(y[idy+7],x[7]);
1168:       x       += 8;
1169:     }
1170: #else
1171:   case MAX_VALUES:
1172: #endif
1173:   case NOT_SET_VALUES:
1174:     break;
1175:   default:
1176:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1177:   }
1178:   return(0);
1179: }

1181: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1182: {
1183:   PetscInt i,idx,idy;

1186:   switch (addv) {
1187:   case INSERT_VALUES:
1188:   case INSERT_ALL_VALUES:
1189:     for (i=0; i<n; i++) {
1190:       idx      = *indicesx++;
1191:       idy      = *indicesy++;
1192:       y[idy]   = x[idx];
1193:       y[idy+1] = x[idx+1];
1194:       y[idy+2] = x[idx+2];
1195:       y[idy+3] = x[idx+3];
1196:       y[idy+4] = x[idx+4];
1197:       y[idy+5] = x[idx+5];
1198:       y[idy+6] = x[idx+6];
1199:       y[idy+7] = x[idx+7];
1200:     }
1201:     break;
1202:   case ADD_VALUES:
1203:   case ADD_ALL_VALUES:
1204:     for (i=0; i<n; i++) {
1205:       idx       = *indicesx++;
1206:       idy       = *indicesy++;
1207:       y[idy]   += x[idx];
1208:       y[idy+1] += x[idx+1];
1209:       y[idy+2] += x[idx+2];
1210:       y[idy+3] += x[idx+3];
1211:       y[idy+4] += x[idx+4];
1212:       y[idy+5] += x[idx+5];
1213:       y[idy+6] += x[idx+6];
1214:       y[idy+7] += x[idx+7];
1215:     }
1216:     break;
1217: #if !defined(PETSC_USE_COMPLEX)
1218:   case MAX_VALUES:
1219:     for (i=0; i<n; i++) {
1220:       idx      = *indicesx++;
1221:       idy      = *indicesy++;
1222:       y[idy]   = PetscMax(y[idy],x[idx]);
1223:       y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1224:       y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1225:       y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1226:       y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1227:       y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1228:       y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1229:       y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1230:     }
1231: #else
1232:   case MAX_VALUES:
1233: #endif
1234:   case NOT_SET_VALUES:
1235:     break;
1236:   default:
1237:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1238:   }
1239:   return(0);
1240: }

1242: PETSC_STATIC_INLINE void Pack_MPI1_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1243: {
1244:   PetscInt i,idx;

1246:   for (i=0; i<n; i++) {
1247:     idx   = *indicesx++;
1248:     y[0]  = x[idx];
1249:     y[1]  = x[idx+1];
1250:     y[2]  = x[idx+2];
1251:     y[3]  = x[idx+3];
1252:     y[4]  = x[idx+4];
1253:     y[5]  = x[idx+5];
1254:     y[6]  = x[idx+6];
1255:     y[7]  = x[idx+7];
1256:     y[8]  = x[idx+8];
1257:     y    += 9;
1258:   }
1259: }

1261: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_9(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1262: {
1263:   PetscInt i,idy;

1266:   switch (addv) {
1267:   case INSERT_VALUES:
1268:   case INSERT_ALL_VALUES:
1269:     for (i=0; i<n; i++) {
1270:       idy       = *indicesy++;
1271:       y[idy]    = x[0];
1272:       y[idy+1]  = x[1];
1273:       y[idy+2]  = x[2];
1274:       y[idy+3]  = x[3];
1275:       y[idy+4]  = x[4];
1276:       y[idy+5]  = x[5];
1277:       y[idy+6]  = x[6];
1278:       y[idy+7]  = x[7];
1279:       y[idy+8]  = x[8];
1280:       x        += 9;
1281:     }
1282:     break;
1283:   case ADD_VALUES:
1284:   case ADD_ALL_VALUES:
1285:     for (i=0; i<n; i++) {
1286:       idy        = *indicesy++;
1287:       y[idy]    += x[0];
1288:       y[idy+1]  += x[1];
1289:       y[idy+2]  += x[2];
1290:       y[idy+3]  += x[3];
1291:       y[idy+4]  += x[4];
1292:       y[idy+5]  += x[5];
1293:       y[idy+6]  += x[6];
1294:       y[idy+7]  += x[7];
1295:       y[idy+8]  += x[8];
1296:       x         += 9;
1297:     }
1298:     break;
1299: #if !defined(PETSC_USE_COMPLEX)
1300:   case MAX_VALUES:
1301:     for (i=0; i<n; i++) {
1302:       idy       = *indicesy++;
1303:       y[idy]    = PetscMax(y[idy],x[0]);
1304:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1305:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1306:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1307:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1308:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1309:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1310:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1311:       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1312:       x        += 9;
1313:     }
1314: #else
1315:   case MAX_VALUES:
1316: #endif
1317:   case NOT_SET_VALUES:
1318:     break;
1319:   default:
1320:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1321:   }
1322:   return(0);
1323: }

1325: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1326: {
1327:   PetscInt i,idx,idy;

1330:   switch (addv) {
1331:   case INSERT_VALUES:
1332:   case INSERT_ALL_VALUES:
1333:     for (i=0; i<n; i++) {
1334:       idx       = *indicesx++;
1335:       idy       = *indicesy++;
1336:       y[idy]    = x[idx];
1337:       y[idy+1]  = x[idx+1];
1338:       y[idy+2]  = x[idx+2];
1339:       y[idy+3]  = x[idx+3];
1340:       y[idy+4]  = x[idx+4];
1341:       y[idy+5]  = x[idx+5];
1342:       y[idy+6]  = x[idx+6];
1343:       y[idy+7]  = x[idx+7];
1344:       y[idy+8]  = x[idx+8];
1345:     }
1346:     break;
1347:   case ADD_VALUES:
1348:   case ADD_ALL_VALUES:
1349:     for (i=0; i<n; i++) {
1350:       idx        = *indicesx++;
1351:       idy        = *indicesy++;
1352:       y[idy]    += x[idx];
1353:       y[idy+1]  += x[idx+1];
1354:       y[idy+2]  += x[idx+2];
1355:       y[idy+3]  += x[idx+3];
1356:       y[idy+4]  += x[idx+4];
1357:       y[idy+5]  += x[idx+5];
1358:       y[idy+6]  += x[idx+6];
1359:       y[idy+7]  += x[idx+7];
1360:       y[idy+8]  += x[idx+8];
1361:     }
1362:     break;
1363: #if !defined(PETSC_USE_COMPLEX)
1364:   case MAX_VALUES:
1365:     for (i=0; i<n; i++) {
1366:       idx       = *indicesx++;
1367:       idy       = *indicesy++;
1368:       y[idy]    = PetscMax(y[idy],x[idx]);
1369:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1370:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1371:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1372:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1373:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1374:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1375:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1376:       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1377:     }
1378: #else
1379:   case MAX_VALUES:
1380: #endif
1381:   case NOT_SET_VALUES:
1382:     break;
1383:   default:
1384:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1385:   }
1386:   return(0);
1387: }

1389: PETSC_STATIC_INLINE void Pack_MPI1_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1390: {
1391:   PetscInt i,idx;

1393:   for (i=0; i<n; i++) {
1394:     idx   = *indicesx++;
1395:     y[0]  = x[idx];
1396:     y[1]  = x[idx+1];
1397:     y[2]  = x[idx+2];
1398:     y[3]  = x[idx+3];
1399:     y[4]  = x[idx+4];
1400:     y[5]  = x[idx+5];
1401:     y[6]  = x[idx+6];
1402:     y[7]  = x[idx+7];
1403:     y[8]  = x[idx+8];
1404:     y[9]  = x[idx+9];
1405:     y    += 10;
1406:   }
1407: }

1409: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_10(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1410: {
1411:   PetscInt i,idy;

1414:   switch (addv) {
1415:   case INSERT_VALUES:
1416:   case INSERT_ALL_VALUES:
1417:     for (i=0; i<n; i++) {
1418:       idy       = *indicesy++;
1419:       y[idy]    = x[0];
1420:       y[idy+1]  = x[1];
1421:       y[idy+2]  = x[2];
1422:       y[idy+3]  = x[3];
1423:       y[idy+4]  = x[4];
1424:       y[idy+5]  = x[5];
1425:       y[idy+6]  = x[6];
1426:       y[idy+7]  = x[7];
1427:       y[idy+8]  = x[8];
1428:       y[idy+9]  = x[9];
1429:       x        += 10;
1430:     }
1431:     break;
1432:   case ADD_VALUES:
1433:   case ADD_ALL_VALUES:
1434:     for (i=0; i<n; i++) {
1435:       idy        = *indicesy++;
1436:       y[idy]    += x[0];
1437:       y[idy+1]  += x[1];
1438:       y[idy+2]  += x[2];
1439:       y[idy+3]  += x[3];
1440:       y[idy+4]  += x[4];
1441:       y[idy+5]  += x[5];
1442:       y[idy+6]  += x[6];
1443:       y[idy+7]  += x[7];
1444:       y[idy+8]  += x[8];
1445:       y[idy+9]  += x[9];
1446:       x         += 10;
1447:     }
1448:     break;
1449: #if !defined(PETSC_USE_COMPLEX)
1450:   case MAX_VALUES:
1451:     for (i=0; i<n; i++) {
1452:       idy       = *indicesy++;
1453:       y[idy]    = PetscMax(y[idy],x[0]);
1454:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1455:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1456:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1457:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1458:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1459:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1460:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1461:       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1462:       y[idy+9]  = PetscMax(y[idy+9],x[9]);
1463:       x        += 10;
1464:     }
1465: #else
1466:   case MAX_VALUES:
1467: #endif
1468:   case NOT_SET_VALUES:
1469:     break;
1470:   default:
1471:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1472:   }
1473:   return(0);
1474: }

1476: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1477: {
1478:   PetscInt i,idx,idy;

1481:   switch (addv) {
1482:   case INSERT_VALUES:
1483:   case INSERT_ALL_VALUES:
1484:     for (i=0; i<n; i++) {
1485:       idx       = *indicesx++;
1486:       idy       = *indicesy++;
1487:       y[idy]    = x[idx];
1488:       y[idy+1]  = x[idx+1];
1489:       y[idy+2]  = x[idx+2];
1490:       y[idy+3]  = x[idx+3];
1491:       y[idy+4]  = x[idx+4];
1492:       y[idy+5]  = x[idx+5];
1493:       y[idy+6]  = x[idx+6];
1494:       y[idy+7]  = x[idx+7];
1495:       y[idy+8]  = x[idx+8];
1496:       y[idy+9]  = x[idx+9];
1497:     }
1498:     break;
1499:   case ADD_VALUES:
1500:   case ADD_ALL_VALUES:
1501:     for (i=0; i<n; i++) {
1502:       idx        = *indicesx++;
1503:       idy        = *indicesy++;
1504:       y[idy]    += x[idx];
1505:       y[idy+1]  += x[idx+1];
1506:       y[idy+2]  += x[idx+2];
1507:       y[idy+3]  += x[idx+3];
1508:       y[idy+4]  += x[idx+4];
1509:       y[idy+5]  += x[idx+5];
1510:       y[idy+6]  += x[idx+6];
1511:       y[idy+7]  += x[idx+7];
1512:       y[idy+8]  += x[idx+8];
1513:       y[idy+9]  += x[idx+9];
1514:     }
1515:     break;
1516: #if !defined(PETSC_USE_COMPLEX)
1517:   case MAX_VALUES:
1518:     for (i=0; i<n; i++) {
1519:       idx       = *indicesx++;
1520:       idy       = *indicesy++;
1521:       y[idy]    = PetscMax(y[idy],x[idx]);
1522:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1523:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1524:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1525:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1526:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1527:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1528:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1529:       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1530:       y[idy+9]  = PetscMax(y[idy+9],x[idx+9]);
1531:     }
1532: #else
1533:   case MAX_VALUES:
1534: #endif
1535:   case NOT_SET_VALUES:
1536:     break;
1537:   default:
1538:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1539:   }
1540:   return(0);
1541: }

1543: PETSC_STATIC_INLINE void Pack_MPI1_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1544: {
1545:   PetscInt i,idx;

1547:   for (i=0; i<n; i++) {
1548:     idx   = *indicesx++;
1549:     y[0]  = x[idx];
1550:     y[1]  = x[idx+1];
1551:     y[2]  = x[idx+2];
1552:     y[3]  = x[idx+3];
1553:     y[4]  = x[idx+4];
1554:     y[5]  = x[idx+5];
1555:     y[6]  = x[idx+6];
1556:     y[7]  = x[idx+7];
1557:     y[8]  = x[idx+8];
1558:     y[9]  = x[idx+9];
1559:     y[10] = x[idx+10];
1560:     y    += 11;
1561:   }
1562: }

1564: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_11(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1565: {
1566:   PetscInt i,idy;

1569:   switch (addv) {
1570:   case INSERT_VALUES:
1571:   case INSERT_ALL_VALUES:
1572:     for (i=0; i<n; i++) {
1573:       idy       = *indicesy++;
1574:       y[idy]    = x[0];
1575:       y[idy+1]  = x[1];
1576:       y[idy+2]  = x[2];
1577:       y[idy+3]  = x[3];
1578:       y[idy+4]  = x[4];
1579:       y[idy+5]  = x[5];
1580:       y[idy+6]  = x[6];
1581:       y[idy+7]  = x[7];
1582:       y[idy+8]  = x[8];
1583:       y[idy+9]  = x[9];
1584:       y[idy+10] = x[10];
1585:       x        += 11;
1586:     }
1587:     break;
1588:   case ADD_VALUES:
1589:   case ADD_ALL_VALUES:
1590:     for (i=0; i<n; i++) {
1591:       idy        = *indicesy++;
1592:       y[idy]    += x[0];
1593:       y[idy+1]  += x[1];
1594:       y[idy+2]  += x[2];
1595:       y[idy+3]  += x[3];
1596:       y[idy+4]  += x[4];
1597:       y[idy+5]  += x[5];
1598:       y[idy+6]  += x[6];
1599:       y[idy+7]  += x[7];
1600:       y[idy+8]  += x[8];
1601:       y[idy+9]  += x[9];
1602:       y[idy+10] += x[10];
1603:       x         += 11;
1604:     }
1605:     break;
1606: #if !defined(PETSC_USE_COMPLEX)
1607:   case MAX_VALUES:
1608:     for (i=0; i<n; i++) {
1609:       idy       = *indicesy++;
1610:       y[idy]    = PetscMax(y[idy],x[0]);
1611:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1612:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1613:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1614:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1615:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1616:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1617:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1618:       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1619:       y[idy+9]  = PetscMax(y[idy+9],x[9]);
1620:       y[idy+10] = PetscMax(y[idy+10],x[10]);
1621:       x        += 11;
1622:     }
1623: #else
1624:   case MAX_VALUES:
1625: #endif
1626:   case NOT_SET_VALUES:
1627:     break;
1628:   default:
1629:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1630:   }
1631:   return(0);
1632: }

1634: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1635: {
1636:   PetscInt i,idx,idy;

1639:   switch (addv) {
1640:   case INSERT_VALUES:
1641:   case INSERT_ALL_VALUES:
1642:     for (i=0; i<n; i++) {
1643:       idx       = *indicesx++;
1644:       idy       = *indicesy++;
1645:       y[idy]    = x[idx];
1646:       y[idy+1]  = x[idx+1];
1647:       y[idy+2]  = x[idx+2];
1648:       y[idy+3]  = x[idx+3];
1649:       y[idy+4]  = x[idx+4];
1650:       y[idy+5]  = x[idx+5];
1651:       y[idy+6]  = x[idx+6];
1652:       y[idy+7]  = x[idx+7];
1653:       y[idy+8]  = x[idx+8];
1654:       y[idy+9]  = x[idx+9];
1655:       y[idy+10] = x[idx+10];
1656:     }
1657:     break;
1658:   case ADD_VALUES:
1659:   case ADD_ALL_VALUES:
1660:     for (i=0; i<n; i++) {
1661:       idx        = *indicesx++;
1662:       idy        = *indicesy++;
1663:       y[idy]    += x[idx];
1664:       y[idy+1]  += x[idx+1];
1665:       y[idy+2]  += x[idx+2];
1666:       y[idy+3]  += x[idx+3];
1667:       y[idy+4]  += x[idx+4];
1668:       y[idy+5]  += x[idx+5];
1669:       y[idy+6]  += x[idx+6];
1670:       y[idy+7]  += x[idx+7];
1671:       y[idy+8]  += x[idx+8];
1672:       y[idy+9]  += x[idx+9];
1673:       y[idy+10] += x[idx+10];
1674:     }
1675:     break;
1676: #if !defined(PETSC_USE_COMPLEX)
1677:   case MAX_VALUES:
1678:     for (i=0; i<n; i++) {
1679:       idx       = *indicesx++;
1680:       idy       = *indicesy++;
1681:       y[idy]    = PetscMax(y[idy],x[idx]);
1682:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1683:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1684:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1685:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1686:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1687:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1688:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1689:       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1690:       y[idy+9]  = PetscMax(y[idy+9],x[idx+9]);
1691:       y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1692:     }
1693: #else
1694:   case MAX_VALUES:
1695: #endif
1696:   case NOT_SET_VALUES:
1697:     break;
1698:   default:
1699:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1700:   }
1701:   return(0);
1702: }

1704: /* ----------------------------------------------------------------------------------------------- */
1705: PETSC_STATIC_INLINE void Pack_MPI1_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1706: {
1707:   PetscInt i,idx;

1709:   for (i=0; i<n; i++) {
1710:     idx   = *indicesx++;
1711:     y[0]  = x[idx];
1712:     y[1]  = x[idx+1];
1713:     y[2]  = x[idx+2];
1714:     y[3]  = x[idx+3];
1715:     y[4]  = x[idx+4];
1716:     y[5]  = x[idx+5];
1717:     y[6]  = x[idx+6];
1718:     y[7]  = x[idx+7];
1719:     y[8]  = x[idx+8];
1720:     y[9]  = x[idx+9];
1721:     y[10] = x[idx+10];
1722:     y[11] = x[idx+11];
1723:     y    += 12;
1724:   }
1725: }

1727: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_12(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1728: {
1729:   PetscInt i,idy;

1732:   switch (addv) {
1733:   case INSERT_VALUES:
1734:   case INSERT_ALL_VALUES:
1735:     for (i=0; i<n; i++) {
1736:       idy       = *indicesy++;
1737:       y[idy]    = x[0];
1738:       y[idy+1]  = x[1];
1739:       y[idy+2]  = x[2];
1740:       y[idy+3]  = x[3];
1741:       y[idy+4]  = x[4];
1742:       y[idy+5]  = x[5];
1743:       y[idy+6]  = x[6];
1744:       y[idy+7]  = x[7];
1745:       y[idy+8]  = x[8];
1746:       y[idy+9]  = x[9];
1747:       y[idy+10] = x[10];
1748:       y[idy+11] = x[11];
1749:       x        += 12;
1750:     }
1751:     break;
1752:   case ADD_VALUES:
1753:   case ADD_ALL_VALUES:
1754:     for (i=0; i<n; i++) {
1755:       idy        = *indicesy++;
1756:       y[idy]    += x[0];
1757:       y[idy+1]  += x[1];
1758:       y[idy+2]  += x[2];
1759:       y[idy+3]  += x[3];
1760:       y[idy+4]  += x[4];
1761:       y[idy+5]  += x[5];
1762:       y[idy+6]  += x[6];
1763:       y[idy+7]  += x[7];
1764:       y[idy+8]  += x[8];
1765:       y[idy+9]  += x[9];
1766:       y[idy+10] += x[10];
1767:       y[idy+11] += x[11];
1768:       x         += 12;
1769:     }
1770:     break;
1771: #if !defined(PETSC_USE_COMPLEX)
1772:   case MAX_VALUES:
1773:     for (i=0; i<n; i++) {
1774:       idy       = *indicesy++;
1775:       y[idy]    = PetscMax(y[idy],x[0]);
1776:       y[idy+1]  = PetscMax(y[idy+1],x[1]);
1777:       y[idy+2]  = PetscMax(y[idy+2],x[2]);
1778:       y[idy+3]  = PetscMax(y[idy+3],x[3]);
1779:       y[idy+4]  = PetscMax(y[idy+4],x[4]);
1780:       y[idy+5]  = PetscMax(y[idy+5],x[5]);
1781:       y[idy+6]  = PetscMax(y[idy+6],x[6]);
1782:       y[idy+7]  = PetscMax(y[idy+7],x[7]);
1783:       y[idy+8]  = PetscMax(y[idy+8],x[8]);
1784:       y[idy+9]  = PetscMax(y[idy+9],x[9]);
1785:       y[idy+10] = PetscMax(y[idy+10],x[10]);
1786:       y[idy+11] = PetscMax(y[idy+11],x[11]);
1787:       x        += 12;
1788:     }
1789: #else
1790:   case MAX_VALUES:
1791: #endif
1792:   case NOT_SET_VALUES:
1793:     break;
1794:   default:
1795:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1796:   }
1797:   return(0);
1798: }

1800: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1801: {
1802:   PetscInt i,idx,idy;

1805:   switch (addv) {
1806:   case INSERT_VALUES:
1807:   case INSERT_ALL_VALUES:
1808:     for (i=0; i<n; i++) {
1809:       idx       = *indicesx++;
1810:       idy       = *indicesy++;
1811:       y[idy]    = x[idx];
1812:       y[idy+1]  = x[idx+1];
1813:       y[idy+2]  = x[idx+2];
1814:       y[idy+3]  = x[idx+3];
1815:       y[idy+4]  = x[idx+4];
1816:       y[idy+5]  = x[idx+5];
1817:       y[idy+6]  = x[idx+6];
1818:       y[idy+7]  = x[idx+7];
1819:       y[idy+8]  = x[idx+8];
1820:       y[idy+9]  = x[idx+9];
1821:       y[idy+10] = x[idx+10];
1822:       y[idy+11] = x[idx+11];
1823:     }
1824:     break;
1825:   case ADD_VALUES:
1826:   case ADD_ALL_VALUES:
1827:     for (i=0; i<n; i++) {
1828:       idx        = *indicesx++;
1829:       idy        = *indicesy++;
1830:       y[idy]    += x[idx];
1831:       y[idy+1]  += x[idx+1];
1832:       y[idy+2]  += x[idx+2];
1833:       y[idy+3]  += x[idx+3];
1834:       y[idy+4]  += x[idx+4];
1835:       y[idy+5]  += x[idx+5];
1836:       y[idy+6]  += x[idx+6];
1837:       y[idy+7]  += x[idx+7];
1838:       y[idy+8]  += x[idx+8];
1839:       y[idy+9]  += x[idx+9];
1840:       y[idy+10] += x[idx+10];
1841:       y[idy+11] += x[idx+11];
1842:     }
1843:     break;
1844: #if !defined(PETSC_USE_COMPLEX)
1845:   case MAX_VALUES:
1846:     for (i=0; i<n; i++) {
1847:       idx       = *indicesx++;
1848:       idy       = *indicesy++;
1849:       y[idy]    = PetscMax(y[idy],x[idx]);
1850:       y[idy+1]  = PetscMax(y[idy+1],x[idx+1]);
1851:       y[idy+2]  = PetscMax(y[idy+2],x[idx+2]);
1852:       y[idy+3]  = PetscMax(y[idy+3],x[idx+3]);
1853:       y[idy+4]  = PetscMax(y[idy+4],x[idx+4]);
1854:       y[idy+5]  = PetscMax(y[idy+5],x[idx+5]);
1855:       y[idy+6]  = PetscMax(y[idy+6],x[idx+6]);
1856:       y[idy+7]  = PetscMax(y[idy+7],x[idx+7]);
1857:       y[idy+8]  = PetscMax(y[idy+8],x[idx+8]);
1858:       y[idy+9]  = PetscMax(y[idy+9],x[idx+9]);
1859:       y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1860:       y[idy+11] = PetscMax(y[idy+11],x[idx+11]);
1861:     }
1862: #else
1863:   case MAX_VALUES:
1864: #endif
1865:   case NOT_SET_VALUES:
1866:     break;
1867:   default:
1868:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1869:   }
1870:   return(0);
1871: }

1873: /* ----------------------------------------------------------------------------------------------- */
1874: PETSC_STATIC_INLINE void Pack_MPI1_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1875: {
1876:   PetscInt       i,idx;

1878:   for (i=0; i<n; i++) {
1879:     idx   = *indicesx++;
1880:     PetscMemcpy(y,x + idx,bs*sizeof(PetscScalar));
1881:     y    += bs;
1882:   }
1883: }

1885: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_bs(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1886: {
1887:   PetscInt i,idy,j;

1890:   switch (addv) {
1891:   case INSERT_VALUES:
1892:   case INSERT_ALL_VALUES:
1893:     for (i=0; i<n; i++) {
1894:       idy       = *indicesy++;
1895:       PetscMemcpy(y + idy,x,bs*sizeof(PetscScalar));
1896:       x        += bs;
1897:     }
1898:     break;
1899:   case ADD_VALUES:
1900:   case ADD_ALL_VALUES:
1901:     for (i=0; i<n; i++) {
1902:       idy        = *indicesy++;
1903:       for (j=0; j<bs; j++) y[idy+j] += x[j];
1904:       x         += bs;
1905:     }
1906:     break;
1907: #if !defined(PETSC_USE_COMPLEX)
1908:   case MAX_VALUES:
1909:     for (i=0; i<n; i++) {
1910:       idy = *indicesy++;
1911:       for (j=0; j<bs; j++) y[idy+j] = PetscMax(y[idy+j],x[j]);
1912:       x  += bs;
1913:     }
1914: #else
1915:   case MAX_VALUES:
1916: #endif
1917:   case NOT_SET_VALUES:
1918:     break;
1919:   default:
1920:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1921:   }
1922:   return(0);
1923: }

1925: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1926: {
1927:   PetscInt i,idx,idy,j;

1930:   switch (addv) {
1931:   case INSERT_VALUES:
1932:   case INSERT_ALL_VALUES:
1933:     for (i=0; i<n; i++) {
1934:       idx       = *indicesx++;
1935:       idy       = *indicesy++;
1936:       PetscMemcpy(y + idy, x + idx,bs*sizeof(PetscScalar));
1937:     }
1938:     break;
1939:   case ADD_VALUES:
1940:   case ADD_ALL_VALUES:
1941:     for (i=0; i<n; i++) {
1942:       idx        = *indicesx++;
1943:       idy        = *indicesy++;
1944:       for (j=0; j<bs; j++ )  y[idy+j] += x[idx+j];
1945:     }
1946:     break;
1947: #if !defined(PETSC_USE_COMPLEX)
1948:   case MAX_VALUES:
1949:     for (i=0; i<n; i++) {
1950:       idx       = *indicesx++;
1951:       idy       = *indicesy++;
1952:       for (j=0; j<bs; j++ )  y[idy+j] = PetscMax(y[idy+j],x[idx+j]);
1953:     }
1954: #else
1955:   case MAX_VALUES:
1956: #endif
1957:   case NOT_SET_VALUES:
1958:     break;
1959:   default:
1960:     SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1961:   }
1962:   return(0);
1963: }

1965: /* Create the VecScatterBegin/End_P for our chosen block sizes */
1966: #define BS 1
1967: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
1968: #define BS 2
1969: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
1970: #define BS 3
1971: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
1972: #define BS 4
1973: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
1974: #define BS 5
1975: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
1976: #define BS 6
1977: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
1978: #define BS 7
1979: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
1980: #define BS 8
1981: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
1982: #define BS 9
1983: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
1984: #define BS 10
1985: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
1986: #define BS 11
1987: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
1988: #define BS 12
1989: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
1990: #define BS bs
1991: #include <../src/vec/vscat/impls/vpscat_mpi1.h>

1993: /* ==========================================================================================*/

1995: /*              create parallel to sequential scatter context                           */

1997: PetscErrorCode VecScatterCreateCommon_PtoS_MPI1(VecScatter_MPI_General*,VecScatter_MPI_General*,VecScatter);

1999: /*
2000:    bs indicates how many elements there are in each block. Normally this would be 1.

2002:    contains check that PetscMPIInt can handle the sizes needed
2003: */
2004: PetscErrorCode VecScatterCreateLocal_PtoS_MPI1(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2005: {
2006:   VecScatter_MPI_General *from,*to;
2007:   PetscMPIInt            size,rank,imdex,tag,n;
2008:   PetscInt               *source = NULL,*owners = NULL,nxr;
2009:   PetscInt               *lowner = NULL,*start = NULL,lengthy,lengthx;
2010:   PetscMPIInt            *nprocs = NULL,nrecvs;
2011:   PetscInt               i,j,idx,nsends;
2012:   PetscMPIInt            *owner = NULL;
2013:   PetscInt               *starts = NULL,count,slen;
2014:   PetscInt               *rvalues,*svalues,base,*values,nprocslocal,recvtotal,*rsvalues;
2015:   PetscMPIInt            *onodes1,*olengths1;
2016:   MPI_Comm               comm;
2017:   MPI_Request            *send_waits = NULL,*recv_waits = NULL;
2018:   MPI_Status             recv_status,*send_status;
2019:   PetscErrorCode         ierr;

2022:   PetscObjectGetNewTag((PetscObject)ctx,&tag);
2023:   PetscObjectGetComm((PetscObject)xin,&comm);
2024:   MPI_Comm_rank(comm,&rank);
2025:   MPI_Comm_size(comm,&size);
2026:   owners = xin->map->range;
2027:   VecGetSize(yin,&lengthy);
2028:   VecGetSize(xin,&lengthx);

2030:   /*  first count number of contributors to each processor */
2031:   /*  owner[i]: owner of ith inidx; nproc[j]: num of inidx to be sent to jth proc */
2032:   PetscMalloc2(size,&nprocs,nx,&owner);
2033:   PetscMemzero(nprocs,size*sizeof(PetscMPIInt));

2035:   j      = 0;
2036:   nsends = 0;
2037:   for (i=0; i<nx; i++) {
2038:     idx = bs*inidx[i];
2039:     if (idx < owners[j]) j = 0;
2040:     for (; j<size; j++) {
2041:       if (idx < owners[j+1]) {
2042:         if (!nprocs[j]++) nsends++;
2043:         owner[i] = j;
2044:         break;
2045:       }
2046:     }
2047:     if (j == size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ith %D block entry %D not owned by any process, upper bound %D",i,idx,owners[size]);
2048:   }

2050:   nprocslocal  = nprocs[rank];
2051:   nprocs[rank] = 0;
2052:   if (nprocslocal) nsends--;
2053:   /* inform other processors of number of messages and max length*/
2054:   PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2055:   PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2056:   PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2057:   recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];

2059:   /* post receives:   */
2060:   PetscMalloc3(recvtotal,&rvalues,nrecvs,&source,nrecvs,&recv_waits);
2061:   count = 0;
2062:   for (i=0; i<nrecvs; i++) {
2063:     MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2064:     count += olengths1[i];
2065:   }

2067:   /* do sends:
2068:      1) starts[i] gives the starting index in svalues for stuff going to
2069:      the ith processor
2070:   */
2071:   nxr = 0;
2072:   for (i=0; i<nx; i++) {
2073:     if (owner[i] != rank) nxr++;
2074:   }
2075:   PetscMalloc3(nxr,&svalues,nsends,&send_waits,size+1,&starts);

2077:   starts[0]  = 0;
2078:   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2079:   for (i=0; i<nx; i++) {
2080:     if (owner[i] != rank) svalues[starts[owner[i]]++] = bs*inidx[i];
2081:   }
2082:   starts[0] = 0;
2083:   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2084:   count = 0;
2085:   for (i=0; i<size; i++) {
2086:     if (nprocs[i]) {
2087:       MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);
2088:     }
2089:   }

2091:   /*  wait on receives */
2092:   count = nrecvs;
2093:   slen  = 0;
2094:   while (count) {
2095:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2096:     /* unpack receives into our local space */
2097:     MPI_Get_count(&recv_status,MPIU_INT,&n);
2098:     slen += n;
2099:     count--;
2100:   }

2102:   if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not expected %D",slen,recvtotal);

2104:   /* allocate entire send scatter context */
2105:   PetscNewLog(ctx,&to);
2106:   to->n = nrecvs;

2108:   PetscMalloc1(nrecvs,&to->requests);
2109:   PetscMalloc4(bs*slen,&to->values,slen,&to->indices,nrecvs+1,&to->starts,nrecvs,&to->procs);
2110:   PetscMalloc2(PetscMax(to->n,nsends),&to->sstatus,PetscMax(to->n,nsends),&to->rstatus);

2112:   ctx->todata   = (void*)to;
2113:   to->starts[0] = 0;

2115:   if (nrecvs) {
2116:     /* move the data into the send scatter */
2117:     base     = owners[rank];
2118:     rsvalues = rvalues;
2119:     for (i=0; i<nrecvs; i++) {
2120:       to->starts[i+1] = to->starts[i] + olengths1[i];
2121:       to->procs[i]    = onodes1[i];
2122:       values = rsvalues;
2123:       rsvalues += olengths1[i];
2124:       for (j=0; j<olengths1[i]; j++) to->indices[to->starts[i] + j] = values[j] - base;
2125:     }
2126:   }
2127:   PetscFree(olengths1);
2128:   PetscFree(onodes1);
2129:   PetscFree3(rvalues,source,recv_waits);

2131:   /* allocate entire receive scatter context */
2132:   PetscNewLog(ctx,&from);
2133:   from->n = nsends;

2135:   PetscMalloc1(nsends,&from->requests);
2136:   PetscMalloc4((ny-nprocslocal)*bs,&from->values,ny-nprocslocal,&from->indices,nsends+1,&from->starts,from->n,&from->procs);
2137:   ctx->fromdata = (void*)from;

2139:   /* move data into receive scatter */
2140:   PetscMalloc2(size,&lowner,nsends+1,&start);
2141:   count = 0; from->starts[0] = start[0] = 0;
2142:   for (i=0; i<size; i++) {
2143:     if (nprocs[i]) {
2144:       lowner[i]            = count;
2145:       from->procs[count++] = i;
2146:       from->starts[count]  = start[count] = start[count-1] + nprocs[i];
2147:     }
2148:   }

2150:   for (i=0; i<nx; i++) {
2151:     if (owner[i] != rank) {
2152:       from->indices[start[lowner[owner[i]]]++] = bs*inidy[i];
2153:       if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2154:     }
2155:   }
2156:   PetscFree2(lowner,start);
2157:   PetscFree2(nprocs,owner);

2159:   /* wait on sends */
2160:   if (nsends) {
2161:     PetscMalloc1(nsends,&send_status);
2162:     MPI_Waitall(nsends,send_waits,send_status);
2163:     PetscFree(send_status);
2164:   }
2165:   PetscFree3(svalues,send_waits,starts);

2167:   if (nprocslocal) {
2168:     PetscInt nt = from->local.n = to->local.n = nprocslocal;
2169:     /* we have a scatter to ourselves */
2170:     PetscMalloc1(nt,&to->local.vslots);
2171:     PetscMalloc1(nt,&from->local.vslots);
2172:     nt   = 0;
2173:     for (i=0; i<nx; i++) {
2174:       idx = bs*inidx[i];
2175:       if (idx >= owners[rank] && idx < owners[rank+1]) {
2176:         to->local.vslots[nt]     = idx - owners[rank];
2177:         from->local.vslots[nt++] = bs*inidy[i];
2178:         if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2179:       }
2180:     }
2181:     PetscLogObjectMemory((PetscObject)ctx,2*nt*sizeof(PetscInt));
2182:   } else {
2183:     from->local.n      = 0;
2184:     from->local.vslots = 0;
2185:     to->local.n        = 0;
2186:     to->local.vslots   = 0;
2187:   }

2189:   from->local.nonmatching_computed = PETSC_FALSE;
2190:   from->local.n_nonmatching        = 0;
2191:   from->local.slots_nonmatching    = 0;
2192:   to->local.nonmatching_computed   = PETSC_FALSE;
2193:   to->local.n_nonmatching          = 0;
2194:   to->local.slots_nonmatching      = 0;

2196:   from->format = VEC_SCATTER_MPI_GENERAL;
2197:   to->format   = VEC_SCATTER_MPI_GENERAL;
2198:   from->bs     = bs;
2199:   to->bs       = bs;

2201:   VecScatterCreateCommon_PtoS_MPI1(from,to,ctx);
2202:   return(0);
2203: }

2205: /*
2206:    bs indicates how many elements there are in each block. Normally this would be 1.
2207: */
2208: PetscErrorCode VecScatterCreateCommon_PtoS_MPI1(VecScatter_MPI_General *from,VecScatter_MPI_General *to,VecScatter ctx)
2209: {
2210:   MPI_Comm       comm;
2211:   PetscMPIInt    tag  = ((PetscObject)ctx)->tag, tagr;
2212:   PetscInt       bs   = to->bs;
2213:   PetscMPIInt    size;
2214:   PetscInt       i, n;

2218:   PetscObjectGetComm((PetscObject)ctx,&comm);
2219:   PetscObjectGetNewTag((PetscObject)ctx,&tagr);
2220:   ctx->ops->destroy = VecScatterDestroy_PtoP_MPI1;

2222:   MPI_Comm_size(comm,&size);
2223:   /* check if the receives are ALL going into contiguous locations; if so can skip indexing */
2224:   to->contiq = PETSC_FALSE;
2225:   n = from->starts[from->n];
2226:   from->contiq = PETSC_TRUE;
2227:   for (i=1; i<n; i++) {
2228:     if (from->indices[i] != from->indices[i-1] + bs) {
2229:       from->contiq = PETSC_FALSE;
2230:       break;
2231:     }
2232:   }

2234:   {
2235:     PetscInt    *sstarts  = to->starts,  *rstarts = from->starts;
2236:     PetscMPIInt *sprocs   = to->procs,   *rprocs  = from->procs;
2237:     MPI_Request *swaits   = to->requests,*rwaits  = from->requests;
2238:     MPI_Request *rev_swaits,*rev_rwaits;
2239:     PetscScalar *Ssvalues = to->values, *Srvalues = from->values;

2241:     /* allocate additional wait variables for the "reverse" scatter */
2242:     PetscMalloc1(to->n,&rev_rwaits);
2243:     PetscMalloc1(from->n,&rev_swaits);
2244:     to->rev_requests   = rev_rwaits;
2245:     from->rev_requests = rev_swaits;

2247:     for (i=0; i<from->n; i++) {
2248:       MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2249:     }

2251:     for (i=0; i<to->n; i++) {
2252:       MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2253:     }
2254:     /* Register receives for scatter and reverse */
2255:     for (i=0; i<from->n; i++) {
2256:       MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
2257:     }
2258:     for (i=0; i<to->n; i++) {
2259:       MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tagr,comm,rev_rwaits+i);
2260:     }
2261:     ctx->ops->copy = VecScatterCopy_PtoP_X_MPI1;
2262:   }
2263:   PetscInfo1(ctx,"Using blocksize %D scatter\n",bs);

2265: #if defined(PETSC_USE_DEBUG)
2266:   MPIU_Allreduce(&bs,&i,1,MPIU_INT,MPI_MIN,PetscObjectComm((PetscObject)ctx));
2267:   MPIU_Allreduce(&bs,&n,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)ctx));
2268:   if (bs!=i || bs!=n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Blocks size %D != %D or %D",bs,i,n);
2269: #endif

2271:   switch (bs) {
2272:   case 12:
2273:     ctx->ops->begin = VecScatterBeginMPI1_12;
2274:     ctx->ops->end   = VecScatterEndMPI1_12;
2275:     break;
2276:   case 11:
2277:     ctx->ops->begin = VecScatterBeginMPI1_11;
2278:     ctx->ops->end   = VecScatterEndMPI1_11;
2279:     break;
2280:   case 10:
2281:     ctx->ops->begin = VecScatterBeginMPI1_10;
2282:     ctx->ops->end   = VecScatterEndMPI1_10;
2283:     break;
2284:   case 9:
2285:     ctx->ops->begin = VecScatterBeginMPI1_9;
2286:     ctx->ops->end   = VecScatterEndMPI1_9;
2287:     break;
2288:   case 8:
2289:     ctx->ops->begin = VecScatterBeginMPI1_8;
2290:     ctx->ops->end   = VecScatterEndMPI1_8;
2291:     break;
2292:   case 7:
2293:     ctx->ops->begin = VecScatterBeginMPI1_7;
2294:     ctx->ops->end   = VecScatterEndMPI1_7;
2295:     break;
2296:   case 6:
2297:     ctx->ops->begin = VecScatterBeginMPI1_6;
2298:     ctx->ops->end   = VecScatterEndMPI1_6;
2299:     break;
2300:   case 5:
2301:     ctx->ops->begin = VecScatterBeginMPI1_5;
2302:     ctx->ops->end   = VecScatterEndMPI1_5;
2303:     break;
2304:   case 4:
2305:     ctx->ops->begin = VecScatterBeginMPI1_4;
2306:     ctx->ops->end   = VecScatterEndMPI1_4;
2307:     break;
2308:   case 3:
2309:     ctx->ops->begin = VecScatterBeginMPI1_3;
2310:     ctx->ops->end   = VecScatterEndMPI1_3;
2311:     break;
2312:   case 2:
2313:     ctx->ops->begin = VecScatterBeginMPI1_2;
2314:     ctx->ops->end   = VecScatterEndMPI1_2;
2315:     break;
2316:   case 1:
2317:     ctx->ops->begin = VecScatterBeginMPI1_1;
2318:     ctx->ops->end   = VecScatterEndMPI1_1;
2319:     break;
2320:   default:
2321:     ctx->ops->begin = VecScatterBeginMPI1_bs;
2322:     ctx->ops->end   = VecScatterEndMPI1_bs;

2324:   }
2325:   ctx->ops->view = VecScatterView_MPI_MPI1;
2326:   /* try to optimize PtoP vecscatter with memcpy's */
2327:   VecScatterMemcpyPlanCreate_PtoP(to,from);
2328:   return(0);
2329: }


2332: /* ------------------------------------------------------------------------------------*/
2333: /*
2334:          Scatter from local Seq vectors to a parallel vector.
2335:          Reverses the order of the arguments, calls VecScatterCreateLocal_PtoS() then
2336:          reverses the result.
2337: */
2338: PetscErrorCode VecScatterCreateLocal_StoP_MPI1(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2339: {
2340:   PetscErrorCode         ierr;
2341:   MPI_Request            *waits;
2342:   VecScatter_MPI_General *to,*from;

2345:   VecScatterCreateLocal_PtoS_MPI1(ny,inidy,nx,inidx,yin,xin,bs,ctx);
2346:   to            = (VecScatter_MPI_General*)ctx->fromdata;
2347:   from          = (VecScatter_MPI_General*)ctx->todata;
2348:   ctx->todata   = (void*)to;
2349:   ctx->fromdata = (void*)from;
2350:   /* these two are special, they are ALWAYS stored in to struct */
2351:   to->sstatus   = from->sstatus;
2352:   to->rstatus   = from->rstatus;

2354:   from->sstatus = 0;
2355:   from->rstatus = 0;

2357:   waits              = from->rev_requests;
2358:   from->rev_requests = from->requests;
2359:   from->requests     = waits;
2360:   waits              = to->rev_requests;
2361:   to->rev_requests   = to->requests;
2362:   to->requests       = waits;
2363:   return(0);
2364: }

2366: /* ---------------------------------------------------------------------------------*/
2367: PetscErrorCode VecScatterCreateLocal_PtoP_MPI1(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2368: {
2370:   PetscMPIInt    size,rank,tag,imdex,n;
2371:   PetscInt       *owners = xin->map->range;
2372:   PetscMPIInt    *nprocs = NULL;
2373:   PetscInt       i,j,idx,nsends,*local_inidx = NULL,*local_inidy = NULL;
2374:   PetscMPIInt    *owner   = NULL;
2375:   PetscInt       *starts  = NULL,count,slen;
2376:   PetscInt       *rvalues = NULL,*svalues = NULL,base,*values = NULL,*rsvalues,recvtotal,lastidx;
2377:   PetscMPIInt    *onodes1,*olengths1,nrecvs;
2378:   MPI_Comm       comm;
2379:   MPI_Request    *send_waits = NULL,*recv_waits = NULL;
2380:   MPI_Status     recv_status,*send_status = NULL;
2381:   PetscBool      duplicate = PETSC_FALSE;
2382: #if defined(PETSC_USE_DEBUG)
2383:   PetscBool      found = PETSC_FALSE;
2384: #endif

2387:   PetscObjectGetNewTag((PetscObject)ctx,&tag);
2388:   PetscObjectGetComm((PetscObject)xin,&comm);
2389:   MPI_Comm_size(comm,&size);
2390:   MPI_Comm_rank(comm,&rank);
2391:   if (size == 1) {
2392:     VecScatterCreateLocal_StoP_MPI1(nx,inidx,ny,inidy,xin,yin,bs,ctx);
2393:     return(0);
2394:   }

2396:   /*
2397:      Each processor ships off its inidx[j] and inidy[j] to the appropriate processor
2398:      They then call the StoPScatterCreate()
2399:   */
2400:   /*  first count number of contributors to each processor */
2401:   PetscMalloc3(size,&nprocs,nx,&owner,(size+1),&starts);
2402:   PetscMemzero(nprocs,size*sizeof(PetscMPIInt));

2404:   lastidx = -1;
2405:   j       = 0;
2406:   for (i=0; i<nx; i++) {
2407:     /* if indices are NOT locally sorted, need to start search at the beginning */
2408:     if (lastidx > (idx = bs*inidx[i])) j = 0;
2409:     lastidx = idx;
2410:     for (; j<size; j++) {
2411:       if (idx >= owners[j] && idx < owners[j+1]) {
2412:         nprocs[j]++;
2413:         owner[i] = j;
2414: #if defined(PETSC_USE_DEBUG)
2415:         found = PETSC_TRUE;
2416: #endif
2417:         break;
2418:       }
2419:     }
2420: #if defined(PETSC_USE_DEBUG)
2421:     if (!found) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2422:     found = PETSC_FALSE;
2423: #endif
2424:   }
2425:   nsends = 0;
2426:   for (i=0; i<size; i++) nsends += (nprocs[i] > 0);

2428:   /* inform other processors of number of messages and max length*/
2429:   PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2430:   PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2431:   PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2432:   recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];

2434:   /* post receives:   */
2435:   PetscMalloc5(2*recvtotal,&rvalues,2*nx,&svalues,nrecvs,&recv_waits,nsends,&send_waits,nsends,&send_status);

2437:   count = 0;
2438:   for (i=0; i<nrecvs; i++) {
2439:     MPI_Irecv((rvalues+2*count),2*olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2440:     count += olengths1[i];
2441:   }
2442:   PetscFree(onodes1);

2444:   /* do sends:
2445:       1) starts[i] gives the starting index in svalues for stuff going to
2446:          the ith processor
2447:   */
2448:   starts[0]= 0;
2449:   for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2450:   for (i=0; i<nx; i++) {
2451:     svalues[2*starts[owner[i]]]       = bs*inidx[i];
2452:     svalues[1 + 2*starts[owner[i]]++] = bs*inidy[i];
2453:   }

2455:   starts[0] = 0;
2456:   for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2457:   count = 0;
2458:   for (i=0; i<size; i++) {
2459:     if (nprocs[i]) {
2460:       MPI_Isend(svalues+2*starts[i],2*nprocs[i],MPIU_INT,i,tag,comm,send_waits+count);
2461:       count++;
2462:     }
2463:   }
2464:   PetscFree3(nprocs,owner,starts);

2466:   /*  wait on receives */
2467:   count = nrecvs;
2468:   slen  = 0;
2469:   while (count) {
2470:     MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2471:     /* unpack receives into our local space */
2472:     MPI_Get_count(&recv_status,MPIU_INT,&n);
2473:     slen += n/2;
2474:     count--;
2475:   }
2476:   if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);

2478:   PetscMalloc2(slen,&local_inidx,slen,&local_inidy);
2479:   base     = owners[rank];
2480:   count    = 0;
2481:   rsvalues = rvalues;
2482:   for (i=0; i<nrecvs; i++) {
2483:     values    = rsvalues;
2484:     rsvalues += 2*olengths1[i];
2485:     for (j=0; j<olengths1[i]; j++) {
2486:       local_inidx[count]   = values[2*j] - base;
2487:       local_inidy[count++] = values[2*j+1];
2488:     }
2489:   }
2490:   PetscFree(olengths1);

2492:   /* wait on sends */
2493:   if (nsends) {MPI_Waitall(nsends,send_waits,send_status);}
2494:   PetscFree5(rvalues,svalues,recv_waits,send_waits,send_status);

2496:   /*
2497:      should sort and remove duplicates from local_inidx,local_inidy
2498:   */

2500: #if defined(do_it_slow)
2501:   /* sort on the from index */
2502:   PetscSortIntWithArray(slen,local_inidx,local_inidy);
2503:   start = 0;
2504:   while (start < slen) {
2505:     count = start+1;
2506:     last  = local_inidx[start];
2507:     while (count < slen && last == local_inidx[count]) count++;
2508:     if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
2509:       /* sort on to index */
2510:       PetscSortInt(count-start,local_inidy+start);
2511:     }
2512:     /* remove duplicates; not most efficient way, but probably good enough */
2513:     i = start;
2514:     while (i < count-1) {
2515:       if (local_inidy[i] != local_inidy[i+1]) i++;
2516:       else { /* found a duplicate */
2517:         duplicate = PETSC_TRUE;
2518:         for (j=i; j<slen-1; j++) {
2519:           local_inidx[j] = local_inidx[j+1];
2520:           local_inidy[j] = local_inidy[j+1];
2521:         }
2522:         slen--;
2523:         count--;
2524:       }
2525:     }
2526:     start = count;
2527:   }
2528: #endif
2529:   if (duplicate) {
2530:     PetscInfo(ctx,"Duplicate from to indices passed in VecScatterCreate(), they are ignored\n");
2531:   }
2532:   VecScatterCreateLocal_StoP_MPI1(slen,local_inidx,slen,local_inidy,xin,yin,bs,ctx);
2533:   PetscFree2(local_inidx,local_inidy);
2534:   return(0);
2535: }