Actual source code: seqvscat.c

petsc-3.11.4 2019-09-28
Report Typos and Errors

  2: /*
  3:      Code for creating scatters between vectors. This file
  4:   includes the code for scattering between sequential vectors and
  5:   some special cases for parallel scatters.
  6: */

  8:  #include <petsc/private/vecscatterimpl.h>

 10: #if defined(PETSC_HAVE_CUDA)
 11:  #include <../src/vec/vec/impls/seq/seqcuda/cudavecimpl.h>
 12: #endif

 14: #if defined(PETSC_USE_DEBUG)
 15: /*
 16:      Checks if any indices are less than zero and generates an error
 17: */
 18: static PetscErrorCode VecScatterCheckIndices_Private(PetscInt nmax,PetscInt n,const PetscInt *idx)
 19: {
 20:   PetscInt i;

 23:   for (i=0; i<n; i++) {
 24:     if (idx[i] < 0)     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative index %D at %D location",idx[i],i);
 25:     if (idx[i] >= nmax) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D at %D location greater than max %D",idx[i],i,nmax);
 26:   }
 27:   return(0);
 28: }
 29: #endif

 31: PetscErrorCode VecScatterDestroy_SGToSG(VecScatter ctx)
 32: {

 36:   PetscFree2(((VecScatter_Seq_General*)ctx->todata)->vslots,((VecScatter_Seq_General*)ctx->fromdata)->vslots);
 37:   VecScatterMemcpyPlanDestroy(&((VecScatter_Seq_General*)ctx->fromdata)->memcpy_plan);;
 38:   VecScatterMemcpyPlanDestroy(&((VecScatter_Seq_General*)ctx->todata)->memcpy_plan);;
 39:   PetscFree2(ctx->todata,ctx->fromdata);
 40:   return(0);
 41: }

 43: PetscErrorCode VecScatterDestroy_SGToSS(VecScatter ctx)
 44: {

 48:   PetscFree(((VecScatter_Seq_General*)ctx->fromdata)->vslots);
 49:   VecScatterMemcpyPlanDestroy(&((VecScatter_Seq_General*)ctx->fromdata)->memcpy_plan);;
 50:   PetscFree2(ctx->todata,ctx->fromdata);
 51:   return(0);
 52: }

 54: PetscErrorCode VecScatterDestroy_SSToSG(VecScatter ctx)
 55: {

 59:   PetscFree(((VecScatter_Seq_General*)ctx->todata)->vslots);
 60:   VecScatterMemcpyPlanDestroy(&((VecScatter_Seq_General*)ctx->todata)->memcpy_plan);;
 61:   PetscFree2(ctx->todata,ctx->fromdata);
 62:   return(0);
 63: }

 65: PetscErrorCode VecScatterDestroy_SSToSS(VecScatter ctx)
 66: {

 70:   PetscFree2(ctx->todata,ctx->fromdata);
 71:   return(0);
 72: }

 74: /* --------------------------------------------------------------------------------------*/
 75: /*
 76:         Scatter: sequential general to sequential general
 77: */
 78: PetscErrorCode VecScatterBegin_SGToSG(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
 79: {
 80:   VecScatter_Seq_General *gen_to   = (VecScatter_Seq_General*)ctx->todata;
 81:   VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
 82:   PetscErrorCode         ierr;
 83:   PetscInt               i,n = gen_from->n,*fslots,*tslots;
 84:   PetscScalar            *xv,*yv;
 85: #if defined(PETSC_HAVE_CUDA)
 86:   PetscBool              is_veccuda;
 87: #endif

 90: #if defined(PETSC_HAVE_CUDA)
 91:   PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
 92:   if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
 93:     /* create the scatter indices if not done already */
 94:     if (!ctx->spptr) {
 95:       PetscInt tofirst = 0,tostep = 0,fromfirst = 0,fromstep = 0;
 96:       fslots = gen_from->vslots;
 97:       tslots = gen_to->vslots;
 98:       VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
 99:     }
100:     /* next do the scatter */
101:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
102:     return(0);
103:   }
104: #endif

106:   VecGetArrayPair(x,y,&xv,&yv);
107:   if (mode & SCATTER_REVERSE) {
108:     gen_to   = (VecScatter_Seq_General*)ctx->fromdata;
109:     gen_from = (VecScatter_Seq_General*)ctx->todata;
110:   }
111:   fslots = gen_from->vslots;
112:   tslots = gen_to->vslots;

114:   if (gen_from->memcpy_plan.optimized[0]) { VecScatterMemcpyPlanExecute_Scatter(0,xv,&gen_from->memcpy_plan,yv,&gen_to->memcpy_plan,addv); }
115:   else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yv[tslots[i]]  = xv[fslots[i]]; }
116:   else if (addv == ADD_VALUES)    { for (i=0; i<n; i++) yv[tslots[i]] += xv[fslots[i]]; }
117: #if !defined(PETSC_USE_COMPLEX)
118:   else if (addv == MAX_VALUES)    { for (i=0; i<n; i++) yv[tslots[i]]  = PetscMax(yv[tslots[i]],xv[fslots[i]]); }
119: #endif
120:   else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
121:   VecRestoreArrayPair(x,y,&xv,&yv);
122:   return(0);
123: }

125: /*
126:     Scatter: sequential general to sequential stride 1
127: */
128: PetscErrorCode VecScatterBegin_SGToSS_Stride1(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
129: {
130:   VecScatter_Seq_Stride  *gen_to   = (VecScatter_Seq_Stride*)ctx->todata;
131:   VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
132:   PetscInt               i,n = gen_from->n,*fslots = gen_from->vslots;
133:   PetscErrorCode         ierr;
134:   PetscInt               first = gen_to->first;
135:   PetscScalar            *xv,*yv;
136: #if defined(PETSC_HAVE_CUDA)
137:   PetscBool              is_veccuda;
138: #endif

141: #if defined(PETSC_HAVE_CUDA)
142:   PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
143:   if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
144:     /* create the scatter indices if not done already */
145:     if (!ctx->spptr) {
146:       PetscInt tofirst = first,tostep = 1,fromfirst = 0,fromstep = 0;
147:       PetscInt *tslots = 0;
148:       VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
149:     }
150:     /* next do the scatter */
151:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
152:     return(0);
153:   }
154: #endif

156:   VecGetArrayPair(x,y,&xv,&yv);
157:   if (mode & SCATTER_REVERSE) {
158:     PetscScalar *xxv = xv + first;
159:     if (gen_from->memcpy_plan.optimized[0]) { VecScatterMemcpyPlanExecute_Unpack(0,xxv,yv,&gen_from->memcpy_plan,addv,1); }
160:     else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yv[fslots[i]]  = xxv[i]; }
161:     else if (addv == ADD_VALUES)    { for (i=0; i<n; i++) yv[fslots[i]] += xxv[i]; }
162: #if !defined(PETSC_USE_COMPLEX)
163:     else if (addv == MAX_VALUES)    { for (i=0; i<n; i++) yv[fslots[i]]  = PetscMax(yv[fslots[i]],xxv[i]); }
164: #endif
165:     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
166:   } else {
167:     PetscScalar *yyv = yv + first;
168:     if (gen_from->memcpy_plan.optimized[0]) { VecScatterMemcpyPlanExecute_Pack(0,xv,&gen_from->memcpy_plan,yyv,addv,1); }
169:     else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yyv[i]  = xv[fslots[i]]; }
170:     else if (addv == ADD_VALUES)    { for (i=0; i<n; i++) yyv[i] += xv[fslots[i]]; }
171: #if !defined(PETSC_USE_COMPLEX)
172:     else if (addv == MAX_VALUES)    { for (i=0; i<n; i++) yyv[i]  = PetscMax(yyv[i],xv[fslots[i]]); }
173: #endif
174:     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
175:   }
176:   VecRestoreArrayPair(x,y,&xv,&yv);
177:   return(0);
178: }

180: /*
181:    Scatter: sequential general to sequential stride
182: */
183: PetscErrorCode VecScatterBegin_SGToSS(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
184: {
185:   VecScatter_Seq_Stride  *gen_to   = (VecScatter_Seq_Stride*)ctx->todata;
186:   VecScatter_Seq_General *gen_from = (VecScatter_Seq_General*)ctx->fromdata;
187:   PetscInt               i,n = gen_from->n,*fslots = gen_from->vslots;
188:   PetscErrorCode         ierr;
189:   PetscInt               first = gen_to->first,step = gen_to->step;
190:   PetscScalar            *xv,*yv;
191: #if defined(PETSC_HAVE_CUDA)
192:   PetscBool              is_veccuda;
193: #endif

196: #if defined(PETSC_HAVE_CUDA)
197:   PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
198:   if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
199:     /* create the scatter indices if not done already */
200:     if (!ctx->spptr) {
201:       PetscInt tofirst = first,tostep = step,fromfirst = 0,fromstep = 0;
202:       PetscInt * tslots = 0;
203:       VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
204:     }
205:     /* next do the scatter */
206:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
207:     return(0);
208:   }
209: #endif

211:   VecGetArrayPair(x,y,&xv,&yv);
212:   if (mode & SCATTER_REVERSE) {
213:     if (addv == INSERT_VALUES) {
214:       for (i=0; i<n; i++) yv[fslots[i]] = xv[first + i*step];
215:     } else if (addv == ADD_VALUES) {
216:       for (i=0; i<n; i++) yv[fslots[i]] += xv[first + i*step];
217: #if !defined(PETSC_USE_COMPLEX)
218:     } else if (addv == MAX_VALUES) {
219:       for (i=0; i<n; i++) yv[fslots[i]] = PetscMax(yv[fslots[i]],xv[first + i*step]);
220: #endif
221:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
222:   } else {
223:     if (addv == INSERT_VALUES) {
224:       for (i=0; i<n; i++) yv[first + i*step] = xv[fslots[i]];
225:     } else if (addv == ADD_VALUES) {
226:       for (i=0; i<n; i++) yv[first + i*step] += xv[fslots[i]];
227: #if !defined(PETSC_USE_COMPLEX)
228:     } else if (addv == MAX_VALUES) {
229:       for (i=0; i<n; i++) yv[first + i*step] = PetscMax(yv[first + i*step],xv[fslots[i]]);
230: #endif
231:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
232:   }
233:   VecRestoreArrayPair(x,y,&xv,&yv);
234:   return(0);
235: }

237: /*
238:     Scatter: sequential stride 1 to sequential general
239: */
240: PetscErrorCode VecScatterBegin_SSToSG_Stride1(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
241: {
242:   VecScatter_Seq_Stride  *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
243:   VecScatter_Seq_General *gen_to   = (VecScatter_Seq_General*)ctx->todata;
244:   PetscInt               i,n = gen_from->n,*tslots = gen_to->vslots;
245:   PetscErrorCode         ierr;
246:   PetscInt               first = gen_from->first;
247:   PetscScalar            *xv,*yv;
248: #if defined(PETSC_HAVE_CUDA)
249:   PetscBool              is_veccuda;
250: #endif

253: #if defined(PETSC_HAVE_CUDA)
254:   PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
255:   if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
256:     /* create the scatter indices if not done already */
257:     if (!ctx->spptr) {
258:       PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = 1;
259:       PetscInt *fslots = 0;
260:       VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
261:     }
262:     /* next do the scatter */
263:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
264:     return(0);
265:   }
266: #endif

268:   VecGetArrayPair(x,y,&xv,&yv);
269:   if (mode & SCATTER_REVERSE) {
270:     PetscScalar *yyv = yv + first;
271:     if (gen_to->memcpy_plan.optimized[0]) { VecScatterMemcpyPlanExecute_Pack(0,xv,&gen_to->memcpy_plan,yyv,addv,1); }
272:     else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yyv[i]  = xv[tslots[i]]; }
273:     else if (addv == ADD_VALUES)    { for (i=0; i<n; i++) yyv[i] += xv[tslots[i]]; }
274: #if !defined(PETSC_USE_COMPLEX)
275:     else if (addv == MAX_VALUES)    { for (i=0; i<n; i++) yyv[i]  = PetscMax(yyv[i],xv[tslots[i]]); }
276: #endif
277:     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
278:   } else {
279:     PetscScalar *xxv = xv + first;
280:     if (gen_to->memcpy_plan.optimized[0]) { VecScatterMemcpyPlanExecute_Unpack(0,xxv,yv,&gen_to->memcpy_plan,addv,1); }
281:     else if (addv == INSERT_VALUES) { for (i=0; i<n; i++) yv[tslots[i]]  = xxv[i]; }
282:     else if (addv == ADD_VALUES)    { for (i=0; i<n; i++) yv[tslots[i]] += xxv[i]; }
283: #if !defined(PETSC_USE_COMPLEX)
284:     else if (addv == MAX_VALUES)    { for (i=0; i<n; i++) yv[tslots[i]]  = PetscMax(yv[tslots[i]],xxv[i]); }
285: #endif
286:     else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
287:   }
288:   VecRestoreArrayPair(x,y,&xv,&yv);
289:   return(0);
290: }

292: /*
293:    Scatter: sequential stride to sequential general
294: */
295: PetscErrorCode VecScatterBegin_SSToSG(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
296: {
297:   VecScatter_Seq_Stride  *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
298:   VecScatter_Seq_General *gen_to   = (VecScatter_Seq_General*)ctx->todata;
299:   PetscInt               i,n = gen_from->n,*tslots = gen_to->vslots;
300:   PetscErrorCode         ierr;
301:   PetscInt               first = gen_from->first,step = gen_from->step;
302:   PetscScalar            *xv,*yv;
303: #if defined(PETSC_HAVE_CUDA)
304:   PetscBool              is_veccuda;
305: #endif

308: #if defined(PETSC_HAVE_CUDA)
309:   PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
310:   if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
311:     /* create the scatter indices if not done already */
312:     if (!ctx->spptr) {
313:       PetscInt tofirst = 0,tostep = 0,fromfirst = first,fromstep = step;
314:       PetscInt *fslots = 0;
315:       VecScatterCUDAIndicesCreate_StoS(n,tofirst,fromfirst,tostep,fromstep,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
316:     }
317:     /* next do the scatter */
318:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
319:     return(0);
320:   }
321: #endif

323:   VecGetArrayPair(x,y,&xv,&yv);
324:   if (mode & SCATTER_REVERSE) {
325:     if (addv == INSERT_VALUES) {
326:       for (i=0; i<n; i++) yv[first + i*step] = xv[tslots[i]];
327:     } else if (addv == ADD_VALUES) {
328:       for (i=0; i<n; i++) yv[first + i*step] += xv[tslots[i]];
329: #if !defined(PETSC_USE_COMPLEX)
330:     } else if (addv == MAX_VALUES) {
331:       for (i=0; i<n; i++) yv[first + i*step] = PetscMax(yv[first + i*step],xv[tslots[i]]);
332: #endif
333:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
334:   } else {
335:     if (addv == INSERT_VALUES) {
336:       for (i=0; i<n; i++) yv[tslots[i]] = xv[first + i*step];
337:     } else if (addv == ADD_VALUES) {
338:       for (i=0; i<n; i++) yv[tslots[i]] += xv[first + i*step];
339: #if !defined(PETSC_USE_COMPLEX)
340:     } else if (addv == MAX_VALUES) {
341:       for (i=0; i<n; i++) yv[tslots[i]] = PetscMax(yv[tslots[i]],xv[first + i*step]);
342: #endif
343:     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
344:   }
345:   VecRestoreArrayPair(x,y,&xv,&yv);
346:   return(0);
347: }

349: PetscErrorCode VecScatterView_SSToSG(VecScatter in,PetscViewer viewer)
350: {
351:   PetscErrorCode         ierr;
352:   VecScatter_Seq_Stride  *in_from = (VecScatter_Seq_Stride*)in->fromdata;
353:   VecScatter_Seq_General *in_to   = (VecScatter_Seq_General*)in->todata;
354:   PetscInt               i;
355:   PetscBool              isascii;

358:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
359:   if (isascii) {
360:     PetscViewerASCIIPrintf(viewer,"Sequential stride to general scatter\n");
361:     for (i=0; i<in_to->n; i++) {
362:       PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->first + in_from->step*i,in_to->vslots[i]);
363:     }
364:     if (in_to->memcpy_plan.optimized[0]) {
365:       PetscViewerASCIIPrintf(viewer,"This stride1 to general scatter is made of %D copies\n",in_to->memcpy_plan.copy_offsets[1]);
366:     }
367:   }
368:   return(0);
369: }
370: /*
371:      Scatter: sequential stride to sequential stride
372: */
373: PetscErrorCode VecScatterBegin_SSToSS(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
374: {
375:   VecScatter_Seq_Stride *gen_to   = (VecScatter_Seq_Stride*)ctx->todata;
376:   VecScatter_Seq_Stride *gen_from = (VecScatter_Seq_Stride*)ctx->fromdata;
377:   PetscInt              i,n = gen_from->n,to_first = gen_to->first,to_step = gen_to->step;
378:   PetscErrorCode        ierr;
379:   PetscInt              from_first = gen_from->first,from_step = gen_from->step;
380:   PetscScalar           *xv,*yv;
381: #if defined(PETSC_HAVE_CUDA)
382:   PetscBool              is_veccuda;
383: #endif

386: #if defined(PETSC_HAVE_CUDA)
387:   PetscObjectTypeCompareAny((PetscObject)x,&is_veccuda,VECSEQCUDA,VECMPICUDA,VECCUDA,"");
388:   if (is_veccuda && x->valid_GPU_array == PETSC_OFFLOAD_GPU) {
389:     /* create the scatter indices if not done already */
390:     if (!ctx->spptr) {
391:       PetscInt *tslots = 0,*fslots = 0;
392:       VecScatterCUDAIndicesCreate_StoS(n,to_first,from_first,to_step,from_step,tslots,fslots,(PetscCUDAIndices*)&(ctx->spptr));
393:     }
394:     /* next do the scatter */
395:     VecScatterCUDA_StoS(x,y,(PetscCUDAIndices)ctx->spptr,addv,mode);
396:     return(0);
397:   }
398: #endif

400:   VecGetArrayPair(x,y,&xv,&yv);
401:   if (mode & SCATTER_REVERSE) {
402:     from_first = gen_to->first;
403:     to_first   = gen_from->first;
404:     from_step  = gen_to->step;
405:     to_step    = gen_from->step;
406:   }

408:   if (addv == INSERT_VALUES) {
409:     if (to_step == 1 && from_step == 1) {
410:       PetscMemcpy(yv+to_first,xv+from_first,n*sizeof(PetscScalar));
411:     } else  {
412:       for (i=0; i<n; i++) yv[to_first + i*to_step] = xv[from_first+i*from_step];
413:     }
414:   } else if (addv == ADD_VALUES) {
415:     if (to_step == 1 && from_step == 1) {
416:       PetscScalar *yyv = yv + to_first, *xxv = xv + from_first;
417:       for (i=0; i<n; i++) yyv[i] += xxv[i];
418:     } else {
419:       for (i=0; i<n; i++) yv[to_first + i*to_step] += xv[from_first+i*from_step];
420:     }
421: #if !defined(PETSC_USE_COMPLEX)
422:   } else if (addv == MAX_VALUES) {
423:     if (to_step == 1 && from_step == 1) {
424:       PetscScalar *yyv = yv + to_first, *xxv = xv + from_first;
425:       for (i=0; i<n; i++) yyv[i] = PetscMax(yyv[i],xxv[i]);
426:     } else {
427:       for (i=0; i<n; i++) yv[to_first + i*to_step] = PetscMax(yv[to_first + i*to_step],xv[from_first+i*from_step]);
428:     }
429: #endif
430:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_UNKNOWN_TYPE,"Wrong insert option");
431:   VecRestoreArrayPair(x,y,&xv,&yv);
432:   return(0);
433: }

435: /* --------------------------------------------------------------------------*/


438: PetscErrorCode VecScatterCopy_SGToSG(VecScatter in,VecScatter out)
439: {
440:   PetscErrorCode         ierr;
441:   VecScatter_Seq_General *in_to   = (VecScatter_Seq_General*)in->todata,*out_to = NULL;
442:   VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata,*out_from = NULL;

445:   out->ops->begin   = in->ops->begin;
446:   out->ops->end     = in->ops->end;
447:   out->ops->copy    = in->ops->copy;
448:   out->ops->destroy = in->ops->destroy;
449:   out->ops->view    = in->ops->view;

451:   PetscMalloc2(1,&out_to,1,&out_from);
452:   PetscMalloc2(in_to->n,&out_to->vslots,in_from->n,&out_from->vslots);
453:   out_to->n                    = in_to->n;
454:   out_to->format               = in_to->format;
455:   out_to->nonmatching_computed = PETSC_FALSE;
456:   out_to->slots_nonmatching    = 0;
457:   PetscMemcpy(out_to->vslots,in_to->vslots,(out_to->n)*sizeof(PetscInt));
458:   VecScatterMemcpyPlanCopy(&in_to->memcpy_plan,&out_to->memcpy_plan);

460:   out_from->n                    = in_from->n;
461:   out_from->format               = in_from->format;
462:   out_from->nonmatching_computed = PETSC_FALSE;
463:   out_from->slots_nonmatching    = 0;
464:   PetscMemcpy(out_from->vslots,in_from->vslots,(out_from->n)*sizeof(PetscInt));
465:   VecScatterMemcpyPlanCopy(&in_from->memcpy_plan,&out_from->memcpy_plan);

467:   out->todata   = (void*)out_to;
468:   out->fromdata = (void*)out_from;
469:   return(0);
470: }

472: PetscErrorCode VecScatterView_SGToSG(VecScatter in,PetscViewer viewer)
473: {
474:   PetscErrorCode         ierr;
475:   VecScatter_Seq_General *in_to   = (VecScatter_Seq_General*)in->todata;
476:   VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata;
477:   PetscInt               i;
478:   PetscBool              isascii;

481:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
482:   if (isascii) {
483:     PetscViewerASCIIPrintf(viewer,"Sequential general scatter\n");
484:     for (i=0; i<in_to->n; i++) {
485:       PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->vslots[i],in_to->vslots[i]);
486:     }
487:     if (in_from->memcpy_plan.optimized[0]) {
488:       PetscViewerASCIIPrintf(viewer,"This general to general scatter is made of %D copies\n",in_from->memcpy_plan.copy_offsets[1]);
489:     }
490:   }
491:   return(0);
492: }


495: PetscErrorCode VecScatterCopy_SGToSS(VecScatter in,VecScatter out)
496: {
497:   PetscErrorCode         ierr;
498:   VecScatter_Seq_Stride  *in_to   = (VecScatter_Seq_Stride*)in->todata,*out_to = NULL;
499:   VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata,*out_from = NULL;

502:   out->ops->begin   = in->ops->begin;
503:   out->ops->end     = in->ops->end;
504:   out->ops->copy    = in->ops->copy;
505:   out->ops->destroy = in->ops->destroy;
506:   out->ops->view    = in->ops->view;

508:   PetscMalloc2(1,&out_to,1,&out_from);
509:   PetscMalloc1(in_from->n,&out_from->vslots);
510:   out_to->n      = in_to->n;
511:   out_to->format = in_to->format;
512:   out_to->first  = in_to->first;
513:   out_to->step   = in_to->step;
514:   out_to->format = in_to->format;

516:   out_from->n                    = in_from->n;
517:   out_from->format               = in_from->format;
518:   out_from->nonmatching_computed = PETSC_FALSE;
519:   out_from->slots_nonmatching    = 0;
520:   PetscMemcpy(out_from->vslots,in_from->vslots,(out_from->n)*sizeof(PetscInt));
521:   VecScatterMemcpyPlanCopy(&in_from->memcpy_plan,&out_from->memcpy_plan);

523:   out->todata   = (void*)out_to;
524:   out->fromdata = (void*)out_from;
525:   return(0);
526: }

528: PetscErrorCode VecScatterView_SGToSS(VecScatter in,PetscViewer viewer)
529: {
530:   PetscErrorCode         ierr;
531:   VecScatter_Seq_Stride  *in_to   = (VecScatter_Seq_Stride*)in->todata;
532:   VecScatter_Seq_General *in_from = (VecScatter_Seq_General*)in->fromdata;
533:   PetscInt               i;
534:   PetscBool              isascii;

537:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
538:   if (isascii) {
539:     PetscViewerASCIIPrintf(viewer,"Sequential general scatter to stride\n");
540:     for (i=0; i<in_to->n; i++) {
541:       PetscViewerASCIIPrintf(viewer,"%D to %D\n",in_from->vslots[i],in_to->first + in_to->step*i);
542:     }
543:     if (in_from->memcpy_plan.optimized[0]) {
544:       PetscViewerASCIIPrintf(viewer,"This general to stride1 scatter is made of %D copies\n",in_from->memcpy_plan.copy_offsets[1]);
545:     }
546:   }
547:   return(0);
548: }

550: /* --------------------------------------------------------------------------*/
551: /*
552:     Scatter: parallel to sequential vector, sequential strides for both.
553: */
554: PetscErrorCode VecScatterCopy_SSToSS(VecScatter in,VecScatter out)
555: {
556:   VecScatter_Seq_Stride *in_to   = (VecScatter_Seq_Stride*)in->todata,*out_to = NULL;
557:   VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata,*out_from = NULL;
558:   PetscErrorCode        ierr;

561:   out->ops->begin   = in->ops->begin;
562:   out->ops->end     = in->ops->end;
563:   out->ops->copy    = in->ops->copy;
564:   out->ops->destroy = in->ops->destroy;
565:   out->ops->view    = in->ops->view;

567:   PetscMalloc2(1,&out_to,1,&out_from);
568:   out_to->n       = in_to->n;
569:   out_to->format  = in_to->format;
570:   out_to->first   = in_to->first;
571:   out_to->step    = in_to->step;
572:   out_to->format  = in_to->format;
573:   out_from->n     = in_from->n;
574:   out_from->format= in_from->format;
575:   out_from->first = in_from->first;
576:   out_from->step  = in_from->step;
577:   out_from->format= in_from->format;
578:   out->todata     = (void*)out_to;
579:   out->fromdata   = (void*)out_from;
580:   return(0);
581: }

583: PetscErrorCode VecScatterView_SSToSS(VecScatter in,PetscViewer viewer)
584: {
585:   VecScatter_Seq_Stride *in_to   = (VecScatter_Seq_Stride*)in->todata;
586:   VecScatter_Seq_Stride *in_from = (VecScatter_Seq_Stride*)in->fromdata;
587:   PetscErrorCode        ierr;
588:   PetscBool             isascii;

591:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
592:   if (isascii) {
593:     PetscViewerASCIIPrintf(viewer,"Sequential stride count %D start %D step to start %D stride %D\n",in_to->n,in_to->first,in_to->step,in_from->first,in_from->step);
594:   }
595:   return(0);
596: }

598: /* --------------------------------------------------------------------------------------*/
599: /* Create a memcpy plan for a sequential general (SG) to SG scatter */
600: PetscErrorCode VecScatterMemcpyPlanCreate_SGToSG(PetscInt bs,VecScatter_Seq_General *to,VecScatter_Seq_General *from)
601: {
602:   PetscInt       n = to->n,i,*to_slots = to->vslots,*from_slots = from->vslots;
603:   PetscInt       j,n_copies;
605:   PetscBool      same_copy_starts;

608:   PetscMemzero(&to->memcpy_plan,sizeof(VecScatterMemcpyPlan));
609:   PetscMemzero(&from->memcpy_plan,sizeof(VecScatterMemcpyPlan));
610:   to->memcpy_plan.n   = 1;
611:   from->memcpy_plan.n = 1;

613:   /* malloc and init the two fields to false and zero */
614:   PetscCalloc2(1,&to->memcpy_plan.optimized,2,&to->memcpy_plan.copy_offsets);
615:   PetscCalloc2(1,&from->memcpy_plan.optimized,2,&from->memcpy_plan.copy_offsets);

617:   /* count number of copies, which runs from 1 to n */
618:   n_copies = 1;
619:   for (i=0; i<n-1; i++) {
620:     if (to_slots[i]+bs != to_slots[i+1] || from_slots[i]+bs != from_slots[i+1]) n_copies++;
621:   }

623:   /* if average copy size >= 256 bytes, use memcpy instead of load/store */
624:   if (bs*n*sizeof(PetscScalar)/n_copies >= 256) {
625:     PetscMalloc2(n_copies,&to->memcpy_plan.copy_starts,n_copies,&to->memcpy_plan.copy_lengths);
626:     PetscMalloc2(n_copies,&from->memcpy_plan.copy_starts,n_copies,&from->memcpy_plan.copy_lengths);

628:     /* set up copy_starts[] & copy_lenghts[] of to and from */
629:     to->memcpy_plan.copy_starts[0]   = to_slots[0];
630:     from->memcpy_plan.copy_starts[0] = from_slots[0];

632:     if (n_copies != 1) { /* one copy is trival and we can save some work */
633:       j = 0;  /* j-th copy */
634:       for (i=0; i<n-1; i++) {
635:         if (to_slots[i]+bs != to_slots[i+1] || from_slots[i]+bs != from_slots[i+1]) {
636:           to->memcpy_plan.copy_lengths[j]    = sizeof(PetscScalar)*(to_slots[i]+bs-to->memcpy_plan.copy_starts[j]);
637:           from->memcpy_plan.copy_lengths[j]  = sizeof(PetscScalar)*(from_slots[i]+bs-from->memcpy_plan.copy_starts[j]);
638:           to->memcpy_plan.copy_starts[j+1]   = to_slots[i+1];
639:           from->memcpy_plan.copy_starts[j+1] = from_slots[i+1];
640:           j++;
641:         }
642:       }
643:     }

645:     /* set up copy_lengths[] of the last copy */
646:     to->memcpy_plan.copy_lengths[n_copies-1]   = sizeof(PetscScalar)*(to_slots[n-1]+bs-to->memcpy_plan.copy_starts[n_copies-1]);
647:     from->memcpy_plan.copy_lengths[n_copies-1] = sizeof(PetscScalar)*(from_slots[n-1]+bs-from->memcpy_plan.copy_starts[n_copies-1]);

649:     /* check if to and from have the same copy_starts[] values */
650:     same_copy_starts = PETSC_TRUE;
651:     for (i=0; i<n_copies; i++) {
652:       if (to->memcpy_plan.copy_starts[i] != from->memcpy_plan.copy_starts[i]) { same_copy_starts = PETSC_FALSE; break; }
653:     }

655:     to->memcpy_plan.optimized[0]        = PETSC_TRUE;
656:     from->memcpy_plan.optimized[0]      = PETSC_TRUE;
657:     to->memcpy_plan.copy_offsets[1]     = n_copies;
658:     from->memcpy_plan.copy_offsets[1]   = n_copies;
659:     to->memcpy_plan.same_copy_starts    = same_copy_starts;
660:     from->memcpy_plan.same_copy_starts  = same_copy_starts;
661:   }

663:   /* we do not do stride optimzation for this kind of scatter since the chance is rare. All related fields are zeroed out */
664:   return(0);
665: }

667: /* -------------------------------------------------- */
668: PetscErrorCode VecScatterSetUp_Seq(VecScatter ctx)
669: {
670:   PetscErrorCode    ierr;
671:   PetscInt          ix_type=-1,iy_type=-1;
672:   IS                tix = NULL,tiy = NULL,ix=ctx->from_is,iy=ctx->to_is;
673:   Vec               xin=ctx->from_v;

676:   GetInputISType_private(ctx,VEC_SEQ_ID,VEC_SEQ_ID,&ix_type,&tix,&iy_type,&tiy);
677:   if (tix) ix = tix;
678:   if (tiy) iy = tiy;

680:   if (ix_type == IS_GENERAL_ID && iy_type == IS_GENERAL_ID) {
681:     PetscInt               nx,ny;
682:     const PetscInt         *idx,*idy;
683:     VecScatter_Seq_General *to = NULL,*from = NULL;

685:     ISGetLocalSize(ix,&nx);
686:     ISGetLocalSize(iy,&ny);
687:     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
688:     ISGetIndices(ix,&idx);
689:     ISGetIndices(iy,&idy);
690:     PetscMalloc2(1,&to,1,&from);
691:     PetscMalloc2(nx,&to->vslots,nx,&from->vslots);
692:     to->n = nx;
693: #if defined(PETSC_USE_DEBUG)
694:     VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
695: #endif
696:     PetscMemcpy(to->vslots,idy,nx*sizeof(PetscInt));
697:     from->n = nx;
698: #if defined(PETSC_USE_DEBUG)
699:     VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
700: #endif
701:      PetscMemcpy(from->vslots,idx,nx*sizeof(PetscInt));
702:     to->format          = VEC_SCATTER_SEQ_GENERAL;
703:     from->format        = VEC_SCATTER_SEQ_GENERAL;
704:     ctx->todata       = (void*)to;
705:     ctx->fromdata     = (void*)from;
706:     VecScatterMemcpyPlanCreate_SGToSG(1,to,from);
707:     ctx->ops->begin   = VecScatterBegin_SGToSG;
708:     ctx->ops->end     = 0;
709:     ctx->ops->destroy = VecScatterDestroy_SGToSG;
710:     ctx->ops->copy    = VecScatterCopy_SGToSG;
711:     ctx->ops->view    = VecScatterView_SGToSG;
712:     PetscInfo(xin,"Special case: sequential vector general scatter\n");
713:     goto functionend;
714:   } else if (ix_type == IS_STRIDE_ID &&  iy_type == IS_STRIDE_ID) {
715:     PetscInt              nx,ny,to_first,to_step,from_first,from_step;
716:     VecScatter_Seq_Stride *from8 = NULL,*to8 = NULL;

718:     ISGetLocalSize(ix,&nx);
719:     ISGetLocalSize(iy,&ny);
720:     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
721:     ISStrideGetInfo(iy,&to_first,&to_step);
722:     ISStrideGetInfo(ix,&from_first,&from_step);
723:     PetscMalloc2(1,&to8,1,&from8);
724:     to8->n            = nx;
725:     to8->first        = to_first;
726:     to8->step         = to_step;
727:     from8->n          = nx;
728:     from8->first      = from_first;
729:     from8->step       = from_step;
730:     to8->format         = VEC_SCATTER_SEQ_STRIDE;
731:     from8->format       = VEC_SCATTER_SEQ_STRIDE;
732:     ctx->todata       = (void*)to8;
733:     ctx->fromdata     = (void*)from8;
734:     ctx->ops->begin   = VecScatterBegin_SSToSS;
735:     ctx->ops->end     = 0;
736:     ctx->ops->destroy = VecScatterDestroy_SSToSS;
737:     ctx->ops->copy    = VecScatterCopy_SSToSS;
738:     ctx->ops->view    = VecScatterView_SSToSS;
739:     PetscInfo(xin,"Special case: sequential vector stride to stride\n");
740:     goto functionend;
741:   } else if (ix_type == IS_GENERAL_ID && iy_type == IS_STRIDE_ID) {
742:     PetscInt               nx,ny,first,step;
743:     const PetscInt         *idx;
744:     VecScatter_Seq_General *from9 = NULL;
745:     VecScatter_Seq_Stride  *to9   = NULL;

747:     ISGetLocalSize(ix,&nx);
748:     ISGetIndices(ix,&idx);
749:     ISGetLocalSize(iy,&ny);
750:     ISStrideGetInfo(iy,&first,&step);
751:     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
752:     PetscMalloc2(1,&to9,1,&from9);
753:     PetscMemzero(&from9->memcpy_plan,sizeof(VecScatterMemcpyPlan));
754:     PetscMalloc1(nx,&from9->vslots);
755:     to9->n     = nx;
756:     to9->first = first;
757:     to9->step  = step;
758:     from9->n   = nx;
759: #if defined(PETSC_USE_DEBUG)
760:     VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
761: #endif
762:     PetscMemcpy(from9->vslots,idx,nx*sizeof(PetscInt));
763:     ctx->todata    = (void*)to9; ctx->fromdata = (void*)from9;
764:     if (step == 1) {
765:       PetscInt tmp[2];
766:       tmp[0] = 0; tmp[1] = nx;
767:       VecScatterMemcpyPlanCreate_Index(1,tmp,from9->vslots,1,&from9->memcpy_plan);
768:       ctx->ops->begin = VecScatterBegin_SGToSS_Stride1;
769:     } else {
770:       ctx->ops->begin = VecScatterBegin_SGToSS;
771:     }
772:     ctx->ops->destroy   = VecScatterDestroy_SGToSS;
773:     ctx->ops->end       = 0;
774:     ctx->ops->copy      = VecScatterCopy_SGToSS;
775:     ctx->ops->view      = VecScatterView_SGToSS;
776:     to9->format      = VEC_SCATTER_SEQ_STRIDE;
777:     from9->format    = VEC_SCATTER_SEQ_GENERAL;
778:     PetscInfo(xin,"Special case: sequential vector general to stride\n");
779:     goto functionend;
780:   } else if (ix_type == IS_STRIDE_ID && iy_type == IS_GENERAL_ID) {
781:     PetscInt               nx,ny,first,step;
782:     const PetscInt         *idy;
783:     VecScatter_Seq_General *to10 = NULL;
784:     VecScatter_Seq_Stride  *from10 = NULL;

786:     ISGetLocalSize(ix,&nx);
787:     ISGetIndices(iy,&idy);
788:     ISGetLocalSize(iy,&ny);
789:     ISStrideGetInfo(ix,&first,&step);
790:     if (nx != ny) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match");
791:     PetscMalloc2(1,&to10,1,&from10);
792:     PetscMemzero(&to10->memcpy_plan,sizeof(VecScatterMemcpyPlan));
793:     PetscMalloc1(nx,&to10->vslots);
794:     from10->n     = nx;
795:     from10->first = first;
796:     from10->step  = step;
797:     to10->n       = nx;
798: #if defined(PETSC_USE_DEBUG)
799:     VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
800: #endif
801:     PetscMemcpy(to10->vslots,idy,nx*sizeof(PetscInt));
802:     ctx->todata   = (void*)to10;
803:     ctx->fromdata = (void*)from10;
804:     if (step == 1) {
805:       PetscInt tmp[2];
806:       tmp[0] = 0; tmp[1] = nx;
807:       VecScatterMemcpyPlanCreate_Index(1,tmp,to10->vslots,1,&to10->memcpy_plan);
808:       ctx->ops->begin = VecScatterBegin_SSToSG_Stride1;
809:     } else {
810:       ctx->ops->begin = VecScatterBegin_SSToSG;
811:     }
812:     ctx->ops->destroy = VecScatterDestroy_SSToSG;
813:     ctx->ops->end     = 0;
814:     ctx->ops->copy    = 0;
815:     ctx->ops->view    = VecScatterView_SSToSG;
816:     to10->format   = VEC_SCATTER_SEQ_GENERAL;
817:     from10->format = VEC_SCATTER_SEQ_STRIDE;
818:     PetscInfo(xin,"Special case: sequential vector stride to general\n");
819:     goto functionend;
820:   } else {
821:     PetscInt               nx,ny;
822:     const PetscInt         *idx,*idy;
823:     VecScatter_Seq_General *to11 = NULL,*from11 = NULL;
824:     PetscBool              idnx,idny;

826:     ISGetLocalSize(ix,&nx);
827:     ISGetLocalSize(iy,&ny);
828:     if (nx != ny) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Local scatter sizes don't match, in %D out %D",nx,ny);

830:     ISIdentity(ix,&idnx);
831:     ISIdentity(iy,&idny);
832:     if (idnx && idny) {
833:       VecScatter_Seq_Stride *to13 = NULL,*from13 = NULL;
834:       PetscMalloc2(1,&to13,1,&from13);
835:       to13->n       = nx;
836:       to13->first   = 0;
837:       to13->step    = 1;
838:       from13->n     = nx;
839:       from13->first = 0;
840:       from13->step  = 1;
841:       to13->format    = VEC_SCATTER_SEQ_STRIDE;
842:       from13->format  = VEC_SCATTER_SEQ_STRIDE;
843:       ctx->todata   = (void*)to13;
844:       ctx->fromdata = (void*)from13;
845:       ctx->ops->begin    = VecScatterBegin_SSToSS;
846:       ctx->ops->end      = 0;
847:       ctx->ops->destroy  = VecScatterDestroy_SSToSS;
848:       ctx->ops->copy     = VecScatterCopy_SSToSS;
849:       ctx->ops->view     = VecScatterView_SSToSS;
850:       PetscInfo(xin,"Special case: sequential copy\n");
851:       goto functionend;
852:     }

854:     ISGetIndices(iy,&idy);
855:     ISGetIndices(ix,&idx);
856:     PetscMalloc2(1,&to11,1,&from11);
857:     PetscMalloc2(nx,&to11->vslots,nx,&from11->vslots);
858:     to11->n = nx;
859: #if defined(PETSC_USE_DEBUG)
860:     VecScatterCheckIndices_Private(ctx->to_n,ny,idy);
861: #endif
862:     PetscMemcpy(to11->vslots,idy,nx*sizeof(PetscInt));
863:     from11->n = nx;
864: #if defined(PETSC_USE_DEBUG)
865:     VecScatterCheckIndices_Private(ctx->from_n,nx,idx);
866: #endif
867:     PetscMemcpy(from11->vslots,idx,nx*sizeof(PetscInt));
868:     to11->format        = VEC_SCATTER_SEQ_GENERAL;
869:     from11->format      = VEC_SCATTER_SEQ_GENERAL;
870:     ctx->todata       = (void*)to11;
871:     ctx->fromdata     = (void*)from11;
872:     ctx->ops->begin   = VecScatterBegin_SGToSG;
873:     ctx->ops->end     = 0;
874:     ctx->ops->destroy = VecScatterDestroy_SGToSG;
875:     ctx->ops->copy    = VecScatterCopy_SGToSG;
876:     ctx->ops->view    = VecScatterView_SGToSG;
877:     VecScatterMemcpyPlanCreate_SGToSG(1,to11,from11);
878:     ISRestoreIndices(ix,&idx);
879:     ISRestoreIndices(iy,&idy);
880:     PetscInfo(xin,"Sequential vector scatter with block indices\n");
881:     goto functionend;
882:   }
883:   functionend:
884:   ISDestroy(&tix);
885:   ISDestroy(&tiy);
886:   VecScatterViewFromOptions(ctx,NULL,"-vecscatter_view");
887:   return(0);
888: }

890: PetscErrorCode VecScatterCreate_Seq(VecScatter ctx)
891: {
892:   PetscErrorCode    ierr;

895:   ctx->ops->setup = VecScatterSetUp_Seq;
896:   PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERSEQ);
897:   return(0);
898: }