Actual source code: vpscat.c
petsc-3.10.5 2019-03-28
1: /*
2: Defines parallel vector scatters.
3: */
5: #include <petsc/private/vecscatterimpl.h>
7: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
9: PetscErrorCode VecScatterView_MPI(VecScatter ctx,PetscViewer viewer)
10: {
11: VecScatter_MPI_General *to =(VecScatter_MPI_General*)ctx->todata;
12: VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
13: PetscErrorCode ierr;
14: PetscInt i;
15: PetscMPIInt rank;
16: PetscViewerFormat format;
17: PetscBool iascii;
20: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
21: if (iascii) {
22: MPI_Comm_rank(PetscObjectComm((PetscObject)ctx),&rank);
23: PetscViewerGetFormat(viewer,&format);
24: if (format == PETSC_VIEWER_ASCII_INFO) {
25: PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;
27: MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
28: MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
29: itmp = to->starts[to->n];
30: MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
31: itmp = from->starts[from->n];
32: MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
33: MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)ctx));
35: PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
36: PetscViewerASCIIPrintf(viewer," Blocksize %D\n",to->bs);
37: PetscViewerASCIIPrintf(viewer," Maximum number sends %D\n",nsend_max);
38: PetscViewerASCIIPrintf(viewer," Maximum number receives %D\n",nrecv_max);
39: PetscViewerASCIIPrintf(viewer," Maximum data sent %D\n",(PetscInt)(lensend_max*to->bs*sizeof(PetscScalar)));
40: PetscViewerASCIIPrintf(viewer," Maximum data received %D\n",(PetscInt)(lenrecv_max*to->bs*sizeof(PetscScalar)));
41: PetscViewerASCIIPrintf(viewer," Total data sent %D\n",(PetscInt)(alldata*to->bs*sizeof(PetscScalar)));
42: } else {
43: PetscViewerASCIIPrintf(viewer," VecScatter Blocksize %D\n",to->bs);
44: PetscViewerASCIIPushSynchronized(viewer);
45: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);
46: if (to->n) {
47: for (i=0; i<to->n; i++) {
48: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length = %D to whom %d\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
49: }
50: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Now the indices for all remote sends (in order by process sent to)\n",rank);
51: for (i=0; i<to->starts[to->n]; i++) {
52: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,to->indices[i]);
53: }
54: }
56: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);
57: if (from->n) {
58: for (i=0; i<from->n; i++) {
59: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %d\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
60: }
62: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Now the indices for all remote receives (in order by process received from)\n",rank);
63: for (i=0; i<from->starts[from->n]; i++) {
64: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,from->indices[i]);
65: }
66: }
67: if (to->local.n) {
68: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Indices for local part of scatter\n",rank);
69: for (i=0; i<to->local.n; i++) { /* the to and from have the opposite meaning from what you would expect */
70: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] From %D to %D \n",rank,to->local.vslots[i],from->local.vslots[i]);
71: }
72: }
74: for (i=0; i<to->msize; i++) {
75: if (to->sharedspacestarts[i+1] > to->sharedspacestarts[i]) {
76: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Via shared memory to local memory partner %d count %d\n",rank,i,to->sharedspacestarts[i+1]-to->sharedspacestarts[i]);
77: }
78: }
79: for (i=0; i<from->msize; i++) {
80: if (from->sharedspacestarts[i+1] > from->sharedspacestarts[i]) {
81: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Via shared memory from local memory partner %d count %d\n",rank,i,from->sharedspacestarts[i+1]-from->sharedspacestarts[i]);
82: }
83: }
85: PetscViewerFlush(viewer);
86: PetscViewerASCIIPopSynchronized(viewer);
88: PetscViewerASCIIPrintf(viewer,"Method used to implement the VecScatter: ");
89: }
90: }
91: return(0);
92: }
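/* -----------------------------------------------------------------------------------*/
/*
   Illustrative sketch (not part of the original source): how the statistics
   printed by VecScatterView_MPI() above can be requested from user code.
   Only VecScatterCreate(), VecScatterView(), the stdout viewer and the
   PETSC_VIEWER_ASCII_INFO format are assumed; the vectors and index sets are
   whatever the application already has.
*/
static PetscErrorCode ViewScatterStatsSketch(Vec x,IS ix,Vec y,IS iy)
{
  VecScatter     sct;
  PetscErrorCode ierr;

  ierr = VecScatterCreate(x,ix,y,iy,&sct);CHKERRQ(ierr);
  ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
  ierr = VecScatterView(sct,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr); /* takes the reduced-summary branch above */
  ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&sct);CHKERRQ(ierr);
  return(0);
}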
94: /* -----------------------------------------------------------------------------------*/
95: /*
96: The next routine determines which part of the local scatter is an exact
97: copy of values into their current locations. We check this here and
98: then know that we can skip that portion of the scatter when the vector is
99: scattering to itself with INSERT_VALUES.
101: This is currently not used, but it would speed up, for example, DMLocalToLocalBegin/End().
103: */
104: PetscErrorCode VecScatterLocalOptimize_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from)
105: {
106: PetscInt n = to->n,n_nonmatching = 0,i,*to_slots = to->vslots,*from_slots = from->vslots;
108: PetscInt *nto_slots,*nfrom_slots,j = 0;
111: for (i=0; i<n; i++) {
112: if (to_slots[i] != from_slots[i]) n_nonmatching++;
113: }
115: if (!n_nonmatching) {
116: to->nonmatching_computed = PETSC_TRUE;
117: to->n_nonmatching = from->n_nonmatching = 0;
118: PetscInfo1(scatter,"Reduced %D to 0\n", n);
119: } else if (n_nonmatching == n) {
120: to->nonmatching_computed = PETSC_FALSE;
121: PetscInfo(scatter,"All values non-matching\n");
122: } else {
123: to->nonmatching_computed= PETSC_TRUE;
124: to->n_nonmatching = from->n_nonmatching = n_nonmatching;
126: PetscMalloc1(n_nonmatching,&nto_slots);
127: PetscMalloc1(n_nonmatching,&nfrom_slots);
129: to->slots_nonmatching = nto_slots;
130: from->slots_nonmatching = nfrom_slots;
131: for (i=0; i<n; i++) {
132: if (to_slots[i] != from_slots[i]) {
133: nto_slots[j] = to_slots[i];
134: nfrom_slots[j] = from_slots[i];
135: j++;
136: }
137: }
138: PetscInfo2(scatter,"Reduced %D to %D\n",n,n_nonmatching);
139: }
140: return(0);
141: }
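/* --------------------------------------------------------------------------------------*/
/*
   Illustrative sketch (not part of the original source): the compaction above
   on a concrete example. With to->vslots = {0,1,7,3} and from->vslots =
   {0,1,2,3}, entries 0, 1 and 3 already match, so only the element pair (7,2)
   still has to be copied; the 4-entry local scatter shrinks to a 1-entry one.
*/
static void LocalOptimizeExampleSketch(void)
{
  const PetscInt to_slots[4]   = {0,1,7,3};
  const PetscInt from_slots[4] = {0,1,2,3};
  PetscInt       i,n_nonmatching = 0;

  for (i=0; i<4; i++) {
    if (to_slots[i] != from_slots[i]) n_nonmatching++;
  }
  /* n_nonmatching is now 1; slots_nonmatching would hold the single pair (7,2) */
}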
143: /* --------------------------------------------------------------------------------------*/
145: /* -------------------------------------------------------------------------------------*/
146: PetscErrorCode VecScatterDestroy_PtoP_MPI3(VecScatter ctx)
147: {
148: VecScatter_MPI_General *to = (VecScatter_MPI_General*)ctx->todata;
149: VecScatter_MPI_General *from = (VecScatter_MPI_General*)ctx->fromdata;
150: PetscErrorCode ierr;
151: PetscInt i;
154: /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
155: if (to->requests) {
156: for (i=0; i<to->n; i++) {
157: MPI_Request_free(to->requests + i);
158: }
159: }
160: if (to->rev_requests) {
161: for (i=0; i<to->n; i++) {
162: MPI_Request_free(to->rev_requests + i);
163: }
164: }
165: if (from->requests) {
166: for (i=0; i<from->n; i++) {
167: MPI_Request_free(from->requests + i);
168: }
169: }
170: if (from->rev_requests) {
171: for (i=0; i<from->n; i++) {
172: MPI_Request_free(from->rev_requests + i);
173: }
174: }
175: if (to->sharedwin != MPI_WIN_NULL) {MPI_Win_free(&to->sharedwin);}
176: if (from->sharedwin != MPI_WIN_NULL) {MPI_Win_free(&from->sharedwin);}
177: PetscFree(to->sharedspaces);
178: PetscFree(to->sharedspacesoffset);
179: PetscFree(to->sharedspaceindices);
180: PetscFree(to->sharedspacestarts);
182: PetscFree(from->sharedspaceindices);
183: PetscFree(from->sharedspaces);
184: PetscFree(from->sharedspacesoffset);
185: PetscFree(from->sharedspacestarts);
187: PetscFree(to->local.vslots);
188: PetscFree(from->local.vslots);
189: PetscFree(to->local.slots_nonmatching);
190: PetscFree(from->local.slots_nonmatching);
191: PetscFree(to->rev_requests);
192: PetscFree(from->rev_requests);
193: PetscFree(to->requests);
194: PetscFree(from->requests);
195: PetscFree4(to->values,to->indices,to->starts,to->procs);
196: PetscFree2(to->sstatus,to->rstatus);
197: PetscFree4(from->values,from->indices,from->starts,from->procs);
198: VecScatterMemcpyPlanDestroy_PtoP(to,from);
199: PetscFree(from);
200: PetscFree(to);
201: return(0);
202: }
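/* --------------------------------------------------------------------------------------*/
/*
   Illustrative sketch (not part of the original source): the persistent-request
   life cycle that the destroy routine above completes. A request created once
   with MPI_Send_init() can be started many times and must finally be released
   with MPI_Request_free(); the buffer, count, peer and tag here are hypothetical.
*/
static void PersistentSendSketch(PetscScalar *buf,PetscMPIInt count,PetscMPIInt peer,PetscMPIInt tag,MPI_Comm comm)
{
  MPI_Request req;

  MPI_Send_init(buf,count,MPIU_SCALAR,peer,tag,comm,&req); /* create once */
  MPI_Start(&req);                                         /* start one scatter */
  MPI_Wait(&req,MPI_STATUS_IGNORE);                        /* complete it; req stays reusable */
  MPI_Request_free(&req);                                  /* what VecScatterDestroy_PtoP_MPI3() does */
}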
204: /* --------------------------------------------------------------------------------------*/
206: PetscErrorCode VecScatterCopy_PtoP_X(VecScatter in,VecScatter out)
207: {
208: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
209: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
210: PetscErrorCode ierr;
211: PetscInt ny,bs = in_from->bs,jj;
212: PetscShmComm scomm;
213: MPI_Comm mscomm;
214: MPI_Info info;
217: out->ops->begin = in->ops->begin;
218: out->ops->end = in->ops->end;
219: out->ops->copy = in->ops->copy;
220: out->ops->destroy = in->ops->destroy;
221: out->ops->view = in->ops->view;
223: /* allocate entire send scatter context */
224: PetscNewLog(out,&out_to);
225: out_to->sharedwin = MPI_WIN_NULL;
226: PetscNewLog(out,&out_from);
227: out_from->sharedwin = MPI_WIN_NULL;
229: ny = in_to->starts[in_to->n];
230: out_to->n = in_to->n;
231: out_to->format = in_to->format;
233: PetscMalloc1(out_to->n,&out_to->requests);
234: PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
235: PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
236: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
237: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
238: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
240: out->todata = (void*)out_to;
241: out_to->local.n = in_to->local.n;
242: out_to->local.nonmatching_computed = PETSC_FALSE;
243: out_to->local.n_nonmatching = 0;
244: out_to->local.slots_nonmatching = 0;
245: if (in_to->local.n) {
246: PetscMalloc1(in_to->local.n,&out_to->local.vslots);
247: PetscMalloc1(in_from->local.n,&out_from->local.vslots);
248: PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
249: PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
250: } else {
251: out_to->local.vslots = 0;
252: out_from->local.vslots = 0;
253: }
255: /* allocate entire receive context */
256: VecScatterMemcpyPlanCopy(&in_to->local.memcpy_plan,&out_to->local.memcpy_plan);
257: VecScatterMemcpyPlanCopy(&in_from->local.memcpy_plan,&out_from->local.memcpy_plan);
258: VecScatterMemcpyPlanCopy(&in_to->memcpy_plan,&out_to->memcpy_plan);
259: VecScatterMemcpyPlanCopy(&in_from->memcpy_plan,&out_from->memcpy_plan);
261: out_from->format = in_from->format;
262: ny = in_from->starts[in_from->n];
263: out_from->n = in_from->n;
265: PetscMalloc1(out_from->n,&out_from->requests);
266: PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
267: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
268: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
269: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
271: out->fromdata = (void*)out_from;
272: out_from->local.n = in_from->local.n;
273: out_from->local.nonmatching_computed = PETSC_FALSE;
274: out_from->local.n_nonmatching = 0;
275: out_from->local.slots_nonmatching = 0;
277: /*
278: set up the request arrays for use with MPI_Send_init() and MPI_Recv_init()
279: */
280: {
281: PetscMPIInt tag;
282: MPI_Comm comm;
283: PetscInt *sstarts = out_to->starts, *rstarts = out_from->starts;
284: PetscMPIInt *sprocs = out_to->procs, *rprocs = out_from->procs;
285: PetscInt i;
286: MPI_Request *swaits = out_to->requests,*rwaits = out_from->requests;
287: MPI_Request *rev_swaits,*rev_rwaits;
288: PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;
290: PetscMalloc1(in_to->n,&out_to->rev_requests);
291: PetscMalloc1(in_from->n,&out_from->rev_requests);
293: rev_rwaits = out_to->rev_requests;
294: rev_swaits = out_from->rev_requests;
296: out_from->bs = out_to->bs = bs;
297: tag = ((PetscObject)out)->tag;
298: PetscObjectGetComm((PetscObject)out,&comm);
300: /* Register the receives that you will use later (sends for scatter reverse) */
301: for (i=0; i<out_from->n; i++) {
302: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
303: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
304: }
306: for (i=0; i<out_to->n; i++) {
307: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
308: }
310: /* Register receives for scatter reverse */
311: for (i=0; i<out_to->n; i++) {
312: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
313: }
314: }
316: /* since the to and from data structures are not symmetric for shared memory copies we ensure they are always listed in "standard" form */
317: if (in_to->sharedwin == MPI_WIN_NULL) {
318: VecScatter_MPI_General *totmp = in_to,*out_totmp = out_to;
319: in_to = in_from;
320: in_from = totmp;
321: out_to = out_from;
322: out_from = out_totmp;
323: }
325: /* copy the to parts for the shared memory copies between processes */
326: out_to->sharedcnt = in_to->sharedcnt;
327: out_to->msize = in_to->msize;
328: PetscMalloc1(out_to->msize+1,&out_to->sharedspacestarts);
329: PetscMemcpy(out_to->sharedspacestarts,in_to->sharedspacestarts,(out_to->msize+1)*sizeof(PetscInt));
330: PetscMalloc1(out_to->sharedcnt,&out_to->sharedspaceindices);
331: PetscMemcpy(out_to->sharedspaceindices,in_to->sharedspaceindices,out_to->sharedcnt*sizeof(PetscInt));
333: PetscShmCommGet(PetscObjectComm((PetscObject)in),&scomm);
334: PetscShmCommGetMpiShmComm(scomm,&mscomm);
335: MPI_Info_create(&info);
336: MPI_Info_set(info, "alloc_shared_noncontig", "true");
337: MPIU_Win_allocate_shared(bs*out_to->sharedcnt*sizeof(PetscScalar),sizeof(PetscScalar),info,mscomm,&out_to->sharedspace,&out_to->sharedwin);
338: MPI_Info_free(&info);
340: /* copy the from parts for the shared memory copies between processes */
341: out_from->sharedcnt = in_from->sharedcnt;
342: out_from->msize = in_from->msize;
343: PetscMalloc1(out_from->msize+1,&out_from->sharedspacestarts);
344: PetscMemcpy(out_from->sharedspacestarts,in_from->sharedspacestarts,(out_from->msize+1)*sizeof(PetscInt));
345: PetscMalloc1(out_from->sharedcnt,&out_from->sharedspaceindices);
346: PetscMemcpy(out_from->sharedspaceindices,in_from->sharedspaceindices,out_from->sharedcnt*sizeof(PetscInt));
347: PetscMalloc1(out_from->msize,&out_from->sharedspacesoffset);
348: PetscMemcpy(out_from->sharedspacesoffset,in_from->sharedspacesoffset,out_from->msize*sizeof(PetscInt));
349: PetscCalloc1(out_from->msize,&out_from->sharedspaces);
350: for (jj=0; jj<out_from->msize; jj++) {
351: MPI_Aint isize;
352: PetscMPIInt disp_unit;
353: MPIU_Win_shared_query(out_to->sharedwin,jj,&isize,&disp_unit,&out_from->sharedspaces[jj]);
354: }
355: return(0);
356: }
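/* --------------------------------------------------------------------------------------*/
/*
   Illustrative sketch (not part of the original source): the MPI-3 shared-memory
   pattern used above, reduced to its core. Each rank of a node-local
   communicator allocates a slice of one window and can map any other rank's
   slice directly with MPI_Win_shared_query(); nodecomm is assumed to have been
   created with MPI_Comm_split_type(...,MPI_COMM_TYPE_SHARED,...).
*/
static void SharedWindowSketch(MPI_Comm nodecomm,MPI_Aint nbytes)
{
  MPI_Win     win;
  void        *mine,*rank0;
  MPI_Aint    size;
  PetscMPIInt disp_unit;

  MPI_Win_allocate_shared(nbytes,1,MPI_INFO_NULL,nodecomm,&mine,&win);
  MPI_Win_shared_query(win,0,&size,&disp_unit,&rank0); /* direct pointer to rank 0's slice */
  MPI_Win_free(&win);
}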
358: PetscErrorCode VecScatterCopy_PtoP_AllToAll(VecScatter in,VecScatter out)
359: {
360: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
361: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
362: PetscErrorCode ierr;
363: PetscInt ny,bs = in_from->bs;
364: PetscMPIInt size;
367: MPI_Comm_size(PetscObjectComm((PetscObject)in),&size);
369: out->ops->begin = in->ops->begin;
370: out->ops->end = in->ops->end;
371: out->ops->copy = in->ops->copy;
372: out->ops->destroy = in->ops->destroy;
373: out->ops->view = in->ops->view;
375: /* allocate entire send scatter context */
376: PetscNewLog(out,&out_to);
377: out_to->sharedwin = MPI_WIN_NULL;
378: PetscNewLog(out,&out_from);
379: out_from->sharedwin = MPI_WIN_NULL;
381: ny = in_to->starts[in_to->n];
382: out_to->n = in_to->n;
383: out_to->format = in_to->format;
385: PetscMalloc1(out_to->n,&out_to->requests);
386: PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
387: PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
388: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
389: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
390: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
392: out->todata = (void*)out_to;
393: out_to->local.n = in_to->local.n;
394: out_to->local.nonmatching_computed = PETSC_FALSE;
395: out_to->local.n_nonmatching = 0;
396: out_to->local.slots_nonmatching = 0;
397: if (in_to->local.n) {
398: PetscMalloc1(in_to->local.n,&out_to->local.vslots);
399: PetscMalloc1(in_from->local.n,&out_from->local.vslots);
400: PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
401: PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
402: } else {
403: out_to->local.vslots = 0;
404: out_from->local.vslots = 0;
405: }
407: /* allocate entire receive context */
408: VecScatterMemcpyPlanCopy_PtoP(in_to,in_from,out_to,out_from);
410: out_from->format = in_from->format;
411: ny = in_from->starts[in_from->n];
412: out_from->n = in_from->n;
414: PetscMalloc1(out_from->n,&out_from->requests);
415: PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
416: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
417: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
418: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
420: out->fromdata = (void*)out_from;
421: out_from->local.n = in_from->local.n;
422: out_from->local.nonmatching_computed = PETSC_FALSE;
423: out_from->local.n_nonmatching = 0;
424: out_from->local.slots_nonmatching = 0;
426: return(0);
427: }
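/* --------------------------------------------------------------------------------------*/
/*
   Illustrative sketch (not part of the original source): both copy routines
   above are reached through the public VecScatterCopy() interface, e.g. when a
   second scatter with its own, independent message buffers is needed.
*/
static PetscErrorCode DuplicateScatterSketch(VecScatter in,VecScatter *out)
{
  PetscErrorCode ierr;

  ierr = VecScatterCopy(in,out);CHKERRQ(ierr); /* dispatches to the ops->copy set above */
  return(0);
}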
428: /* --------------------------------------------------------------------------------------------------
429: Packs the message data into send buffers and unpacks it from receive buffers.
431: These kernels could be generated automatically.
433: Fortran kernels etc. could also be used.
434: */
435: PETSC_STATIC_INLINE void Pack_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
436: {
437: PetscInt i;
438: for (i=0; i<n; i++) y[i] = x[indicesx[i]];
439: }
441: PETSC_STATIC_INLINE PetscErrorCode UnPack_1(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
442: {
443: PetscInt i;
446: switch (addv) {
447: case INSERT_VALUES:
448: case INSERT_ALL_VALUES:
449: for (i=0; i<n; i++) y[indicesy[i]] = x[i];
450: break;
451: case ADD_VALUES:
452: case ADD_ALL_VALUES:
453: for (i=0; i<n; i++) y[indicesy[i]] += x[i];
454: break;
455: #if !defined(PETSC_USE_COMPLEX)
456: case MAX_VALUES:
457: for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[i]);
458: #else
459: case MAX_VALUES:
460: #endif
461: case NOT_SET_VALUES:
462: break;
463: default:
464: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
465: }
466: return(0);
467: }
469: PETSC_STATIC_INLINE PetscErrorCode Scatter_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
470: {
471: PetscInt i;
474: switch (addv) {
475: case INSERT_VALUES:
476: case INSERT_ALL_VALUES:
477: for (i=0; i<n; i++) y[indicesy[i]] = x[indicesx[i]];
478: break;
479: case ADD_VALUES:
480: case ADD_ALL_VALUES:
481: for (i=0; i<n; i++) y[indicesy[i]] += x[indicesx[i]];
482: break;
483: #if !defined(PETSC_USE_COMPLEX)
484: case MAX_VALUES:
485: for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[indicesx[i]]);
486: #else
487: case MAX_VALUES:
488: #endif
489: case NOT_SET_VALUES:
490: break;
491: default:
492: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
493: }
494: return(0);
495: }
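/* ----------------------------------------------------------------------------------------------- */
/*
   Illustrative sketch (not part of the original source): a round trip through
   the bs = 1 kernels above. Packing gathers x[idx[i]] into a contiguous
   message buffer; unpacking with ADD_VALUES accumulates that buffer back into
   y at the same indices.
*/
static void PackUnpackRoundTripSketch(void)
{
  const PetscInt idx[3] = {4,0,2};
  PetscScalar    x[5]   = {10,11,12,13,14},buf[3],y[5] = {0,0,0,0,0};

  Pack_1(3,idx,x,buf,1);                    /* buf = {14,10,12} */
  (void)UnPack_1(3,buf,idx,y,ADD_VALUES,1); /* y[4] += 14, y[0] += 10, y[2] += 12 */
}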
497: /* ----------------------------------------------------------------------------------------------- */
498: PETSC_STATIC_INLINE void Pack_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
499: {
500: PetscInt i,idx;
502: for (i=0; i<n; i++) {
503: idx = *indicesx++;
504: y[0] = x[idx];
505: y[1] = x[idx+1];
506: y += 2;
507: }
508: }
510: PETSC_STATIC_INLINE PetscErrorCode UnPack_2(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
511: {
512: PetscInt i,idy;
515: switch (addv) {
516: case INSERT_VALUES:
517: case INSERT_ALL_VALUES:
518: for (i=0; i<n; i++) {
519: idy = *indicesy++;
520: y[idy] = x[0];
521: y[idy+1] = x[1];
522: x += 2;
523: }
524: break;
525: case ADD_VALUES:
526: case ADD_ALL_VALUES:
527: for (i=0; i<n; i++) {
528: idy = *indicesy++;
529: y[idy] += x[0];
530: y[idy+1] += x[1];
531: x += 2;
532: }
533: break;
534: #if !defined(PETSC_USE_COMPLEX)
535: case MAX_VALUES:
536: for (i=0; i<n; i++) {
537: idy = *indicesy++;
538: y[idy] = PetscMax(y[idy],x[0]);
539: y[idy+1] = PetscMax(y[idy+1],x[1]);
540: x += 2;
541: }
542: #else
543: case MAX_VALUES:
544: #endif
545: case NOT_SET_VALUES:
546: break;
547: default:
548: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
549: }
550: return(0);
551: }
553: PETSC_STATIC_INLINE PetscErrorCode Scatter_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
554: {
555: PetscInt i,idx,idy;
558: switch (addv) {
559: case INSERT_VALUES:
560: case INSERT_ALL_VALUES:
561: for (i=0; i<n; i++) {
562: idx = *indicesx++;
563: idy = *indicesy++;
564: y[idy] = x[idx];
565: y[idy+1] = x[idx+1];
566: }
567: break;
568: case ADD_VALUES:
569: case ADD_ALL_VALUES:
570: for (i=0; i<n; i++) {
571: idx = *indicesx++;
572: idy = *indicesy++;
573: y[idy] += x[idx];
574: y[idy+1] += x[idx+1];
575: }
576: break;
577: #if !defined(PETSC_USE_COMPLEX)
578: case MAX_VALUES:
579: for (i=0; i<n; i++) {
580: idx = *indicesx++;
581: idy = *indicesy++;
582: y[idy] = PetscMax(y[idy],x[idx]);
583: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
584: }
585: #else
586: case MAX_VALUES:
587: #endif
588: case NOT_SET_VALUES:
589: break;
590: default:
591: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
592: }
593: return(0);
594: }
595: /* ----------------------------------------------------------------------------------------------- */
596: PETSC_STATIC_INLINE void Pack_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
597: {
598: PetscInt i,idx;
600: for (i=0; i<n; i++) {
601: idx = *indicesx++;
602: y[0] = x[idx];
603: y[1] = x[idx+1];
604: y[2] = x[idx+2];
605: y += 3;
606: }
607: }
608: PETSC_STATIC_INLINE PetscErrorCode UnPack_3(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
609: {
610: PetscInt i,idy;
613: switch (addv) {
614: case INSERT_VALUES:
615: case INSERT_ALL_VALUES:
616: for (i=0; i<n; i++) {
617: idy = *indicesy++;
618: y[idy] = x[0];
619: y[idy+1] = x[1];
620: y[idy+2] = x[2];
621: x += 3;
622: }
623: break;
624: case ADD_VALUES:
625: case ADD_ALL_VALUES:
626: for (i=0; i<n; i++) {
627: idy = *indicesy++;
628: y[idy] += x[0];
629: y[idy+1] += x[1];
630: y[idy+2] += x[2];
631: x += 3;
632: }
633: break;
634: #if !defined(PETSC_USE_COMPLEX)
635: case MAX_VALUES:
636: for (i=0; i<n; i++) {
637: idy = *indicesy++;
638: y[idy] = PetscMax(y[idy],x[0]);
639: y[idy+1] = PetscMax(y[idy+1],x[1]);
640: y[idy+2] = PetscMax(y[idy+2],x[2]);
641: x += 3;
642: }
643: #else
644: case MAX_VALUES:
645: #endif
646: case NOT_SET_VALUES:
647: break;
648: default:
649: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
650: }
651: return(0);
652: }
654: PETSC_STATIC_INLINE PetscErrorCode Scatter_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
655: {
656: PetscInt i,idx,idy;
659: switch (addv) {
660: case INSERT_VALUES:
661: case INSERT_ALL_VALUES:
662: for (i=0; i<n; i++) {
663: idx = *indicesx++;
664: idy = *indicesy++;
665: y[idy] = x[idx];
666: y[idy+1] = x[idx+1];
667: y[idy+2] = x[idx+2];
668: }
669: break;
670: case ADD_VALUES:
671: case ADD_ALL_VALUES:
672: for (i=0; i<n; i++) {
673: idx = *indicesx++;
674: idy = *indicesy++;
675: y[idy] += x[idx];
676: y[idy+1] += x[idx+1];
677: y[idy+2] += x[idx+2];
678: }
679: break;
680: #if !defined(PETSC_USE_COMPLEX)
681: case MAX_VALUES:
682: for (i=0; i<n; i++) {
683: idx = *indicesx++;
684: idy = *indicesy++;
685: y[idy] = PetscMax(y[idy],x[idx]);
686: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
687: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
688: }
689: #else
690: case MAX_VALUES:
691: #endif
692: case NOT_SET_VALUES:
693: break;
694: default:
695: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
696: }
697: return(0);
698: }
699: /* ----------------------------------------------------------------------------------------------- */
700: PETSC_STATIC_INLINE void Pack_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
701: {
702: PetscInt i,idx;
704: for (i=0; i<n; i++) {
705: idx = *indicesx++;
706: y[0] = x[idx];
707: y[1] = x[idx+1];
708: y[2] = x[idx+2];
709: y[3] = x[idx+3];
710: y += 4;
711: }
712: }
713: PETSC_STATIC_INLINE PetscErrorCode UnPack_4(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
714: {
715: PetscInt i,idy;
718: switch (addv) {
719: case INSERT_VALUES:
720: case INSERT_ALL_VALUES:
721: for (i=0; i<n; i++) {
722: idy = *indicesy++;
723: y[idy] = x[0];
724: y[idy+1] = x[1];
725: y[idy+2] = x[2];
726: y[idy+3] = x[3];
727: x += 4;
728: }
729: break;
730: case ADD_VALUES:
731: case ADD_ALL_VALUES:
732: for (i=0; i<n; i++) {
733: idy = *indicesy++;
734: y[idy] += x[0];
735: y[idy+1] += x[1];
736: y[idy+2] += x[2];
737: y[idy+3] += x[3];
738: x += 4;
739: }
740: break;
741: #if !defined(PETSC_USE_COMPLEX)
742: case MAX_VALUES:
743: for (i=0; i<n; i++) {
744: idy = *indicesy++;
745: y[idy] = PetscMax(y[idy],x[0]);
746: y[idy+1] = PetscMax(y[idy+1],x[1]);
747: y[idy+2] = PetscMax(y[idy+2],x[2]);
748: y[idy+3] = PetscMax(y[idy+3],x[3]);
749: x += 4;
750: }
751: #else
752: case MAX_VALUES:
753: #endif
754: case NOT_SET_VALUES:
755: break;
756: default:
757: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
758: }
759: return(0);
760: }
762: PETSC_STATIC_INLINE PetscErrorCode Scatter_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
763: {
764: PetscInt i,idx,idy;
767: switch (addv) {
768: case INSERT_VALUES:
769: case INSERT_ALL_VALUES:
770: for (i=0; i<n; i++) {
771: idx = *indicesx++;
772: idy = *indicesy++;
773: y[idy] = x[idx];
774: y[idy+1] = x[idx+1];
775: y[idy+2] = x[idx+2];
776: y[idy+3] = x[idx+3];
777: }
778: break;
779: case ADD_VALUES:
780: case ADD_ALL_VALUES:
781: for (i=0; i<n; i++) {
782: idx = *indicesx++;
783: idy = *indicesy++;
784: y[idy] += x[idx];
785: y[idy+1] += x[idx+1];
786: y[idy+2] += x[idx+2];
787: y[idy+3] += x[idx+3];
788: }
789: break;
790: #if !defined(PETSC_USE_COMPLEX)
791: case MAX_VALUES:
792: for (i=0; i<n; i++) {
793: idx = *indicesx++;
794: idy = *indicesy++;
795: y[idy] = PetscMax(y[idy],x[idx]);
796: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
797: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
798: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
799: }
800: #else
801: case MAX_VALUES:
802: #endif
803: case NOT_SET_VALUES:
804: break;
805: default:
806: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
807: }
808: return(0);
809: }
810: /* ----------------------------------------------------------------------------------------------- */
811: PETSC_STATIC_INLINE void Pack_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
812: {
813: PetscInt i,idx;
815: for (i=0; i<n; i++) {
816: idx = *indicesx++;
817: y[0] = x[idx];
818: y[1] = x[idx+1];
819: y[2] = x[idx+2];
820: y[3] = x[idx+3];
821: y[4] = x[idx+4];
822: y += 5;
823: }
824: }
826: PETSC_STATIC_INLINE PetscErrorCode UnPack_5(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
827: {
828: PetscInt i,idy;
831: switch (addv) {
832: case INSERT_VALUES:
833: case INSERT_ALL_VALUES:
834: for (i=0; i<n; i++) {
835: idy = *indicesy++;
836: y[idy] = x[0];
837: y[idy+1] = x[1];
838: y[idy+2] = x[2];
839: y[idy+3] = x[3];
840: y[idy+4] = x[4];
841: x += 5;
842: }
843: break;
844: case ADD_VALUES:
845: case ADD_ALL_VALUES:
846: for (i=0; i<n; i++) {
847: idy = *indicesy++;
848: y[idy] += x[0];
849: y[idy+1] += x[1];
850: y[idy+2] += x[2];
851: y[idy+3] += x[3];
852: y[idy+4] += x[4];
853: x += 5;
854: }
855: break;
856: #if !defined(PETSC_USE_COMPLEX)
857: case MAX_VALUES:
858: for (i=0; i<n; i++) {
859: idy = *indicesy++;
860: y[idy] = PetscMax(y[idy],x[0]);
861: y[idy+1] = PetscMax(y[idy+1],x[1]);
862: y[idy+2] = PetscMax(y[idy+2],x[2]);
863: y[idy+3] = PetscMax(y[idy+3],x[3]);
864: y[idy+4] = PetscMax(y[idy+4],x[4]);
865: x += 5;
866: }
867: #else
868: case MAX_VALUES:
869: #endif
870: case NOT_SET_VALUES:
871: break;
872: default:
873: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
874: }
875: return(0);
876: }
878: PETSC_STATIC_INLINE PetscErrorCode Scatter_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
879: {
880: PetscInt i,idx,idy;
883: switch (addv) {
884: case INSERT_VALUES:
885: case INSERT_ALL_VALUES:
886: for (i=0; i<n; i++) {
887: idx = *indicesx++;
888: idy = *indicesy++;
889: y[idy] = x[idx];
890: y[idy+1] = x[idx+1];
891: y[idy+2] = x[idx+2];
892: y[idy+3] = x[idx+3];
893: y[idy+4] = x[idx+4];
894: }
895: break;
896: case ADD_VALUES:
897: case ADD_ALL_VALUES:
898: for (i=0; i<n; i++) {
899: idx = *indicesx++;
900: idy = *indicesy++;
901: y[idy] += x[idx];
902: y[idy+1] += x[idx+1];
903: y[idy+2] += x[idx+2];
904: y[idy+3] += x[idx+3];
905: y[idy+4] += x[idx+4];
906: }
907: break;
908: #if !defined(PETSC_USE_COMPLEX)
909: case MAX_VALUES:
910: for (i=0; i<n; i++) {
911: idx = *indicesx++;
912: idy = *indicesy++;
913: y[idy] = PetscMax(y[idy],x[idx]);
914: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
915: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
916: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
917: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
918: }
919: #else
920: case MAX_VALUES:
921: #endif
922: case NOT_SET_VALUES:
923: break;
924: default:
925: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
926: }
927: return(0);
928: }
929: /* ----------------------------------------------------------------------------------------------- */
930: PETSC_STATIC_INLINE void Pack_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
931: {
932: PetscInt i,idx;
934: for (i=0; i<n; i++) {
935: idx = *indicesx++;
936: y[0] = x[idx];
937: y[1] = x[idx+1];
938: y[2] = x[idx+2];
939: y[3] = x[idx+3];
940: y[4] = x[idx+4];
941: y[5] = x[idx+5];
942: y += 6;
943: }
944: }
946: PETSC_STATIC_INLINE PetscErrorCode UnPack_6(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
947: {
948: PetscInt i,idy;
951: switch (addv) {
952: case INSERT_VALUES:
953: case INSERT_ALL_VALUES:
954: for (i=0; i<n; i++) {
955: idy = *indicesy++;
956: y[idy] = x[0];
957: y[idy+1] = x[1];
958: y[idy+2] = x[2];
959: y[idy+3] = x[3];
960: y[idy+4] = x[4];
961: y[idy+5] = x[5];
962: x += 6;
963: }
964: break;
965: case ADD_VALUES:
966: case ADD_ALL_VALUES:
967: for (i=0; i<n; i++) {
968: idy = *indicesy++;
969: y[idy] += x[0];
970: y[idy+1] += x[1];
971: y[idy+2] += x[2];
972: y[idy+3] += x[3];
973: y[idy+4] += x[4];
974: y[idy+5] += x[5];
975: x += 6;
976: }
977: break;
978: #if !defined(PETSC_USE_COMPLEX)
979: case MAX_VALUES:
980: for (i=0; i<n; i++) {
981: idy = *indicesy++;
982: y[idy] = PetscMax(y[idy],x[0]);
983: y[idy+1] = PetscMax(y[idy+1],x[1]);
984: y[idy+2] = PetscMax(y[idy+2],x[2]);
985: y[idy+3] = PetscMax(y[idy+3],x[3]);
986: y[idy+4] = PetscMax(y[idy+4],x[4]);
987: y[idy+5] = PetscMax(y[idy+5],x[5]);
988: x += 6;
989: }
990: #else
991: case MAX_VALUES:
992: #endif
993: case NOT_SET_VALUES:
994: break;
995: default:
996: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
997: }
998: return(0);
999: }
1001: PETSC_STATIC_INLINE PetscErrorCode Scatter_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1002: {
1003: PetscInt i,idx,idy;
1006: switch (addv) {
1007: case INSERT_VALUES:
1008: case INSERT_ALL_VALUES:
1009: for (i=0; i<n; i++) {
1010: idx = *indicesx++;
1011: idy = *indicesy++;
1012: y[idy] = x[idx];
1013: y[idy+1] = x[idx+1];
1014: y[idy+2] = x[idx+2];
1015: y[idy+3] = x[idx+3];
1016: y[idy+4] = x[idx+4];
1017: y[idy+5] = x[idx+5];
1018: }
1019: break;
1020: case ADD_VALUES:
1021: case ADD_ALL_VALUES:
1022: for (i=0; i<n; i++) {
1023: idx = *indicesx++;
1024: idy = *indicesy++;
1025: y[idy] += x[idx];
1026: y[idy+1] += x[idx+1];
1027: y[idy+2] += x[idx+2];
1028: y[idy+3] += x[idx+3];
1029: y[idy+4] += x[idx+4];
1030: y[idy+5] += x[idx+5];
1031: }
1032: break;
1033: #if !defined(PETSC_USE_COMPLEX)
1034: case MAX_VALUES:
1035: for (i=0; i<n; i++) {
1036: idx = *indicesx++;
1037: idy = *indicesy++;
1038: y[idy] = PetscMax(y[idy],x[idx]);
1039: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1040: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1041: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1042: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1043: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1044: }
1045: #else
1046: case MAX_VALUES:
1047: #endif
1048: case NOT_SET_VALUES:
1049: break;
1050: default:
1051: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1052: }
1053: return(0);
1054: }
1055: /* ----------------------------------------------------------------------------------------------- */
1056: PETSC_STATIC_INLINE void Pack_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1057: {
1058: PetscInt i,idx;
1060: for (i=0; i<n; i++) {
1061: idx = *indicesx++;
1062: y[0] = x[idx];
1063: y[1] = x[idx+1];
1064: y[2] = x[idx+2];
1065: y[3] = x[idx+3];
1066: y[4] = x[idx+4];
1067: y[5] = x[idx+5];
1068: y[6] = x[idx+6];
1069: y += 7;
1070: }
1071: }
1073: PETSC_STATIC_INLINE PetscErrorCode UnPack_7(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1074: {
1075: PetscInt i,idy;
1078: switch (addv) {
1079: case INSERT_VALUES:
1080: case INSERT_ALL_VALUES:
1081: for (i=0; i<n; i++) {
1082: idy = *indicesy++;
1083: y[idy] = x[0];
1084: y[idy+1] = x[1];
1085: y[idy+2] = x[2];
1086: y[idy+3] = x[3];
1087: y[idy+4] = x[4];
1088: y[idy+5] = x[5];
1089: y[idy+6] = x[6];
1090: x += 7;
1091: }
1092: break;
1093: case ADD_VALUES:
1094: case ADD_ALL_VALUES:
1095: for (i=0; i<n; i++) {
1096: idy = *indicesy++;
1097: y[idy] += x[0];
1098: y[idy+1] += x[1];
1099: y[idy+2] += x[2];
1100: y[idy+3] += x[3];
1101: y[idy+4] += x[4];
1102: y[idy+5] += x[5];
1103: y[idy+6] += x[6];
1104: x += 7;
1105: }
1106: break;
1107: #if !defined(PETSC_USE_COMPLEX)
1108: case MAX_VALUES:
1109: for (i=0; i<n; i++) {
1110: idy = *indicesy++;
1111: y[idy] = PetscMax(y[idy],x[0]);
1112: y[idy+1] = PetscMax(y[idy+1],x[1]);
1113: y[idy+2] = PetscMax(y[idy+2],x[2]);
1114: y[idy+3] = PetscMax(y[idy+3],x[3]);
1115: y[idy+4] = PetscMax(y[idy+4],x[4]);
1116: y[idy+5] = PetscMax(y[idy+5],x[5]);
1117: y[idy+6] = PetscMax(y[idy+6],x[6]);
1118: x += 7;
1119: }
1120: #else
1121: case MAX_VALUES:
1122: #endif
1123: case NOT_SET_VALUES:
1124: break;
1125: default:
1126: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1127: }
1128: return(0);
1129: }
1131: PETSC_STATIC_INLINE PetscErrorCode Scatter_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1132: {
1133: PetscInt i,idx,idy;
1136: switch (addv) {
1137: case INSERT_VALUES:
1138: case INSERT_ALL_VALUES:
1139: for (i=0; i<n; i++) {
1140: idx = *indicesx++;
1141: idy = *indicesy++;
1142: y[idy] = x[idx];
1143: y[idy+1] = x[idx+1];
1144: y[idy+2] = x[idx+2];
1145: y[idy+3] = x[idx+3];
1146: y[idy+4] = x[idx+4];
1147: y[idy+5] = x[idx+5];
1148: y[idy+6] = x[idx+6];
1149: }
1150: break;
1151: case ADD_VALUES:
1152: case ADD_ALL_VALUES:
1153: for (i=0; i<n; i++) {
1154: idx = *indicesx++;
1155: idy = *indicesy++;
1156: y[idy] += x[idx];
1157: y[idy+1] += x[idx+1];
1158: y[idy+2] += x[idx+2];
1159: y[idy+3] += x[idx+3];
1160: y[idy+4] += x[idx+4];
1161: y[idy+5] += x[idx+5];
1162: y[idy+6] += x[idx+6];
1163: }
1164: break;
1165: #if !defined(PETSC_USE_COMPLEX)
1166: case MAX_VALUES:
1167: for (i=0; i<n; i++) {
1168: idx = *indicesx++;
1169: idy = *indicesy++;
1170: y[idy] = PetscMax(y[idy],x[idx]);
1171: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1172: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1173: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1174: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1175: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1176: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1177: }
1178: #else
1179: case MAX_VALUES:
1180: #endif
1181: case NOT_SET_VALUES:
1182: break;
1183: default:
1184: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1185: }
1186: return(0);
1187: }
1188: /* ----------------------------------------------------------------------------------------------- */
1189: PETSC_STATIC_INLINE void Pack_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1190: {
1191: PetscInt i,idx;
1193: for (i=0; i<n; i++) {
1194: idx = *indicesx++;
1195: y[0] = x[idx];
1196: y[1] = x[idx+1];
1197: y[2] = x[idx+2];
1198: y[3] = x[idx+3];
1199: y[4] = x[idx+4];
1200: y[5] = x[idx+5];
1201: y[6] = x[idx+6];
1202: y[7] = x[idx+7];
1203: y += 8;
1204: }
1205: }
1207: PETSC_STATIC_INLINE PetscErrorCode UnPack_8(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1208: {
1209: PetscInt i,idy;
1212: switch (addv) {
1213: case INSERT_VALUES:
1214: case INSERT_ALL_VALUES:
1215: for (i=0; i<n; i++) {
1216: idy = *indicesy++;
1217: y[idy] = x[0];
1218: y[idy+1] = x[1];
1219: y[idy+2] = x[2];
1220: y[idy+3] = x[3];
1221: y[idy+4] = x[4];
1222: y[idy+5] = x[5];
1223: y[idy+6] = x[6];
1224: y[idy+7] = x[7];
1225: x += 8;
1226: }
1227: break;
1228: case ADD_VALUES:
1229: case ADD_ALL_VALUES:
1230: for (i=0; i<n; i++) {
1231: idy = *indicesy++;
1232: y[idy] += x[0];
1233: y[idy+1] += x[1];
1234: y[idy+2] += x[2];
1235: y[idy+3] += x[3];
1236: y[idy+4] += x[4];
1237: y[idy+5] += x[5];
1238: y[idy+6] += x[6];
1239: y[idy+7] += x[7];
1240: x += 8;
1241: }
1242: break;
1243: #if !defined(PETSC_USE_COMPLEX)
1244: case MAX_VALUES:
1245: for (i=0; i<n; i++) {
1246: idy = *indicesy++;
1247: y[idy] = PetscMax(y[idy],x[0]);
1248: y[idy+1] = PetscMax(y[idy+1],x[1]);
1249: y[idy+2] = PetscMax(y[idy+2],x[2]);
1250: y[idy+3] = PetscMax(y[idy+3],x[3]);
1251: y[idy+4] = PetscMax(y[idy+4],x[4]);
1252: y[idy+5] = PetscMax(y[idy+5],x[5]);
1253: y[idy+6] = PetscMax(y[idy+6],x[6]);
1254: y[idy+7] = PetscMax(y[idy+7],x[7]);
1255: x += 8;
1256: }
1257: #else
1258: case MAX_VALUES:
1259: #endif
1260: case NOT_SET_VALUES:
1261: break;
1262: default:
1263: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1264: }
1265: return(0);
1266: }
1268: PETSC_STATIC_INLINE PetscErrorCode Scatter_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1269: {
1270: PetscInt i,idx,idy;
1273: switch (addv) {
1274: case INSERT_VALUES:
1275: case INSERT_ALL_VALUES:
1276: for (i=0; i<n; i++) {
1277: idx = *indicesx++;
1278: idy = *indicesy++;
1279: y[idy] = x[idx];
1280: y[idy+1] = x[idx+1];
1281: y[idy+2] = x[idx+2];
1282: y[idy+3] = x[idx+3];
1283: y[idy+4] = x[idx+4];
1284: y[idy+5] = x[idx+5];
1285: y[idy+6] = x[idx+6];
1286: y[idy+7] = x[idx+7];
1287: }
1288: break;
1289: case ADD_VALUES:
1290: case ADD_ALL_VALUES:
1291: for (i=0; i<n; i++) {
1292: idx = *indicesx++;
1293: idy = *indicesy++;
1294: y[idy] += x[idx];
1295: y[idy+1] += x[idx+1];
1296: y[idy+2] += x[idx+2];
1297: y[idy+3] += x[idx+3];
1298: y[idy+4] += x[idx+4];
1299: y[idy+5] += x[idx+5];
1300: y[idy+6] += x[idx+6];
1301: y[idy+7] += x[idx+7];
1302: }
1303: break;
1304: #if !defined(PETSC_USE_COMPLEX)
1305: case MAX_VALUES:
1306: for (i=0; i<n; i++) {
1307: idx = *indicesx++;
1308: idy = *indicesy++;
1309: y[idy] = PetscMax(y[idy],x[idx]);
1310: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1311: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1312: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1313: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1314: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1315: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1316: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1317: }
1318: #else
1319: case MAX_VALUES:
1320: #endif
1321: case NOT_SET_VALUES:
1322: break;
1323: default:
1324: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1325: }
1326: return(0);
1327: }
1329: PETSC_STATIC_INLINE void Pack_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1330: {
1331: PetscInt i,idx;
1333: for (i=0; i<n; i++) {
1334: idx = *indicesx++;
1335: y[0] = x[idx];
1336: y[1] = x[idx+1];
1337: y[2] = x[idx+2];
1338: y[3] = x[idx+3];
1339: y[4] = x[idx+4];
1340: y[5] = x[idx+5];
1341: y[6] = x[idx+6];
1342: y[7] = x[idx+7];
1343: y[8] = x[idx+8];
1344: y += 9;
1345: }
1346: }
1348: PETSC_STATIC_INLINE PetscErrorCode UnPack_9(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1349: {
1350: PetscInt i,idy;
1353: switch (addv) {
1354: case INSERT_VALUES:
1355: case INSERT_ALL_VALUES:
1356: for (i=0; i<n; i++) {
1357: idy = *indicesy++;
1358: y[idy] = x[0];
1359: y[idy+1] = x[1];
1360: y[idy+2] = x[2];
1361: y[idy+3] = x[3];
1362: y[idy+4] = x[4];
1363: y[idy+5] = x[5];
1364: y[idy+6] = x[6];
1365: y[idy+7] = x[7];
1366: y[idy+8] = x[8];
1367: x += 9;
1368: }
1369: break;
1370: case ADD_VALUES:
1371: case ADD_ALL_VALUES:
1372: for (i=0; i<n; i++) {
1373: idy = *indicesy++;
1374: y[idy] += x[0];
1375: y[idy+1] += x[1];
1376: y[idy+2] += x[2];
1377: y[idy+3] += x[3];
1378: y[idy+4] += x[4];
1379: y[idy+5] += x[5];
1380: y[idy+6] += x[6];
1381: y[idy+7] += x[7];
1382: y[idy+8] += x[8];
1383: x += 9;
1384: }
1385: break;
1386: #if !defined(PETSC_USE_COMPLEX)
1387: case MAX_VALUES:
1388: for (i=0; i<n; i++) {
1389: idy = *indicesy++;
1390: y[idy] = PetscMax(y[idy],x[0]);
1391: y[idy+1] = PetscMax(y[idy+1],x[1]);
1392: y[idy+2] = PetscMax(y[idy+2],x[2]);
1393: y[idy+3] = PetscMax(y[idy+3],x[3]);
1394: y[idy+4] = PetscMax(y[idy+4],x[4]);
1395: y[idy+5] = PetscMax(y[idy+5],x[5]);
1396: y[idy+6] = PetscMax(y[idy+6],x[6]);
1397: y[idy+7] = PetscMax(y[idy+7],x[7]);
1398: y[idy+8] = PetscMax(y[idy+8],x[8]);
1399: x += 9;
1400: }
1401: #else
1402: case MAX_VALUES:
1403: #endif
1404: case NOT_SET_VALUES:
1405: break;
1406: default:
1407: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1408: }
1409: return(0);
1410: }
1412: PETSC_STATIC_INLINE PetscErrorCode Scatter_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1413: {
1414: PetscInt i,idx,idy;
1417: switch (addv) {
1418: case INSERT_VALUES:
1419: case INSERT_ALL_VALUES:
1420: for (i=0; i<n; i++) {
1421: idx = *indicesx++;
1422: idy = *indicesy++;
1423: y[idy] = x[idx];
1424: y[idy+1] = x[idx+1];
1425: y[idy+2] = x[idx+2];
1426: y[idy+3] = x[idx+3];
1427: y[idy+4] = x[idx+4];
1428: y[idy+5] = x[idx+5];
1429: y[idy+6] = x[idx+6];
1430: y[idy+7] = x[idx+7];
1431: y[idy+8] = x[idx+8];
1432: }
1433: break;
1434: case ADD_VALUES:
1435: case ADD_ALL_VALUES:
1436: for (i=0; i<n; i++) {
1437: idx = *indicesx++;
1438: idy = *indicesy++;
1439: y[idy] += x[idx];
1440: y[idy+1] += x[idx+1];
1441: y[idy+2] += x[idx+2];
1442: y[idy+3] += x[idx+3];
1443: y[idy+4] += x[idx+4];
1444: y[idy+5] += x[idx+5];
1445: y[idy+6] += x[idx+6];
1446: y[idy+7] += x[idx+7];
1447: y[idy+8] += x[idx+8];
1448: }
1449: break;
1450: #if !defined(PETSC_USE_COMPLEX)
1451: case MAX_VALUES:
1452: for (i=0; i<n; i++) {
1453: idx = *indicesx++;
1454: idy = *indicesy++;
1455: y[idy] = PetscMax(y[idy],x[idx]);
1456: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1457: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1458: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1459: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1460: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1461: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1462: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1463: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1464: }
1465: #else
1466: case MAX_VALUES:
1467: #endif
1468: case NOT_SET_VALUES:
1469: break;
1470: default:
1471: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1472: }
1473: return(0);
1474: }
1476: PETSC_STATIC_INLINE void Pack_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1477: {
1478: PetscInt i,idx;
1480: for (i=0; i<n; i++) {
1481: idx = *indicesx++;
1482: y[0] = x[idx];
1483: y[1] = x[idx+1];
1484: y[2] = x[idx+2];
1485: y[3] = x[idx+3];
1486: y[4] = x[idx+4];
1487: y[5] = x[idx+5];
1488: y[6] = x[idx+6];
1489: y[7] = x[idx+7];
1490: y[8] = x[idx+8];
1491: y[9] = x[idx+9];
1492: y += 10;
1493: }
1494: }
1496: PETSC_STATIC_INLINE PetscErrorCode UnPack_10(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1497: {
1498: PetscInt i,idy;
1501: switch (addv) {
1502: case INSERT_VALUES:
1503: case INSERT_ALL_VALUES:
1504: for (i=0; i<n; i++) {
1505: idy = *indicesy++;
1506: y[idy] = x[0];
1507: y[idy+1] = x[1];
1508: y[idy+2] = x[2];
1509: y[idy+3] = x[3];
1510: y[idy+4] = x[4];
1511: y[idy+5] = x[5];
1512: y[idy+6] = x[6];
1513: y[idy+7] = x[7];
1514: y[idy+8] = x[8];
1515: y[idy+9] = x[9];
1516: x += 10;
1517: }
1518: break;
1519: case ADD_VALUES:
1520: case ADD_ALL_VALUES:
1521: for (i=0; i<n; i++) {
1522: idy = *indicesy++;
1523: y[idy] += x[0];
1524: y[idy+1] += x[1];
1525: y[idy+2] += x[2];
1526: y[idy+3] += x[3];
1527: y[idy+4] += x[4];
1528: y[idy+5] += x[5];
1529: y[idy+6] += x[6];
1530: y[idy+7] += x[7];
1531: y[idy+8] += x[8];
1532: y[idy+9] += x[9];
1533: x += 10;
1534: }
1535: break;
1536: #if !defined(PETSC_USE_COMPLEX)
1537: case MAX_VALUES:
1538: for (i=0; i<n; i++) {
1539: idy = *indicesy++;
1540: y[idy] = PetscMax(y[idy],x[0]);
1541: y[idy+1] = PetscMax(y[idy+1],x[1]);
1542: y[idy+2] = PetscMax(y[idy+2],x[2]);
1543: y[idy+3] = PetscMax(y[idy+3],x[3]);
1544: y[idy+4] = PetscMax(y[idy+4],x[4]);
1545: y[idy+5] = PetscMax(y[idy+5],x[5]);
1546: y[idy+6] = PetscMax(y[idy+6],x[6]);
1547: y[idy+7] = PetscMax(y[idy+7],x[7]);
1548: y[idy+8] = PetscMax(y[idy+8],x[8]);
1549: y[idy+9] = PetscMax(y[idy+9],x[9]);
1550: x += 10;
1551: }
1552: #else
1553: case MAX_VALUES:
1554: #endif
1555: case NOT_SET_VALUES:
1556: break;
1557: default:
1558: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1559: }
1560: return(0);
1561: }
1563: PETSC_STATIC_INLINE PetscErrorCode Scatter_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1564: {
1565: PetscInt i,idx,idy;
1568: switch (addv) {
1569: case INSERT_VALUES:
1570: case INSERT_ALL_VALUES:
1571: for (i=0; i<n; i++) {
1572: idx = *indicesx++;
1573: idy = *indicesy++;
1574: y[idy] = x[idx];
1575: y[idy+1] = x[idx+1];
1576: y[idy+2] = x[idx+2];
1577: y[idy+3] = x[idx+3];
1578: y[idy+4] = x[idx+4];
1579: y[idy+5] = x[idx+5];
1580: y[idy+6] = x[idx+6];
1581: y[idy+7] = x[idx+7];
1582: y[idy+8] = x[idx+8];
1583: y[idy+9] = x[idx+9];
1584: }
1585: break;
1586: case ADD_VALUES:
1587: case ADD_ALL_VALUES:
1588: for (i=0; i<n; i++) {
1589: idx = *indicesx++;
1590: idy = *indicesy++;
1591: y[idy] += x[idx];
1592: y[idy+1] += x[idx+1];
1593: y[idy+2] += x[idx+2];
1594: y[idy+3] += x[idx+3];
1595: y[idy+4] += x[idx+4];
1596: y[idy+5] += x[idx+5];
1597: y[idy+6] += x[idx+6];
1598: y[idy+7] += x[idx+7];
1599: y[idy+8] += x[idx+8];
1600: y[idy+9] += x[idx+9];
1601: }
1602: break;
1603: #if !defined(PETSC_USE_COMPLEX)
1604: case MAX_VALUES:
1605: for (i=0; i<n; i++) {
1606: idx = *indicesx++;
1607: idy = *indicesy++;
1608: y[idy] = PetscMax(y[idy],x[idx]);
1609: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1610: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1611: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1612: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1613: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1614: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1615: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1616: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1617: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1618: }
1619: #else
1620: case MAX_VALUES:
1621: #endif
1622: case NOT_SET_VALUES:
1623: break;
1624: default:
1625: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1626: }
1627: return(0);
1628: }
1630: PETSC_STATIC_INLINE void Pack_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1631: {
1632: PetscInt i,idx;
1634: for (i=0; i<n; i++) {
1635: idx = *indicesx++;
1636: y[0] = x[idx];
1637: y[1] = x[idx+1];
1638: y[2] = x[idx+2];
1639: y[3] = x[idx+3];
1640: y[4] = x[idx+4];
1641: y[5] = x[idx+5];
1642: y[6] = x[idx+6];
1643: y[7] = x[idx+7];
1644: y[8] = x[idx+8];
1645: y[9] = x[idx+9];
1646: y[10] = x[idx+10];
1647: y += 11;
1648: }
1649: }
1651: PETSC_STATIC_INLINE PetscErrorCode UnPack_11(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1652: {
1653: PetscInt i,idy;
1656: switch (addv) {
1657: case INSERT_VALUES:
1658: case INSERT_ALL_VALUES:
1659: for (i=0; i<n; i++) {
1660: idy = *indicesy++;
1661: y[idy] = x[0];
1662: y[idy+1] = x[1];
1663: y[idy+2] = x[2];
1664: y[idy+3] = x[3];
1665: y[idy+4] = x[4];
1666: y[idy+5] = x[5];
1667: y[idy+6] = x[6];
1668: y[idy+7] = x[7];
1669: y[idy+8] = x[8];
1670: y[idy+9] = x[9];
1671: y[idy+10] = x[10];
1672: x += 11;
1673: }
1674: break;
1675: case ADD_VALUES:
1676: case ADD_ALL_VALUES:
1677: for (i=0; i<n; i++) {
1678: idy = *indicesy++;
1679: y[idy] += x[0];
1680: y[idy+1] += x[1];
1681: y[idy+2] += x[2];
1682: y[idy+3] += x[3];
1683: y[idy+4] += x[4];
1684: y[idy+5] += x[5];
1685: y[idy+6] += x[6];
1686: y[idy+7] += x[7];
1687: y[idy+8] += x[8];
1688: y[idy+9] += x[9];
1689: y[idy+10] += x[10];
1690: x += 11;
1691: }
1692: break;
1693: #if !defined(PETSC_USE_COMPLEX)
1694: case MAX_VALUES:
1695: for (i=0; i<n; i++) {
1696: idy = *indicesy++;
1697: y[idy] = PetscMax(y[idy],x[0]);
1698: y[idy+1] = PetscMax(y[idy+1],x[1]);
1699: y[idy+2] = PetscMax(y[idy+2],x[2]);
1700: y[idy+3] = PetscMax(y[idy+3],x[3]);
1701: y[idy+4] = PetscMax(y[idy+4],x[4]);
1702: y[idy+5] = PetscMax(y[idy+5],x[5]);
1703: y[idy+6] = PetscMax(y[idy+6],x[6]);
1704: y[idy+7] = PetscMax(y[idy+7],x[7]);
1705: y[idy+8] = PetscMax(y[idy+8],x[8]);
1706: y[idy+9] = PetscMax(y[idy+9],x[9]);
1707: y[idy+10] = PetscMax(y[idy+10],x[10]);
1708: x += 11;
1709: }
1710: #else
1711: case MAX_VALUES:
1712: #endif
1713: case NOT_SET_VALUES:
1714: break;
1715: default:
1716: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1717: }
1718: return(0);
1719: }
1721: PETSC_STATIC_INLINE PetscErrorCode Scatter_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1722: {
1723: PetscInt i,idx,idy;
1726: switch (addv) {
1727: case INSERT_VALUES:
1728: case INSERT_ALL_VALUES:
1729: for (i=0; i<n; i++) {
1730: idx = *indicesx++;
1731: idy = *indicesy++;
1732: y[idy] = x[idx];
1733: y[idy+1] = x[idx+1];
1734: y[idy+2] = x[idx+2];
1735: y[idy+3] = x[idx+3];
1736: y[idy+4] = x[idx+4];
1737: y[idy+5] = x[idx+5];
1738: y[idy+6] = x[idx+6];
1739: y[idy+7] = x[idx+7];
1740: y[idy+8] = x[idx+8];
1741: y[idy+9] = x[idx+9];
1742: y[idy+10] = x[idx+10];
1743: }
1744: break;
1745: case ADD_VALUES:
1746: case ADD_ALL_VALUES:
1747: for (i=0; i<n; i++) {
1748: idx = *indicesx++;
1749: idy = *indicesy++;
1750: y[idy] += x[idx];
1751: y[idy+1] += x[idx+1];
1752: y[idy+2] += x[idx+2];
1753: y[idy+3] += x[idx+3];
1754: y[idy+4] += x[idx+4];
1755: y[idy+5] += x[idx+5];
1756: y[idy+6] += x[idx+6];
1757: y[idy+7] += x[idx+7];
1758: y[idy+8] += x[idx+8];
1759: y[idy+9] += x[idx+9];
1760: y[idy+10] += x[idx+10];
1761: }
1762: break;
1763: #if !defined(PETSC_USE_COMPLEX)
1764: case MAX_VALUES:
1765: for (i=0; i<n; i++) {
1766: idx = *indicesx++;
1767: idy = *indicesy++;
1768: y[idy] = PetscMax(y[idy],x[idx]);
1769: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1770: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1771: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1772: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1773: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1774: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1775: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1776: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1777: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1778: y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1779: }
1780: #else
1781: case MAX_VALUES:
1782: #endif
1783: case NOT_SET_VALUES:
1784: break;
1785: default:
1786: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1787: }
1788: return(0);
1789: }
1791: /* ----------------------------------------------------------------------------------------------- */
1792: PETSC_STATIC_INLINE void Pack_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1793: {
1794: PetscInt i,idx;
1796: for (i=0; i<n; i++) {
1797: idx = *indicesx++;
1798: y[0] = x[idx];
1799: y[1] = x[idx+1];
1800: y[2] = x[idx+2];
1801: y[3] = x[idx+3];
1802: y[4] = x[idx+4];
1803: y[5] = x[idx+5];
1804: y[6] = x[idx+6];
1805: y[7] = x[idx+7];
1806: y[8] = x[idx+8];
1807: y[9] = x[idx+9];
1808: y[10] = x[idx+10];
1809: y[11] = x[idx+11];
1810: y += 12;
1811: }
1812: }
1814: PETSC_STATIC_INLINE PetscErrorCode UnPack_12(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1815: {
1816: PetscInt i,idy;
1819: switch (addv) {
1820: case INSERT_VALUES:
1821: case INSERT_ALL_VALUES:
1822: for (i=0; i<n; i++) {
1823: idy = *indicesy++;
1824: y[idy] = x[0];
1825: y[idy+1] = x[1];
1826: y[idy+2] = x[2];
1827: y[idy+3] = x[3];
1828: y[idy+4] = x[4];
1829: y[idy+5] = x[5];
1830: y[idy+6] = x[6];
1831: y[idy+7] = x[7];
1832: y[idy+8] = x[8];
1833: y[idy+9] = x[9];
1834: y[idy+10] = x[10];
1835: y[idy+11] = x[11];
1836: x += 12;
1837: }
1838: break;
1839: case ADD_VALUES:
1840: case ADD_ALL_VALUES:
1841: for (i=0; i<n; i++) {
1842: idy = *indicesy++;
1843: y[idy] += x[0];
1844: y[idy+1] += x[1];
1845: y[idy+2] += x[2];
1846: y[idy+3] += x[3];
1847: y[idy+4] += x[4];
1848: y[idy+5] += x[5];
1849: y[idy+6] += x[6];
1850: y[idy+7] += x[7];
1851: y[idy+8] += x[8];
1852: y[idy+9] += x[9];
1853: y[idy+10] += x[10];
1854: y[idy+11] += x[11];
1855: x += 12;
1856: }
1857: break;
1858: #if !defined(PETSC_USE_COMPLEX)
1859: case MAX_VALUES:
1860: for (i=0; i<n; i++) {
1861: idy = *indicesy++;
1862: y[idy] = PetscMax(y[idy],x[0]);
1863: y[idy+1] = PetscMax(y[idy+1],x[1]);
1864: y[idy+2] = PetscMax(y[idy+2],x[2]);
1865: y[idy+3] = PetscMax(y[idy+3],x[3]);
1866: y[idy+4] = PetscMax(y[idy+4],x[4]);
1867: y[idy+5] = PetscMax(y[idy+5],x[5]);
1868: y[idy+6] = PetscMax(y[idy+6],x[6]);
1869: y[idy+7] = PetscMax(y[idy+7],x[7]);
1870: y[idy+8] = PetscMax(y[idy+8],x[8]);
1871: y[idy+9] = PetscMax(y[idy+9],x[9]);
1872: y[idy+10] = PetscMax(y[idy+10],x[10]);
1873: y[idy+11] = PetscMax(y[idy+11],x[11]);
1874: x += 12;
1875: }
1876: #else
1877: case MAX_VALUES:
1878: #endif
1879: case NOT_SET_VALUES:
1880: break;
1881: default:
1882: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1883: }
1884: return(0);
1885: }
1887: PETSC_STATIC_INLINE PetscErrorCode Scatter_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1888: {
1889: PetscInt i,idx,idy;
1892: switch (addv) {
1893: case INSERT_VALUES:
1894: case INSERT_ALL_VALUES:
1895: for (i=0; i<n; i++) {
1896: idx = *indicesx++;
1897: idy = *indicesy++;
1898: y[idy] = x[idx];
1899: y[idy+1] = x[idx+1];
1900: y[idy+2] = x[idx+2];
1901: y[idy+3] = x[idx+3];
1902: y[idy+4] = x[idx+4];
1903: y[idy+5] = x[idx+5];
1904: y[idy+6] = x[idx+6];
1905: y[idy+7] = x[idx+7];
1906: y[idy+8] = x[idx+8];
1907: y[idy+9] = x[idx+9];
1908: y[idy+10] = x[idx+10];
1909: y[idy+11] = x[idx+11];
1910: }
1911: break;
1912: case ADD_VALUES:
1913: case ADD_ALL_VALUES:
1914: for (i=0; i<n; i++) {
1915: idx = *indicesx++;
1916: idy = *indicesy++;
1917: y[idy] += x[idx];
1918: y[idy+1] += x[idx+1];
1919: y[idy+2] += x[idx+2];
1920: y[idy+3] += x[idx+3];
1921: y[idy+4] += x[idx+4];
1922: y[idy+5] += x[idx+5];
1923: y[idy+6] += x[idx+6];
1924: y[idy+7] += x[idx+7];
1925: y[idy+8] += x[idx+8];
1926: y[idy+9] += x[idx+9];
1927: y[idy+10] += x[idx+10];
1928: y[idy+11] += x[idx+11];
1929: }
1930: break;
1931: #if !defined(PETSC_USE_COMPLEX)
1932: case MAX_VALUES:
1933: for (i=0; i<n; i++) {
1934: idx = *indicesx++;
1935: idy = *indicesy++;
1936: y[idy] = PetscMax(y[idy],x[idx]);
1937: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1938: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1939: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1940: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1941: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1942: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1943: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1944: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1945: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1946: y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1947: y[idy+11] = PetscMax(y[idy+11],x[idx+11]);
1948: }
1949: #else
1950: case MAX_VALUES:
1951: #endif
1952: case NOT_SET_VALUES:
1953: break;
1954: default:
1955: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1956: }
1957: return(0);
1958: }
1960: /* ----------------------------------------------------------------------------------------------- */
1961: PETSC_STATIC_INLINE void Pack_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1962: {
1963: PetscInt i,idx;
1965: for (i=0; i<n; i++) {
1966: idx = *indicesx++;
1967: PetscMemcpy(y,x + idx,bs*sizeof(PetscScalar));
1968: y += bs;
1969: }
1970: }
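/* Pack_bs (and the UnPack_bs/Scatter_bs routines below) is the run-time
   fallback for block sizes that have no unrolled variant above: each block
   is moved with a single PetscMemcpy of bs scalars rather than bs
   individual assignments. */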
1972: PETSC_STATIC_INLINE PetscErrorCode UnPack_bs(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1973: {
1974: PetscInt i,idy,j;
1977: switch (addv) {
1978: case INSERT_VALUES:
1979: case INSERT_ALL_VALUES:
1980: for (i=0; i<n; i++) {
1981: idy = *indicesy++;
1982: PetscMemcpy(y + idy,x,bs*sizeof(PetscScalar));
1983: x += bs;
1984: }
1985: break;
1986: case ADD_VALUES:
1987: case ADD_ALL_VALUES:
1988: for (i=0; i<n; i++) {
1989: idy = *indicesy++;
1990: for (j=0; j<bs; j++) y[idy+j] += x[j];
1991: x += bs;
1992: }
1993: break;
1994: #if !defined(PETSC_USE_COMPLEX)
1995: case MAX_VALUES:
1996: for (i=0; i<n; i++) {
1997: idy = *indicesy++;
1998: for (j=0; j<bs; j++) y[idy+j] = PetscMax(y[idy+j],x[j]);
1999: x += bs;
2000: }
2001: #else
2002: case MAX_VALUES:
2003: #endif
2004: case NOT_SET_VALUES:
2005: break;
2006: default:
2007: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2008: }
2009: return(0);
2010: }
2012: PETSC_STATIC_INLINE PetscErrorCode Scatter_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
2013: {
2014: PetscInt i,idx,idy,j;
2017: switch (addv) {
2018: case INSERT_VALUES:
2019: case INSERT_ALL_VALUES:
2020: for (i=0; i<n; i++) {
2021: idx = *indicesx++;
2022: idy = *indicesy++;
2023: PetscMemcpy(y + idy, x + idx,bs*sizeof(PetscScalar));
2024: }
2025: break;
2026: case ADD_VALUES:
2027: case ADD_ALL_VALUES:
2028: for (i=0; i<n; i++) {
2029: idx = *indicesx++;
2030: idy = *indicesy++;
2031: for (j=0; j<bs; j++) y[idy+j] += x[idx+j];
2032: }
2033: break;
2034: #if !defined(PETSC_USE_COMPLEX)
2035: case MAX_VALUES:
2036: for (i=0; i<n; i++) {
2037: idx = *indicesx++;
2038: idy = *indicesy++;
2039: for (j=0; j<bs; j++) y[idy+j] = PetscMax(y[idy+j],x[idx+j]);
2040: }
2041: #else
2042: case MAX_VALUES:
2043: #endif
2044: case NOT_SET_VALUES:
2045: break;
2046: default:
2047: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2048: }
2049: return(0);
2050: }
2052: /* Create the VecScatterBegin/End_P for our chosen block sizes */
2053: #define BS 1
2054: #include <../src/vec/vscat/impls/vpscat.h>
2055: #define BS 2
2056: #include <../src/vec/vscat/impls/vpscat.h>
2057: #define BS 3
2058: #include <../src/vec/vscat/impls/vpscat.h>
2059: #define BS 4
2060: #include <../src/vec/vscat/impls/vpscat.h>
2061: #define BS 5
2062: #include <../src/vec/vscat/impls/vpscat.h>
2063: #define BS 6
2064: #include <../src/vec/vscat/impls/vpscat.h>
2065: #define BS 7
2066: #include <../src/vec/vscat/impls/vpscat.h>
2067: #define BS 8
2068: #include <../src/vec/vscat/impls/vpscat.h>
2069: #define BS 9
2070: #include <../src/vec/vscat/impls/vpscat.h>
2071: #define BS 10
2072: #include <../src/vec/vscat/impls/vpscat.h>
2073: #define BS 11
2074: #include <../src/vec/vscat/impls/vpscat.h>
2075: #define BS 12
2076: #include <../src/vec/vscat/impls/vpscat.h>
2077: #define BS bs
2078: #include <../src/vec/vscat/impls/vpscat.h>
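/* Each '#define BS n' / '#include vpscat.h' pair above is a preprocessor
   template instantiation: vpscat.h presumably pastes the current BS into
   the routine names (yielding the VecScatterBegin_1 .. VecScatterBegin_12
   and MPI3Node variants used below) and #undef's BS so the next pair can
   redefine it; the final '#define BS bs' stamps out the generic _bs
   versions in which the block size is a run-time variable. A minimal
   sketch of the idiom, with hypothetical DEMO_* names:

     // demo.h -- included once per desired BS
     #define DEMO_PASTE2_(a,b) a##b
     #define DEMO_PASTE2(a,b)  DEMO_PASTE2_(a,b)
     static void DEMO_PASTE2(DemoCopy_,BS)(PetscScalar *y,const PetscScalar *x)
     {
       PetscInt j;
       for (j=0; j<BS; j++) y[j] = x[j];  // BS is fixed at expansion time
     }
     #undef DEMO_PASTE2
     #undef DEMO_PASTE2_
     #undef BS
*/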
2080: /* ==========================================================================================*/
2081: /* create parallel to sequential scatter context */
2083: extern PetscErrorCode VecScatterCreateCommon_PtoS_MPI3(VecScatter_MPI_General*,VecScatter_MPI_General*,VecScatter);
2085: /*
2086: bs indicates how many elements there are in each block. Normally this would be 1.
2088: Contains a check that PetscMPIInt can handle the sizes needed.
2089: */
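/* For example, with bs = 3 a block index inidx[i] = k names the three
   scalar entries 3k, 3k+1, 3k+2 of the vector, which is why the code below
   scales indices by bs, e.g. PetscIntMultError(bs,inidx[i],&idx). */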
2090: #include <petscbt.h>
2091: PetscErrorCode VecScatterCreateLocal_PtoS_MPI3(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2092: {
2093: VecScatter_MPI_General *from,*to;
2094: PetscMPIInt size,rank,mrank,imdex,tag,n;
2095: PetscInt *source = NULL,*owners = NULL,nxr;
2096: PetscInt *lowner = NULL,*start = NULL,*lsharedowner = NULL,*sharedstart = NULL,lengthy,lengthx;
2097: PetscMPIInt *nprocs = NULL,nrecvs,nrecvshared;
2098: PetscInt i,j,idx = 0,nsends;
2099: PetscMPIInt *owner = NULL;
2100: PetscInt *starts = NULL,count,slen,slenshared;
2101: PetscInt *rvalues,*svalues,base,*values,nprocslocal,recvtotal,*rsvalues;
2102: PetscMPIInt *onodes1,*olengths1;
2103: MPI_Comm comm;
2104: MPI_Request *send_waits = NULL,*recv_waits = NULL;
2105: MPI_Status recv_status,*send_status;
2106: PetscErrorCode ierr;
2107: PetscShmComm scomm;
2108: PetscMPIInt jj;
2109: MPI_Info info;
2110: MPI_Comm mscomm;
2111: MPI_Win sharedoffsetwin; /* Window that owns sharedspaceoffset */
2112: PetscInt *sharedspaceoffset;
2113: VecScatterType type;
2114: PetscBool mpi3node;
2116: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2117: PetscObjectGetComm((PetscObject)xin,&comm);
2118: MPI_Comm_rank(comm,&rank);
2119: MPI_Comm_size(comm,&size);
2121: VecScatterGetType(ctx,&type);
2122: PetscStrcmp(type,"mpi3node",&mpi3node);
2124: PetscShmCommGet(comm,&scomm);
2125: PetscShmCommGetMpiShmComm(scomm,&mscomm);
2127: MPI_Info_create(&info);
2128: MPI_Info_set(info, "alloc_shared_noncontig", "true");
2130: if (mpi3node) {
2131: /* Check if (parallel) inidx has duplicate indices.
2132: Current VecScatterEndMPI3Node() (case StoP) writes the sequential vector to the parallel vector.
2133: Writing to the same shared location without variable locking
2134: leads to incorrect scattering. See src/vec/vscat/examples/runex2_5 and runex3_5 */
2136: PetscInt *mem,**optr;
2137: MPI_Win swin;
2138: PetscMPIInt msize;
2140: MPI_Comm_size(mscomm,&msize);
2141: MPI_Comm_rank(mscomm,&mrank);
2143: PetscMalloc1(msize,&optr);
2144: MPIU_Win_allocate_shared((nx+1)*sizeof(PetscInt),sizeof(PetscInt),MPI_INFO_NULL,mscomm,&mem,&swin);
2146: /* Write local nx and inidx into mem */
2147: mem[0] = nx;
2148: for (i=1; i<=nx; i++) mem[i] = inidx[i-1];
2149: MPI_Barrier(mscomm); /* sync shared memory */
2151: if (!mrank) {
2152: PetscBT table;
2153: /* sz and dsp_unit are not used. Replacing them with NULL would cause MPI_Win_shared_query() to crash */
2154: MPI_Aint sz;
2155: PetscMPIInt dsp_unit;
2156: PetscInt N = xin->map->N;
2158: PetscBTCreate(N,&table); /* may not be scalable */
2159: PetscBTMemzero(N,table);
2161: jj = 0;
2162: while (jj<msize && !ctx->is_duplicate) {
2163: if (jj == mrank) {jj++; continue;}
2164: MPIU_Win_shared_query(swin,jj,&sz,&dsp_unit,&optr[jj]);
2165: for (j=1; j<=optr[jj][0]; j++) { /* optr[jj][0] = nx */
2166: if (PetscBTLookupSet(table,optr[jj][j])) {
2167: ctx->is_duplicate = PETSC_TRUE; /* only mrank=0 has this info; it will be checked in VecScatterEndMPI3Node() */
2168: break;
2169: }
2170: }
2171: jj++;
2172: }
2173: PetscBTDestroy(&table);
2174: }
2176: if (swin != MPI_WIN_NULL) {MPI_Win_free(&swin);}
2177: PetscFree(optr);
2178: }
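/* The duplicate test above is a standard bit-table membership check: the
   first time index k is seen, PetscBTLookupSet(table,k) returns 0 and sets
   bit k; any later occurrence returns nonzero. A standalone sketch of the
   pattern:

     PetscBT bt;
     PetscBTCreate(N,&bt);
     PetscBTMemzero(N,bt);
     if (PetscBTLookupSet(bt,k)) duplicate = PETSC_TRUE;  // k seen before
     PetscBTDestroy(&bt);
*/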
2180: owners = xin->map->range;
2181: VecGetSize(yin,&lengthy);
2182: VecGetSize(xin,&lengthx);
2184: /* first count number of contributors to each processor */
2185: PetscMalloc2(size,&nprocs,nx,&owner);
2186: PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
2188: j = 0;
2189: nsends = 0;
2190: for (i=0; i<nx; i++) {
2191: PetscIntMultError(bs,inidx[i],&idx);
2192: if (idx < owners[j]) j = 0;
2193: for (; j<size; j++) {
2194: if (idx < owners[j+1]) {
2195: if (!nprocs[j]++) nsends++;
2196: owner[i] = j;
2197: break;
2198: }
2199: }
2200: if (j == size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ith %D block entry %D not owned by any process, upper bound %D",i,idx,owners[size]);
2201: }
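/* owners[] is the usual PETSc ownership-range array: rank j owns global
   entries [owners[j], owners[j+1]). For example, with owners = {0,4,8,12}
   on three ranks, idx = 5 lies in [4,8) and is charged to rank 1. The
   persistent j avoids restarting the search while successive indices are
   nondecreasing; the 'if (idx < owners[j]) j = 0' above resets it when
   they are not. */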
2203: nprocslocal = nprocs[rank];
2204: nprocs[rank] = 0;
2205: if (nprocslocal) nsends--;
2206: /* inform other processors of number of messages and max length */
2207: PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2208: PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2209: PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2210: recvtotal = 0;
2211: for (i=0; i<nrecvs; i++) {
2212: PetscIntSumError(recvtotal,olengths1[i],&recvtotal);
2213: }
2215: /* post receives: */
2216: PetscMalloc3(recvtotal,&rvalues,nrecvs,&source,nrecvs,&recv_waits);
2217: count = 0;
2218: for (i=0; i<nrecvs; i++) {
2219: MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2220: count += olengths1[i];
2221: }
2223: /* do sends:
2224: 1) starts[i] gives the starting index in svalues for the data going to
2225: the ith processor
2226: */
2227: nxr = 0;
2228: for (i=0; i<nx; i++) {
2229: if (owner[i] != rank) nxr++;
2230: }
2231: PetscMalloc3(nxr,&svalues,nsends,&send_waits,size+1,&starts);
2233: starts[0] = 0;
2234: for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2235: for (i=0; i<nx; i++) {
2236: if (owner[i] != rank) svalues[starts[owner[i]]++] = bs*inidx[i];
2237: }
2238: starts[0] = 0;
2239: for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2240: count = 0;
2241: for (i=0; i<size; i++) {
2242: if (nprocs[i]) {
2243: MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);
2244: }
2245: }
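/* The two passes over starts[] are the usual counting-sort layout: an
   exclusive prefix sum of nprocs[] gives each destination's offset into
   svalues, the fill loop advances starts[owner[i]] as it scatters entries,
   and the prefix sum is then rebuilt so that starts[i]..starts[i+1]
   brackets message i. Schematically, for nprocs = {2,0,3} on three ranks:

     after first scan : starts = {0,2,2}      (offsets for the fill)
     after the fill   : starts = {2,2,5}      (each bumped past its data)
     after the rescan : starts = {0,2,2,5}    (rank 2's data is svalues[2..4])
*/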
2247: /* wait on receives; this is everyone we need to deliver data to */
2248: count = nrecvs;
2249: slen = 0;
2250: nrecvshared = 0;
2251: slenshared = 0;
2252: while (count) {
2253: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2254: /* unpack receives into our local space */
2255: MPI_Get_count(&recv_status,MPIU_INT,&n);
2256: PetscShmCommGlobalToLocal(scomm,onodes1[imdex],&jj);
2257: if (jj > -1) {
2258: PetscInfo3(NULL,"[%d] Sending values to shared memory partner %d, global rank %d\n",rank,jj,onodes1[imdex]);
2259: nrecvshared++;
2260: slenshared += n;
2261: }
2262: slen += n;
2263: count--;
2264: }
2266: if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);
2268: /* allocate entire send scatter context */
2269: PetscNewLog(ctx,&to);
2270: to->sharedwin = MPI_WIN_NULL;
2271: to->n = nrecvs-nrecvshared;
2273: PetscMalloc1(nrecvs-nrecvshared,&to->requests);
2274: PetscMalloc4(bs*(slen-slenshared),&to->values,slen-slenshared,&to->indices,nrecvs-nrecvshared+1,&to->starts,nrecvs-nrecvshared,&to->procs);
2275: PetscMalloc2(PetscMax(to->n,nsends),&to->sstatus,PetscMax(to->n,nsends),&to->rstatus);
2277: MPI_Comm_size(mscomm,&to->msize);
2278: PetscMalloc1(slenshared,&to->sharedspaceindices);
2279: PetscCalloc1(to->msize+1,&to->sharedspacestarts);
2281: ctx->todata = (void*)to;
2282: to->starts[0] = 0;
2283: to->sharedspacestarts[0] = 0;
2285: /* Allocate shared memory space for shared memory partner communication */
2286: MPIU_Win_allocate_shared(to->msize*sizeof(PetscInt),sizeof(PetscInt),info,mscomm,&sharedspaceoffset,&sharedoffsetwin);
2287: for (i=0; i<to->msize; i++) sharedspaceoffset[i] = -1; /* mark with -1 for later error checking */
2288: if (nrecvs) {
2289: /* move the data into the send scatter */
2290: base = owners[rank];
2291: rsvalues = rvalues;
2292: to->n = 0;
2293: for (i=0; i<nrecvs; i++) {
2294: values = rsvalues;
2295: PetscShmCommGlobalToLocal(scomm,(PetscMPIInt)onodes1[i],&jj);
2296: if (jj > -1) {
2297: to->sharedspacestarts[jj] = to->sharedcnt;
2298: to->sharedspacestarts[jj+1] = to->sharedcnt + olengths1[i];
2299: for (j=0; j<olengths1[i]; j++) to->sharedspaceindices[to->sharedcnt + j] = values[j] - base;
2300: sharedspaceoffset[jj] = to->sharedcnt;
2301: to->sharedcnt += olengths1[i];
2302: PetscInfo4(NULL,"[%d] Sending %d values to shared memory partner %d, global rank %d\n",rank,olengths1[i],jj,onodes1[i]);
2303: } else {
2304: to->starts[to->n+1] = to->starts[to->n] + olengths1[i];
2305: to->procs[to->n] = onodes1[i];
2306: for (j=0; j<olengths1[i]; j++) to->indices[to->starts[to->n] + j] = values[j] - base;
2307: to->n++;
2308: }
2309: rsvalues += olengths1[i];
2310: }
2311: }
2312: PetscFree(olengths1);
2313: PetscFree(onodes1);
2314: PetscFree3(rvalues,source,recv_waits);
2316: if (mpi3node) {
2317: /* to->sharedspace is used only as a flag */
2318: i = 0;
2319: if (to->sharedcnt) i = 1;
2320: MPIU_Win_allocate_shared(i*sizeof(PetscScalar),sizeof(PetscScalar),info,mscomm,&to->sharedspace,&to->sharedwin);
2321: } else { /* mpi3 */
2322: MPIU_Win_allocate_shared(bs*to->sharedcnt*sizeof(PetscScalar),sizeof(PetscScalar),info,mscomm,&to->sharedspace,&to->sharedwin);
2323: }
2324: if (to->sharedwin == MPI_WIN_NULL) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"Failed to allocate MPI-3 shared-memory window");
2325: MPI_Info_free(&info);
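/* MPIU_Win_allocate_shared()/MPIU_Win_shared_query() wrap the MPI-3
   shared-memory window calls: each rank of the node communicator
   contributes a segment to one window, and any rank can then obtain a
   direct load/store pointer into any other rank's segment. The underlying
   MPI pattern, assuming nodecomm is a shared-memory communicator:

     MPI_Win      win;
     PetscScalar *mine,*theirs;
     MPI_Aint     sz;
     PetscMPIInt  du;
     MPI_Win_allocate_shared(len*sizeof(PetscScalar),sizeof(PetscScalar),
                             MPI_INFO_NULL,nodecomm,&mine,&win);
     MPI_Win_shared_query(win,otherrank,&sz,&du,&theirs);  // peer's segment
*/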
2327: /* allocate entire receive scatter context */
2328: PetscNewLog(ctx,&from);
2329: from->sharedwin = MPI_WIN_NULL;
2330: from->msize = to->msize;
2331: /* we don't actually know the correct value for from->n at this point, so we use the upper bound */
2332: from->n = nsends;
2334: PetscMalloc1(nsends,&from->requests);
2335: PetscMalloc4((ny-nprocslocal)*bs,&from->values,ny-nprocslocal,&from->indices,nsends+1,&from->starts,from->n,&from->procs);
2336: ctx->fromdata = (void*)from;
2338: PetscCalloc1(to->msize+1,&from->sharedspacestarts);
2339: /* move data into receive scatter */
2340: PetscMalloc2(size,&lowner,nsends+1,&start);
2341: PetscMalloc2(size,&lsharedowner,to->msize+1,&sharedstart);
2342: count = 0; from->starts[0] = start[0] = 0;
2343: from->sharedcnt = 0;
2344: for (i=0; i<size; i++) {
2345: lsharedowner[i] = -1;
2346: lowner[i] = -1;
2347: if (nprocs[i]) {
2348: PetscShmCommGlobalToLocal(scomm,i,&jj);
2349: if (jj > -1) {
2350: from->sharedspacestarts[jj] = sharedstart[jj] = from->sharedcnt;
2351: from->sharedspacestarts[jj+1] = sharedstart[jj+1] = from->sharedspacestarts[jj] + nprocs[i];
2352: from->sharedcnt += nprocs[i];
2353: lsharedowner[i] = jj;
2354: } else {
2355: lowner[i] = count++;
2356: from->procs[count-1] = i;
2357: from->starts[count] = start[count] = start[count-1] + nprocs[i];
2358: }
2359: }
2360: }
2361: from->n = count;
2363: for (i=0; i<nx; i++) {
2364: if (owner[i] != rank && lowner[owner[i]] > -1) {
2365: from->indices[start[lowner[owner[i]]]++] = bs*inidy[i];
2366: if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2367: }
2368: }
2370: /* copy over appropriate parts of inidy[] into from->sharedspaceindices */
2371: if (mpi3node) {
2372: /* in addition, copy over appropriate parts of inidx[] into 2nd part of from->sharedspaceindices */
2373: PetscInt *sharedspaceindicesx;
2374: PetscMalloc1(2*from->sharedcnt,&from->sharedspaceindices);
2375: sharedspaceindicesx = from->sharedspaceindices + from->sharedcnt;
2376: for (i=0; i<nx; i++) {
2377: if (owner[i] != rank && lsharedowner[owner[i]] > -1) {
2378: if (sharedstart[lsharedowner[owner[i]]] > from->sharedcnt) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MEM,"Writing past the end of the shared space indices");
2380: from->sharedspaceindices[sharedstart[lsharedowner[owner[i]]]] = bs*inidy[i];
2381: sharedspaceindicesx[sharedstart[lsharedowner[owner[i]]]] = bs*inidx[i]-owners[owner[i]]; /* local inidx */
2382: sharedstart[lsharedowner[owner[i]]]++;
2383: }
2384: }
2385: } else { /* mpi3 */
2386: PetscMalloc1(from->sharedcnt,&from->sharedspaceindices);
2387: for (i=0; i<nx; i++) {
2388: if (owner[i] != rank && lsharedowner[owner[i]] > -1) {
2389: if (sharedstart[lsharedowner[owner[i]]] > from->sharedcnt) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MEM,"Writing past the end of the shared space indices");
2390: from->sharedspaceindices[sharedstart[lsharedowner[owner[i]]]++] = bs*inidy[i];
2391: }
2392: }
2393: }
2395: PetscFree2(lowner,start);
2396: PetscFree2(lsharedowner,sharedstart);
2397: PetscFree2(nprocs,owner);
2399: /* wait on sends */
2400: if (nsends) {
2401: PetscMalloc1(nsends,&send_status);
2402: MPI_Waitall(nsends,send_waits,send_status);
2403: PetscFree(send_status);
2404: }
2405: PetscFree3(svalues,send_waits,starts);
2407: if (nprocslocal) {
2408: PetscInt nt = from->local.n = to->local.n = nprocslocal;
2409: /* we have a scatter to ourselves */
2410: PetscMalloc1(nt,&to->local.vslots);
2411: PetscMalloc1(nt,&from->local.vslots);
2412: nt = 0;
2413: for (i=0; i<nx; i++) {
2414: idx = bs*inidx[i];
2415: if (idx >= owners[rank] && idx < owners[rank+1]) {
2416: to->local.vslots[nt] = idx - owners[rank];
2417: from->local.vslots[nt++] = bs*inidy[i];
2418: if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2419: }
2420: }
2421: PetscLogObjectMemory((PetscObject)ctx,2*nt*sizeof(PetscInt));
2422: } else {
2423: from->local.n = 0;
2424: from->local.vslots = 0;
2425: to->local.n = 0;
2426: to->local.vslots = 0;
2427: }
2429: /* Get the shared memory address for all processes we will be copying data from */
2430: PetscCalloc1(to->msize,&from->sharedspaces);
2431: PetscCalloc1(to->msize,&from->sharedspacesoffset);
2432: MPI_Comm_rank(mscomm,&mrank);
2433: for (jj=0; jj<to->msize; jj++) {
2434: MPI_Aint isize;
2435: PetscMPIInt disp_unit;
2436: PetscInt *ptr;
2437: MPIU_Win_shared_query(to->sharedwin,jj,&isize,&disp_unit,&from->sharedspaces[jj]);
2438: MPIU_Win_shared_query(sharedoffsetwin,jj,&isize,&disp_unit,&ptr);
2439: from->sharedspacesoffset[jj] = ptr[mrank];
2440: }
2441: MPI_Win_free(&sharedoffsetwin);
2443: if (mpi3node && !from->sharedspace) {
2444: /* compute from->notdone to be used by VecScatterEndMPI3Node() */
2445: PetscInt notdone = 0;
2446: for (i=0; i<from->msize; i++) {
2447: if (from->sharedspacesoffset && from->sharedspacesoffset[i] > -1) {
2448: notdone++;
2449: }
2450: }
2451: from->notdone = notdone;
2452: }
2454: from->local.nonmatching_computed = PETSC_FALSE;
2455: from->local.n_nonmatching = 0;
2456: from->local.slots_nonmatching = 0;
2457: to->local.nonmatching_computed = PETSC_FALSE;
2458: to->local.n_nonmatching = 0;
2459: to->local.slots_nonmatching = 0;
2461: from->format = VEC_SCATTER_MPI_GENERAL;
2462: to->format = VEC_SCATTER_MPI_GENERAL;
2463: from->bs = bs;
2464: to->bs = bs;
2466: VecScatterCreateCommon_PtoS_MPI3(from,to,ctx);
2467: return(0);
2468: }
2470: /*
2471: bs indicates how many elements there are in each block. Normally this would be 1.
2472: */
2473: PetscErrorCode VecScatterCreateCommon_PtoS_MPI3(VecScatter_MPI_General *from,VecScatter_MPI_General *to,VecScatter ctx)
2474: {
2475: MPI_Comm comm;
2476: PetscMPIInt tag = ((PetscObject)ctx)->tag, tagr;
2477: PetscInt bs = to->bs;
2478: PetscMPIInt size;
2479: PetscInt i, n;
2481: VecScatterType type;
2482: PetscBool mpi3node;
2485: PetscObjectGetComm((PetscObject)ctx,&comm);
2486: PetscObjectGetNewTag((PetscObject)ctx,&tagr);
2487: ctx->ops->destroy = VecScatterDestroy_PtoP_MPI3;
2489: MPI_Comm_size(comm,&size);
2490: /* check if the receives are ALL going into contiguous locations; if so can skip indexing */
2491: to->contiq = PETSC_FALSE;
2492: n = from->starts[from->n];
2493: from->contiq = PETSC_TRUE;
2494: for (i=1; i<n; i++) {
2495: if (from->indices[i] != from->indices[i-1] + bs) {
2496: from->contiq = PETSC_FALSE;
2497: break;
2498: }
2499: }
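/* Example: with bs = 2, a receive index list {10,12,14} is contiguous
   (each entry equals the previous one plus bs), so the unpack can
   presumably skip the index indirection and copy one contiguous run. */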
2501: ctx->ops->copy = VecScatterCopy_PtoP_AllToAll;
2502: {
2503: PetscInt *sstarts = to->starts, *rstarts = from->starts;
2504: PetscMPIInt *sprocs = to->procs, *rprocs = from->procs;
2505: MPI_Request *swaits = to->requests,*rwaits = from->requests;
2506: MPI_Request *rev_swaits,*rev_rwaits;
2507: PetscScalar *Ssvalues = to->values, *Srvalues = from->values;
2509: /* allocate additional wait variables for the "reverse" scatter */
2510: PetscMalloc1(to->n,&rev_rwaits);
2511: PetscMalloc1(from->n,&rev_swaits);
2512: to->rev_requests = rev_rwaits;
2513: from->rev_requests = rev_swaits;
2515: for (i=0; i<from->n; i++) {
2516: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2517: }
2519: for (i=0; i<to->n; i++) {
2520: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2521: }
2522: /* Register receives for scatter and reverse */
2523: for (i=0; i<from->n; i++) {
2524: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
2525: }
2526: for (i=0; i<to->n; i++) {
2527: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tagr,comm,rev_rwaits+i);
2528: }
2529: ctx->ops->copy = VecScatterCopy_PtoP_X;
2530: }
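/* MPI_Send_init()/MPI_Recv_init() only create persistent requests; no data
   moves until a request is started. The begin/end routines installed below
   presumably drive them with the standard persistent-request cycle:

     MPI_Startall(nreq,reqs);         // launch the pre-posted sends/receives
     ... pack or unpack message buffers ...
     MPI_Waitall(nreq,reqs,stats);    // requests remain allocated for reuse
*/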
2531: PetscInfo1(ctx,"Using blocksize %D scatter\n",bs);
2533: #if defined(PETSC_USE_DEBUG)
2534: MPIU_Allreduce(&bs,&i,1,MPIU_INT,MPI_MIN,PetscObjectComm((PetscObject)ctx));
2535: MPIU_Allreduce(&bs,&n,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)ctx));
2536: if (bs!=i || bs!=n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Block size %D != %D or %D",bs,i,n);
2537: #endif
2539: VecScatterGetType(ctx,&type);
2540: PetscStrcmp(type,"mpi3node",&mpi3node);
2542: if (mpi3node) {
2543: switch (bs) {
2544: case 12:
2545: ctx->ops->begin = VecScatterBeginMPI3Node_12;
2546: ctx->ops->end = VecScatterEndMPI3Node_12;
2547: break;
2548: case 11:
2549: ctx->ops->begin = VecScatterBeginMPI3Node_11;
2550: ctx->ops->end = VecScatterEndMPI3Node_11;
2551: break;
2552: case 10:
2553: ctx->ops->begin = VecScatterBeginMPI3Node_10;
2554: ctx->ops->end = VecScatterEndMPI3Node_10;
2555: break;
2556: case 9:
2557: ctx->ops->begin = VecScatterBeginMPI3Node_9;
2558: ctx->ops->end = VecScatterEndMPI3Node_9;
2559: break;
2560: case 8:
2561: ctx->ops->begin = VecScatterBeginMPI3Node_8;
2562: ctx->ops->end = VecScatterEndMPI3Node_8;
2563: break;
2564: case 7:
2565: ctx->ops->begin = VecScatterBeginMPI3Node_7;
2566: ctx->ops->end = VecScatterEndMPI3Node_7;
2567: break;
2568: case 6:
2569: ctx->ops->begin = VecScatterBeginMPI3Node_6;
2570: ctx->ops->end = VecScatterEndMPI3Node_6;
2571: break;
2572: case 5:
2573: ctx->ops->begin = VecScatterBeginMPI3Node_5;
2574: ctx->ops->end = VecScatterEndMPI3Node_5;
2575: break;
2576: case 4:
2577: ctx->ops->begin = VecScatterBeginMPI3Node_4;
2578: ctx->ops->end = VecScatterEndMPI3Node_4;
2579: break;
2580: case 3:
2581: ctx->ops->begin = VecScatterBeginMPI3Node_3;
2582: ctx->ops->end = VecScatterEndMPI3Node_3;
2583: break;
2584: case 2:
2585: ctx->ops->begin = VecScatterBeginMPI3Node_2;
2586: ctx->ops->end = VecScatterEndMPI3Node_2;
2587: break;
2588: case 1:
2589: ctx->ops->begin = VecScatterBeginMPI3Node_1;
2590: ctx->ops->end = VecScatterEndMPI3Node_1;
2591: break;
2592: default:
2593: ctx->ops->begin = VecScatterBegin_bs;
2594: ctx->ops->end = VecScatterEnd_bs;
2595: }
2596: } else { /* !mpi3node */
2598: switch (bs) {
2599: case 12:
2600: ctx->ops->begin = VecScatterBegin_12;
2601: ctx->ops->end = VecScatterEnd_12;
2602: break;
2603: case 11:
2604: ctx->ops->begin = VecScatterBegin_11;
2605: ctx->ops->end = VecScatterEnd_11;
2606: break;
2607: case 10:
2608: ctx->ops->begin = VecScatterBegin_10;
2609: ctx->ops->end = VecScatterEnd_10;
2610: break;
2611: case 9:
2612: ctx->ops->begin = VecScatterBegin_9;
2613: ctx->ops->end = VecScatterEnd_9;
2614: break;
2615: case 8:
2616: ctx->ops->begin = VecScatterBegin_8;
2617: ctx->ops->end = VecScatterEnd_8;
2618: break;
2619: case 7:
2620: ctx->ops->begin = VecScatterBegin_7;
2621: ctx->ops->end = VecScatterEnd_7;
2622: break;
2623: case 6:
2624: ctx->ops->begin = VecScatterBegin_6;
2625: ctx->ops->end = VecScatterEnd_6;
2626: break;
2627: case 5:
2628: ctx->ops->begin = VecScatterBegin_5;
2629: ctx->ops->end = VecScatterEnd_5;
2630: break;
2631: case 4:
2632: ctx->ops->begin = VecScatterBegin_4;
2633: ctx->ops->end = VecScatterEnd_4;
2634: break;
2635: case 3:
2636: ctx->ops->begin = VecScatterBegin_3;
2637: ctx->ops->end = VecScatterEnd_3;
2638: break;
2639: case 2:
2640: ctx->ops->begin = VecScatterBegin_2;
2641: ctx->ops->end = VecScatterEnd_2;
2642: break;
2643: case 1:
2648: ctx->ops->begin = VecScatterBegin_1;
2649: ctx->ops->end = VecScatterEnd_1;
2651: break;
2652: default:
2653: ctx->ops->begin = VecScatterBegin_bs;
2654: ctx->ops->end = VecScatterEnd_bs;
2655: }
2656: }
2658: ctx->ops->view = VecScatterView_MPI;
2660: /* try to optimize PtoP vecscatter with memcpy's */
2661: VecScatterMemcpyPlanCreate_PtoP(to,from);
2662: return(0);
2663: }
2665: /* ------------------------------------------------------------------------------------*/
2666: /*
2667: Scatter from local Seq vectors to a parallel vector.
2668: Reverses the order of the arguments, calls VecScatterCreateLocal_PtoS_MPI3(), then
2669: reverses the result.
2670: */
2671: PetscErrorCode VecScatterCreateLocal_StoP_MPI3(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2672: {
2673: PetscErrorCode ierr;
2674: MPI_Request *waits;
2675: VecScatter_MPI_General *to,*from;
2678: VecScatterCreateLocal_PtoS_MPI3(ny,inidy,nx,inidx,yin,xin,bs,ctx);
2679: to = (VecScatter_MPI_General*)ctx->fromdata;
2680: from = (VecScatter_MPI_General*)ctx->todata;
2681: ctx->todata = (void*)to;
2682: ctx->fromdata = (void*)from;
2683: /* these two are special; they are ALWAYS stored in the to struct */
2684: to->sstatus = from->sstatus;
2685: to->rstatus = from->rstatus;
2687: from->sstatus = 0;
2688: from->rstatus = 0;
2689: waits = from->rev_requests;
2690: from->rev_requests = from->requests;
2691: from->requests = waits;
2692: waits = to->rev_requests;
2693: to->rev_requests = to->requests;
2694: to->requests = waits;
2695: return(0);
2696: }
2698: /* ---------------------------------------------------------------------------------*/
2699: PetscErrorCode VecScatterCreateLocal_PtoP_MPI3(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2700: {
2702: PetscMPIInt size,rank,tag,imdex,n;
2703: PetscInt *owners = xin->map->range;
2704: PetscMPIInt *nprocs = NULL;
2705: PetscInt i,j,idx,nsends,*local_inidx = NULL,*local_inidy = NULL;
2706: PetscMPIInt *owner = NULL;
2707: PetscInt *starts = NULL,count,slen;
2708: PetscInt *rvalues = NULL,*svalues = NULL,base,*values = NULL,*rsvalues,recvtotal,lastidx;
2709: PetscMPIInt *onodes1,*olengths1,nrecvs;
2710: MPI_Comm comm;
2711: MPI_Request *send_waits = NULL,*recv_waits = NULL;
2712: MPI_Status recv_status,*send_status = NULL;
2713: PetscBool duplicate = PETSC_FALSE;
2714: #if defined(PETSC_USE_DEBUG)
2715: PetscBool found = PETSC_FALSE;
2716: #endif
2719: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2720: PetscObjectGetComm((PetscObject)xin,&comm);
2721: MPI_Comm_size(comm,&size);
2722: MPI_Comm_rank(comm,&rank);
2723: if (size == 1) {
2724: VecScatterCreateLocal_StoP_MPI3(nx,inidx,ny,inidy,xin,yin,bs,ctx);
2725: return(0);
2726: }
2728: /*
2729: Each processor ships off its inidx[j] and inidy[j] to the appropriate processor;
2730: the StoP scatter is then created from the redistributed indices via VecScatterCreateLocal_StoP_MPI3()
2731: */
2732: /* first count number of contributors to each processor */
2733: PetscMalloc3(size,&nprocs,nx,&owner,(size+1),&starts);
2734: PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
2736: lastidx = -1;
2737: j = 0;
2738: for (i=0; i<nx; i++) {
2739: /* if indices are NOT locally sorted, need to start search at the beginning */
2740: if (lastidx > (idx = bs*inidx[i])) j = 0;
2741: lastidx = idx;
2742: for (; j<size; j++) {
2743: if (idx >= owners[j] && idx < owners[j+1]) {
2744: nprocs[j]++;
2745: owner[i] = j;
2746: #if defined(PETSC_USE_DEBUG)
2747: found = PETSC_TRUE;
2748: #endif
2749: break;
2750: }
2751: }
2752: #if defined(PETSC_USE_DEBUG)
2753: if (!found) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2754: found = PETSC_FALSE;
2755: #endif
2756: }
2757: nsends = 0;
2758: for (i=0; i<size; i++) nsends += (nprocs[i] > 0);
2760: /* inform other processors of number of messages and max length */
2761: PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2762: PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2763: PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2764: recvtotal = 0;
2765: for (i=0; i<nrecvs; i++) {
2766: PetscIntSumError(recvtotal,olengths1[i],&recvtotal);
2767: }
2769: /* post receives: */
2770: PetscMalloc5(2*recvtotal,&rvalues,2*nx,&svalues,nrecvs,&recv_waits,nsends,&send_waits,nsends,&send_status);
2772: count = 0;
2773: for (i=0; i<nrecvs; i++) {
2774: MPI_Irecv((rvalues+2*count),2*olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2775: PetscIntSumError(count,olengths1[i],&count);
2776: }
2777: PetscFree(onodes1);
2779: /* do sends:
2780: 1) starts[i] gives the starting index in svalues for the data going to
2781: the ith processor
2782: */
2783: starts[0]= 0;
2784: for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2785: for (i=0; i<nx; i++) {
2786: svalues[2*starts[owner[i]]] = bs*inidx[i];
2787: svalues[1 + 2*starts[owner[i]]++] = bs*inidy[i];
2788: }
2790: starts[0] = 0;
2791: for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2792: count = 0;
2793: for (i=0; i<size; i++) {
2794: if (nprocs[i]) {
2795: MPI_Isend(svalues+2*starts[i],2*nprocs[i],MPIU_INT,i,tag,comm,send_waits+count);
2796: count++;
2797: }
2798: }
2799: PetscFree3(nprocs,owner,starts);
2801: /* wait on receives */
2802: count = nrecvs;
2803: slen = 0;
2804: while (count) {
2805: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2806: /* unpack receives into our local space */
2807: MPI_Get_count(&recv_status,MPIU_INT,&n);
2808: slen += n/2;
2809: count--;
2810: }
2811: if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);
2813: PetscMalloc2(slen,&local_inidx,slen,&local_inidy);
2814: base = owners[rank];
2815: count = 0;
2816: rsvalues = rvalues;
2817: for (i=0; i<nrecvs; i++) {
2818: values = rsvalues;
2819: rsvalues += 2*olengths1[i];
2820: for (j=0; j<olengths1[i]; j++) {
2821: local_inidx[count] = values[2*j] - base;
2822: local_inidy[count++] = values[2*j+1];
2823: }
2824: }
2825: PetscFree(olengths1);
2827: /* wait on sends */
2828: if (nsends) {MPI_Waitall(nsends,send_waits,send_status);}
2829: PetscFree5(rvalues,svalues,recv_waits,send_waits,send_status);
2831: /*
2832: should sort and remove duplicates from local_inidx, local_inidy
2833: */
2834: #if defined(do_it_slow)
2835: /* sort on the from index */
2836: PetscSortIntWithArray(slen,local_inidx,local_inidy);
2837: start = 0;
2838: while (start < slen) {
2839: count = start+1;
2840: last = local_inidx[start];
2841: while (count < slen && last == local_inidx[count]) count++;
2842: if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
2843: /* sort on to index */
2844: PetscSortInt(count-start,local_inidy+start);
2845: }
2846: /* remove duplicates; not the most efficient way, but probably good enough */
2847: i = start;
2848: while (i < count-1) {
2849: if (local_inidy[i] != local_inidy[i+1]) i++;
2850: else { /* found a duplicate */
2851: duplicate = PETSC_TRUE;
2852: for (j=i; j<slen-1; j++) {
2853: local_inidx[j] = local_inidx[j+1];
2854: local_inidy[j] = local_inidy[j+1];
2855: }
2856: slen--;
2857: count--;
2858: }
2859: }
2860: start = count;
2861: }
2862: #endif
2863: if (duplicate) {
2864: PetscInfo(ctx,"Duplicate from/to indices passed in VecScatterCreate(); they are ignored\n");
2865: }
2866: VecScatterCreateLocal_StoP_MPI3(slen,local_inidx,slen,local_inidy,xin,yin,bs,ctx);
2867: PetscFree2(local_inidx,local_inidy);
2868: return(0);
2869: }
2871: #endif