Actual source code: vpscat.c
petsc-3.13.6 2020-09-29
1: /*
2: Defines parallel vector scatters.
3: */
5: #include <petsc/private/vecscatterimpl.h>
8: PetscErrorCode VecScatterView_MPI(VecScatter ctx,PetscViewer viewer)
9: {
10: VecScatter_MPI_General *to =(VecScatter_MPI_General*)ctx->todata;
11: VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
12: PetscErrorCode ierr;
13: PetscInt i;
14: PetscMPIInt rank;
15: PetscViewerFormat format;
16: PetscBool iascii;
19: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
20: if (iascii) {
21: MPI_Comm_rank(PetscObjectComm((PetscObject)ctx),&rank);
22: PetscViewerGetFormat(viewer,&format);
23: if (format == PETSC_VIEWER_ASCII_INFO) {
24: PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;
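/* reduce the per-rank message counts and lengths onto rank 0, which prints the summary */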
26: MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
27: MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
28: itmp = to->starts[to->n];
29: MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
30: itmp = from->starts[from->n];
31: MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
32: MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)ctx));
34: PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
35: PetscViewerASCIIPrintf(viewer," Blocksize %D\n",to->bs);
36: PetscViewerASCIIPrintf(viewer," Maximum number sends %D\n",nsend_max);
37: PetscViewerASCIIPrintf(viewer," Maximum number receives %D\n",nrecv_max);
38: PetscViewerASCIIPrintf(viewer," Maximum data sent %D\n",(PetscInt)(lensend_max*to->bs*sizeof(PetscScalar)));
39: PetscViewerASCIIPrintf(viewer," Maximum data received %D\n",(PetscInt)(lenrecv_max*to->bs*sizeof(PetscScalar)));
40: PetscViewerASCIIPrintf(viewer," Total data sent %D\n",(PetscInt)(alldata*to->bs*sizeof(PetscScalar)));
41: } else {
42: PetscViewerASCIIPrintf(viewer," VecScatter Blocksize %D\n",to->bs);
43: PetscViewerASCIIPushSynchronized(viewer);
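/* in synchronized mode each rank's output is collected and emitted in rank order at the PetscViewerFlush() below */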
44: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);
45: if (to->n) {
46: for (i=0; i<to->n; i++) {
47: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length = %D to whom %d\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
48: }
49: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Now the indices for all remote sends (in order by process sent to)\n",rank);
50: for (i=0; i<to->starts[to->n]; i++) {
51: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,to->indices[i]);
52: }
53: }
55: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);
56: if (from->n) {
57: for (i=0; i<from->n; i++) {
58: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %d\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
59: }
61: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Now the indices for all remote receives (in order by process received from)\n",rank);
62: for (i=0; i<from->starts[from->n]; i++) {
63: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,from->indices[i]);
64: }
65: }
66: if (to->local.n) {
67: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Indices for local part of scatter\n",rank);
68: for (i=0; i<to->local.n; i++) { /* the to and from have the opposite meaning from what you would expect */
69: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] From %D to %D \n",rank,to->local.vslots[i],from->local.vslots[i]);
70: }
71: }
73: for (i=0; i<to->msize; i++) {
74: if (to->sharedspacestarts[i+1] > to->sharedspacestarts[i]) {
75: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Via shared memory to local memory partner %D count %D\n",rank,i,to->sharedspacestarts[i+1]-to->sharedspacestarts[i]);
76: }
77: }
78: for (i=0; i<from->msize; i++) {
79: if (from->sharedspacestarts[i+1] > from->sharedspacestarts[i]) {
80: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Via shared memory from local memory partner %D count %D\n",rank,i,from->sharedspacestarts[i+1]-from->sharedspacestarts[i]);
81: }
82: }
84: PetscViewerFlush(viewer);
85: PetscViewerASCIIPopSynchronized(viewer);
87: PetscViewerASCIIPrintf(viewer,"Method used to implement the VecScatter: ");
88: }
89: }
90: return(0);
91: }
93: /* -----------------------------------------------------------------------------------*/
94: /*
95: The next routine determines what part of the local scatter is an
96: exact copy of values onto their current locations. We check this here and
97: then know that we need not perform that portion of the scatter when the vector is
98: scattering to itself with INSERT_VALUES.
100: This is currently not used, but it would speed up, for example, DMLocalToLocalBegin/End().
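For example, if to->vslots = {0,1,5} and from->vslots = {0,1,2}, only the last pair differs, so the compressed scatter keeps just that pair in slots_nonmatching.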
102: */
103: PetscErrorCode VecScatterLocalOptimize_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from)
104: {
105: PetscInt n = to->n,n_nonmatching = 0,i,*to_slots = to->vslots,*from_slots = from->vslots;
107: PetscInt *nto_slots,*nfrom_slots,j = 0;
110: for (i=0; i<n; i++) {
111: if (to_slots[i] != from_slots[i]) n_nonmatching++;
112: }
114: if (!n_nonmatching) {
115: to->nonmatching_computed = PETSC_TRUE;
116: to->n_nonmatching = from->n_nonmatching = 0;
117: PetscInfo1(scatter,"Reduced %D to 0\n", n);
118: } else if (n_nonmatching == n) {
119: to->nonmatching_computed = PETSC_FALSE;
120: PetscInfo(scatter,"All values non-matching\n");
121: } else {
122: to->nonmatching_computed = PETSC_TRUE;
123: to->n_nonmatching = from->n_nonmatching = n_nonmatching;
125: PetscMalloc1(n_nonmatching,&nto_slots);
126: PetscMalloc1(n_nonmatching,&nfrom_slots);
128: to->slots_nonmatching = nto_slots;
129: from->slots_nonmatching = nfrom_slots;
130: for (i=0; i<n; i++) {
131: if (to_slots[i] != from_slots[i]) {
132: nto_slots[j] = to_slots[i];
133: nfrom_slots[j] = from_slots[i];
134: j++;
135: }
136: }
137: PetscInfo2(scatter,"Reduced %D to %D\n",n,n_nonmatching);
138: }
139: return(0);
140: }
142: /* --------------------------------------------------------------------------------------*/
145: PetscErrorCode VecScatterDestroy_PtoP_MPI3(VecScatter ctx)
146: {
147: VecScatter_MPI_General *to = (VecScatter_MPI_General*)ctx->todata;
148: VecScatter_MPI_General *from = (VecScatter_MPI_General*)ctx->fromdata;
149: PetscErrorCode ierr;
150: PetscInt i;
153: /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
154: if (to->requests) {
155: for (i=0; i<to->n; i++) {
156: MPI_Request_free(to->requests + i);
157: }
158: }
159: if (to->rev_requests) {
160: for (i=0; i<to->n; i++) {
161: MPI_Request_free(to->rev_requests + i);
162: }
163: }
164: if (from->requests) {
165: for (i=0; i<from->n; i++) {
166: MPI_Request_free(from->requests + i);
167: }
168: }
169: if (from->rev_requests) {
170: for (i=0; i<from->n; i++) {
171: MPI_Request_free(from->rev_requests + i);
172: }
173: }
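/* freeing the shared-memory windows also releases the memory MPI allocated for them with MPIU_Win_allocate_shared() */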
174: if (to->sharedwin != MPI_WIN_NULL) {MPI_Win_free(&to->sharedwin);}
175: if (from->sharedwin != MPI_WIN_NULL) {MPI_Win_free(&from->sharedwin);}
176: PetscFree(to->sharedspaces);
177: PetscFree(to->sharedspacesoffset);
178: PetscFree(to->sharedspaceindices);
179: PetscFree(to->sharedspacestarts);
181: PetscFree(from->sharedspaceindices);
182: PetscFree(from->sharedspaces);
183: PetscFree(from->sharedspacesoffset);
184: PetscFree(from->sharedspacestarts);
186: PetscFree(to->local.vslots);
187: PetscFree(from->local.vslots);
188: PetscFree(to->local.slots_nonmatching);
189: PetscFree(from->local.slots_nonmatching);
190: PetscFree(to->rev_requests);
191: PetscFree(from->rev_requests);
192: PetscFree(to->requests);
193: PetscFree(from->requests);
194: PetscFree4(to->values,to->indices,to->starts,to->procs);
195: PetscFree2(to->sstatus,to->rstatus);
196: PetscFree4(from->values,from->indices,from->starts,from->procs);
197: VecScatterMemcpyPlanDestroy_PtoP(to,from);
198: PetscFree(from);
199: PetscFree(to);
200: return(0);
201: }
203: /* --------------------------------------------------------------------------------------*/
205: PetscErrorCode VecScatterCopy_PtoP_X(VecScatter in,VecScatter out)
206: {
207: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
208: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
209: PetscErrorCode ierr;
210: PetscInt ny,bs = in_from->bs,jj;
211: PetscShmComm scomm;
212: MPI_Comm mscomm;
213: MPI_Info info;
216: out->ops->begin = in->ops->begin;
217: out->ops->end = in->ops->end;
218: out->ops->copy = in->ops->copy;
219: out->ops->destroy = in->ops->destroy;
220: out->ops->view = in->ops->view;
222: /* allocate entire send scatter context */
223: PetscNewLog(out,&out_to);
224: out_to->sharedwin = MPI_WIN_NULL;
225: PetscNewLog(out,&out_from);
226: out_from->sharedwin = MPI_WIN_NULL;
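/* starts[] is a CSR-style offset array: message i goes to procs[i] and covers indices[starts[i]..starts[i+1]); starts[n] is the total number of indices */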
228: ny = in_to->starts[in_to->n];
229: out_to->n = in_to->n;
230: out_to->format = in_to->format;
232: PetscMalloc1(out_to->n,&out_to->requests);
233: PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
234: PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
235: PetscArraycpy(out_to->indices,in_to->indices,ny);
236: PetscArraycpy(out_to->starts,in_to->starts,out_to->n+1);
237: PetscArraycpy(out_to->procs,in_to->procs,out_to->n);
239: out->todata = (void*)out_to;
240: out_to->local.n = in_to->local.n;
241: out_to->local.nonmatching_computed = PETSC_FALSE;
242: out_to->local.n_nonmatching = 0;
243: out_to->local.slots_nonmatching = 0;
244: if (in_to->local.n) {
245: PetscMalloc1(in_to->local.n,&out_to->local.vslots);
246: PetscMalloc1(in_from->local.n,&out_from->local.vslots);
247: PetscArraycpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n);
248: PetscArraycpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n);
249: } else {
250: out_to->local.vslots = 0;
251: out_from->local.vslots = 0;
252: }
254: /* copy the memcpy plans, then allocate the entire receive context */
255: VecScatterMemcpyPlanCopy(&in_to->local.memcpy_plan,&out_to->local.memcpy_plan);
256: VecScatterMemcpyPlanCopy(&in_from->local.memcpy_plan,&out_from->local.memcpy_plan);
257: VecScatterMemcpyPlanCopy(&in_to->memcpy_plan,&out_to->memcpy_plan);
258: VecScatterMemcpyPlanCopy(&in_from->memcpy_plan,&out_from->memcpy_plan);
260: out_from->format = in_from->format;
261: ny = in_from->starts[in_from->n];
262: out_from->n = in_from->n;
264: PetscMalloc1(out_from->n,&out_from->requests);
265: PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
266: PetscArraycpy(out_from->indices,in_from->indices,ny);
267: PetscArraycpy(out_from->starts,in_from->starts,out_from->n+1);
268: PetscArraycpy(out_from->procs,in_from->procs,out_from->n);
270: out->fromdata = (void*)out_from;
271: out_from->local.n = in_from->local.n;
272: out_from->local.nonmatching_computed = PETSC_FALSE;
273: out_from->local.n_nonmatching = 0;
274: out_from->local.slots_nonmatching = 0;
276: /*
277: set up the request arrays for use with MPI_Send_init() and MPI_Recv_init()
278: */
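/* these are persistent MPI requests: created once here, started each time the scatter executes, and released with MPI_Request_free() in VecScatterDestroy_PtoP_MPI3() */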
279: {
280: PetscMPIInt tag;
281: MPI_Comm comm;
282: PetscInt *sstarts = out_to->starts, *rstarts = out_from->starts;
283: PetscMPIInt *sprocs = out_to->procs, *rprocs = out_from->procs;
284: PetscInt i;
285: MPI_Request *swaits = out_to->requests,*rwaits = out_from->requests;
286: MPI_Request *rev_swaits,*rev_rwaits;
287: PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;
289: PetscMalloc1(in_to->n,&out_to->rev_requests);
290: PetscMalloc1(in_from->n,&out_from->rev_requests);
292: rev_rwaits = out_to->rev_requests;
293: rev_swaits = out_from->rev_requests;
295: out_from->bs = out_to->bs = bs;
296: tag = ((PetscObject)out)->tag;
297: PetscObjectGetComm((PetscObject)out,&comm);
299: /* Register the receives that you will use later (sends for scatter reverse) */
300: for (i=0; i<out_from->n; i++) {
301: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
302: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
303: }
305: for (i=0; i<out_to->n; i++) {
306: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
307: }
309: /* Register receives for scatter reverse */
310: for (i=0; i<out_to->n; i++) {
311: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
312: }
313: }
315: /* since the to and from data structures are not symmetric for shared memory copies we ensure they are always listed in the "standard" form */
316: if (in_to->sharedwin == MPI_WIN_NULL) {
317: VecScatter_MPI_General *totmp = in_to,*out_totmp = out_to;
318: in_to = in_from;
319: in_from = totmp;
320: out_to = out_from;
321: out_from = out_totmp;
322: }
324: /* copy the to parts for the shared memory copies between processes */
325: out_to->sharedcnt = in_to->sharedcnt;
326: out_to->msize = in_to->msize;
327: PetscMalloc1(out_to->msize+1,&out_to->sharedspacestarts);
328: PetscArraycpy(out_to->sharedspacestarts,in_to->sharedspacestarts,out_to->msize+1);
329: PetscMalloc1(out_to->sharedcnt,&out_to->sharedspaceindices);
330: PetscArraycpy(out_to->sharedspaceindices,in_to->sharedspaceindices,out_to->sharedcnt);
332: PetscShmCommGet(PetscObjectComm((PetscObject)in),&scomm);
333: PetscShmCommGetMpiShmComm(scomm,&mscomm);
334: MPI_Info_create(&info);
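/* alloc_shared_noncontig lets the MPI implementation place each process's segment of the shared window on its own page(s) rather than packing the segments contiguously, which can improve memory locality */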
335: MPI_Info_set(info, "alloc_shared_noncontig", "true");
336: MPIU_Win_allocate_shared(bs*out_to->sharedcnt*sizeof(PetscScalar),sizeof(PetscScalar),info,mscomm,&out_to->sharedspace,&out_to->sharedwin);
337: MPI_Info_free(&info);
339: /* copy the from parts for the shared memory copies between processes */
340: out_from->sharedcnt = in_from->sharedcnt;
341: out_from->msize = in_from->msize;
342: PetscMalloc1(out_from->msize+1,&out_from->sharedspacestarts);
343: PetscArraycpy(out_from->sharedspacestarts,in_from->sharedspacestarts,out_from->msize+1);
344: PetscMalloc1(out_from->sharedcnt,&out_from->sharedspaceindices);
345: PetscArraycpy(out_from->sharedspaceindices,in_from->sharedspaceindices,out_from->sharedcnt);
346: PetscMalloc1(out_from->msize,&out_from->sharedspacesoffset);
347: PetscArraycpy(out_from->sharedspacesoffset,in_from->sharedspacesoffset,out_from->msize);
348: PetscCalloc1(out_from->msize,&out_from->sharedspaces);
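/* query the base address of each local-memory partner's segment in the shared window so the scatter can read and write it directly */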
349: for (jj=0; jj<out_from->msize; jj++) {
350: MPI_Aint isize;
351: PetscMPIInt disp_unit;
352: MPIU_Win_shared_query(out_to->sharedwin,jj,&isize,&disp_unit,&out_from->sharedspaces[jj]);
353: }
354: return(0);
355: }
357: PetscErrorCode VecScatterCopy_PtoP_AllToAll(VecScatter in,VecScatter out)
358: {
359: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
360: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
361: PetscErrorCode ierr;
362: PetscInt ny,bs = in_from->bs;
363: PetscMPIInt size;
366: MPI_Comm_size(PetscObjectComm((PetscObject)in),&size);
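/* note: unlike VecScatterCopy_PtoP_X() above, this copy creates no persistent MPI requests and no shared-memory windows */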
368: out->ops->begin = in->ops->begin;
369: out->ops->end = in->ops->end;
370: out->ops->copy = in->ops->copy;
371: out->ops->destroy = in->ops->destroy;
372: out->ops->view = in->ops->view;
374: /* allocate entire send scatter context */
375: PetscNewLog(out,&out_to);
376: out_to->sharedwin = MPI_WIN_NULL;
377: PetscNewLog(out,&out_from);
378: out_from->sharedwin = MPI_WIN_NULL;
380: ny = in_to->starts[in_to->n];
381: out_to->n = in_to->n;
382: out_to->format = in_to->format;
384: PetscMalloc1(out_to->n,&out_to->requests);
385: PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
386: PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
387: PetscArraycpy(out_to->indices,in_to->indices,ny);
388: PetscArraycpy(out_to->starts,in_to->starts,out_to->n+1);
389: PetscArraycpy(out_to->procs,in_to->procs,out_to->n);
391: out->todata = (void*)out_to;
392: out_to->local.n = in_to->local.n;
393: out_to->local.nonmatching_computed = PETSC_FALSE;
394: out_to->local.n_nonmatching = 0;
395: out_to->local.slots_nonmatching = 0;
396: if (in_to->local.n) {
397: PetscMalloc1(in_to->local.n,&out_to->local.vslots);
398: PetscMalloc1(in_from->local.n,&out_from->local.vslots);
399: PetscArraycpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n);
400: PetscArraycpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n);
401: } else {
402: out_to->local.vslots = 0;
403: out_from->local.vslots = 0;
404: }
406: /* copy the memcpy plans, then allocate the entire receive context */
407: VecScatterMemcpyPlanCopy_PtoP(in_to,in_from,out_to,out_from);
409: out_from->format = in_from->format;
410: ny = in_from->starts[in_from->n];
411: out_from->n = in_from->n;
413: PetscMalloc1(out_from->n,&out_from->requests);
414: PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
415: PetscArraycpy(out_from->indices,in_from->indices,ny);
416: PetscArraycpy(out_from->starts,in_from->starts,out_from->n+1);
417: PetscArraycpy(out_from->procs,in_from->procs,out_from->n);
419: out->fromdata = (void*)out_from;
420: out_from->local.n = in_from->local.n;
421: out_from->local.nonmatching_computed = PETSC_FALSE;
422: out_from->local.n_nonmatching = 0;
423: out_from->local.slots_nonmatching = 0;
425: return(0);
426: }
427: /* --------------------------------------------------------------------------------------------------
428: Packs the message data into send buffers and unpacks it from receive buffers.
430: These could be generated automatically.
432: Fortran kernels etc. could be used.
433: */
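/*
   Naming convention: Pack_N/UnPack_N/Scatter_N are hand-unrolled versions for block size N,
   while Pack_bs/UnPack_bs/Scatter_bs at the end of the file handle an arbitrary block size bs.
   Pack gathers the blocks x[idx..idx+bs) into a contiguous buffer y; UnPack spreads a contiguous
   buffer x into y[idy..idy+bs) honoring the InsertMode; Scatter combines an indexed read with an
   indexed write in one pass. For example, Pack_2 with n = 2 and indicesx = {0,3} produces
   y = {x[0],x[1],x[3],x[4]}.
*/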
434: PETSC_STATIC_INLINE void Pack_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
435: {
436: PetscInt i;
437: for (i=0; i<n; i++) y[i] = x[indicesx[i]];
438: }
440: PETSC_STATIC_INLINE PetscErrorCode UnPack_1(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
441: {
442: PetscInt i;
445: switch (addv) {
446: case INSERT_VALUES:
447: case INSERT_ALL_VALUES:
448: for (i=0; i<n; i++) y[indicesy[i]] = x[i];
449: break;
450: case ADD_VALUES:
451: case ADD_ALL_VALUES:
452: for (i=0; i<n; i++) y[indicesy[i]] += x[i];
453: break;
454: #if !defined(PETSC_USE_COMPLEX)
455: case MAX_VALUES:
456: for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[i]);
457: #else
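/* PetscMax() is not defined for complex scalars, so in complex builds MAX_VALUES falls through to the no-op below */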
458: case MAX_VALUES:
459: #endif
460: case NOT_SET_VALUES:
461: break;
462: default:
463: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
464: }
465: return(0);
466: }
468: PETSC_STATIC_INLINE PetscErrorCode Scatter_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
469: {
470: PetscInt i;
473: switch (addv) {
474: case INSERT_VALUES:
475: case INSERT_ALL_VALUES:
476: for (i=0; i<n; i++) y[indicesy[i]] = x[indicesx[i]];
477: break;
478: case ADD_VALUES:
479: case ADD_ALL_VALUES:
480: for (i=0; i<n; i++) y[indicesy[i]] += x[indicesx[i]];
481: break;
482: #if !defined(PETSC_USE_COMPLEX)
483: case MAX_VALUES:
484: for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[indicesx[i]]);
485: #else
486: case MAX_VALUES:
487: #endif
488: case NOT_SET_VALUES:
489: break;
490: default:
491: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
492: }
493: return(0);
494: }
496: /* ----------------------------------------------------------------------------------------------- */
497: PETSC_STATIC_INLINE void Pack_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
498: {
499: PetscInt i,idx;
501: for (i=0; i<n; i++) {
502: idx = *indicesx++;
503: y[0] = x[idx];
504: y[1] = x[idx+1];
505: y += 2;
506: }
507: }
509: PETSC_STATIC_INLINE PetscErrorCode UnPack_2(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
510: {
511: PetscInt i,idy;
514: switch (addv) {
515: case INSERT_VALUES:
516: case INSERT_ALL_VALUES:
517: for (i=0; i<n; i++) {
518: idy = *indicesy++;
519: y[idy] = x[0];
520: y[idy+1] = x[1];
521: x += 2;
522: }
523: break;
524: case ADD_VALUES:
525: case ADD_ALL_VALUES:
526: for (i=0; i<n; i++) {
527: idy = *indicesy++;
528: y[idy] += x[0];
529: y[idy+1] += x[1];
530: x += 2;
531: }
532: break;
533: #if !defined(PETSC_USE_COMPLEX)
534: case MAX_VALUES:
535: for (i=0; i<n; i++) {
536: idy = *indicesy++;
537: y[idy] = PetscMax(y[idy],x[0]);
538: y[idy+1] = PetscMax(y[idy+1],x[1]);
539: x += 2;
540: }
541: #else
542: case MAX_VALUES:
543: #endif
544: case NOT_SET_VALUES:
545: break;
546: default:
547: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
548: }
549: return(0);
550: }
552: PETSC_STATIC_INLINE PetscErrorCode Scatter_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
553: {
554: PetscInt i,idx,idy;
557: switch (addv) {
558: case INSERT_VALUES:
559: case INSERT_ALL_VALUES:
560: for (i=0; i<n; i++) {
561: idx = *indicesx++;
562: idy = *indicesy++;
563: y[idy] = x[idx];
564: y[idy+1] = x[idx+1];
565: }
566: break;
567: case ADD_VALUES:
568: case ADD_ALL_VALUES:
569: for (i=0; i<n; i++) {
570: idx = *indicesx++;
571: idy = *indicesy++;
572: y[idy] += x[idx];
573: y[idy+1] += x[idx+1];
574: }
575: break;
576: #if !defined(PETSC_USE_COMPLEX)
577: case MAX_VALUES:
578: for (i=0; i<n; i++) {
579: idx = *indicesx++;
580: idy = *indicesy++;
581: y[idy] = PetscMax(y[idy],x[idx]);
582: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
583: }
584: #else
585: case MAX_VALUES:
586: #endif
587: case NOT_SET_VALUES:
588: break;
589: default:
590: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
591: }
592: return(0);
593: }
594: /* ----------------------------------------------------------------------------------------------- */
595: PETSC_STATIC_INLINE void Pack_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
596: {
597: PetscInt i,idx;
599: for (i=0; i<n; i++) {
600: idx = *indicesx++;
601: y[0] = x[idx];
602: y[1] = x[idx+1];
603: y[2] = x[idx+2];
604: y += 3;
605: }
606: }
607: PETSC_STATIC_INLINE PetscErrorCode UnPack_3(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
608: {
609: PetscInt i,idy;
612: switch (addv) {
613: case INSERT_VALUES:
614: case INSERT_ALL_VALUES:
615: for (i=0; i<n; i++) {
616: idy = *indicesy++;
617: y[idy] = x[0];
618: y[idy+1] = x[1];
619: y[idy+2] = x[2];
620: x += 3;
621: }
622: break;
623: case ADD_VALUES:
624: case ADD_ALL_VALUES:
625: for (i=0; i<n; i++) {
626: idy = *indicesy++;
627: y[idy] += x[0];
628: y[idy+1] += x[1];
629: y[idy+2] += x[2];
630: x += 3;
631: }
632: break;
633: #if !defined(PETSC_USE_COMPLEX)
634: case MAX_VALUES:
635: for (i=0; i<n; i++) {
636: idy = *indicesy++;
637: y[idy] = PetscMax(y[idy],x[0]);
638: y[idy+1] = PetscMax(y[idy+1],x[1]);
639: y[idy+2] = PetscMax(y[idy+2],x[2]);
640: x += 3;
641: }
642: #else
643: case MAX_VALUES:
644: #endif
645: case NOT_SET_VALUES:
646: break;
647: default:
648: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
649: }
650: return(0);
651: }
653: PETSC_STATIC_INLINE PetscErrorCode Scatter_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
654: {
655: PetscInt i,idx,idy;
658: switch (addv) {
659: case INSERT_VALUES:
660: case INSERT_ALL_VALUES:
661: for (i=0; i<n; i++) {
662: idx = *indicesx++;
663: idy = *indicesy++;
664: y[idy] = x[idx];
665: y[idy+1] = x[idx+1];
666: y[idy+2] = x[idx+2];
667: }
668: break;
669: case ADD_VALUES:
670: case ADD_ALL_VALUES:
671: for (i=0; i<n; i++) {
672: idx = *indicesx++;
673: idy = *indicesy++;
674: y[idy] += x[idx];
675: y[idy+1] += x[idx+1];
676: y[idy+2] += x[idx+2];
677: }
678: break;
679: #if !defined(PETSC_USE_COMPLEX)
680: case MAX_VALUES:
681: for (i=0; i<n; i++) {
682: idx = *indicesx++;
683: idy = *indicesy++;
684: y[idy] = PetscMax(y[idy],x[idx]);
685: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
686: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
687: }
688: #else
689: case MAX_VALUES:
690: #endif
691: case NOT_SET_VALUES:
692: break;
693: default:
694: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
695: }
696: return(0);
697: }
698: /* ----------------------------------------------------------------------------------------------- */
699: PETSC_STATIC_INLINE void Pack_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
700: {
701: PetscInt i,idx;
703: for (i=0; i<n; i++) {
704: idx = *indicesx++;
705: y[0] = x[idx];
706: y[1] = x[idx+1];
707: y[2] = x[idx+2];
708: y[3] = x[idx+3];
709: y += 4;
710: }
711: }
712: PETSC_STATIC_INLINE PetscErrorCode UnPack_4(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
713: {
714: PetscInt i,idy;
717: switch (addv) {
718: case INSERT_VALUES:
719: case INSERT_ALL_VALUES:
720: for (i=0; i<n; i++) {
721: idy = *indicesy++;
722: y[idy] = x[0];
723: y[idy+1] = x[1];
724: y[idy+2] = x[2];
725: y[idy+3] = x[3];
726: x += 4;
727: }
728: break;
729: case ADD_VALUES:
730: case ADD_ALL_VALUES:
731: for (i=0; i<n; i++) {
732: idy = *indicesy++;
733: y[idy] += x[0];
734: y[idy+1] += x[1];
735: y[idy+2] += x[2];
736: y[idy+3] += x[3];
737: x += 4;
738: }
739: break;
740: #if !defined(PETSC_USE_COMPLEX)
741: case MAX_VALUES:
742: for (i=0; i<n; i++) {
743: idy = *indicesy++;
744: y[idy] = PetscMax(y[idy],x[0]);
745: y[idy+1] = PetscMax(y[idy+1],x[1]);
746: y[idy+2] = PetscMax(y[idy+2],x[2]);
747: y[idy+3] = PetscMax(y[idy+3],x[3]);
748: x += 4;
749: }
750: #else
751: case MAX_VALUES:
752: #endif
753: case NOT_SET_VALUES:
754: break;
755: default:
756: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
757: }
758: return(0);
759: }
761: PETSC_STATIC_INLINE PetscErrorCode Scatter_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
762: {
763: PetscInt i,idx,idy;
766: switch (addv) {
767: case INSERT_VALUES:
768: case INSERT_ALL_VALUES:
769: for (i=0; i<n; i++) {
770: idx = *indicesx++;
771: idy = *indicesy++;
772: y[idy] = x[idx];
773: y[idy+1] = x[idx+1];
774: y[idy+2] = x[idx+2];
775: y[idy+3] = x[idx+3];
776: }
777: break;
778: case ADD_VALUES:
779: case ADD_ALL_VALUES:
780: for (i=0; i<n; i++) {
781: idx = *indicesx++;
782: idy = *indicesy++;
783: y[idy] += x[idx];
784: y[idy+1] += x[idx+1];
785: y[idy+2] += x[idx+2];
786: y[idy+3] += x[idx+3];
787: }
788: break;
789: #if !defined(PETSC_USE_COMPLEX)
790: case MAX_VALUES:
791: for (i=0; i<n; i++) {
792: idx = *indicesx++;
793: idy = *indicesy++;
794: y[idy] = PetscMax(y[idy],x[idx]);
795: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
796: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
797: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
798: }
799: #else
800: case MAX_VALUES:
801: #endif
802: case NOT_SET_VALUES:
803: break;
804: default:
805: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
806: }
807: return(0);
808: }
809: /* ----------------------------------------------------------------------------------------------- */
810: PETSC_STATIC_INLINE void Pack_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
811: {
812: PetscInt i,idx;
814: for (i=0; i<n; i++) {
815: idx = *indicesx++;
816: y[0] = x[idx];
817: y[1] = x[idx+1];
818: y[2] = x[idx+2];
819: y[3] = x[idx+3];
820: y[4] = x[idx+4];
821: y += 5;
822: }
823: }
825: PETSC_STATIC_INLINE PetscErrorCode UnPack_5(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
826: {
827: PetscInt i,idy;
830: switch (addv) {
831: case INSERT_VALUES:
832: case INSERT_ALL_VALUES:
833: for (i=0; i<n; i++) {
834: idy = *indicesy++;
835: y[idy] = x[0];
836: y[idy+1] = x[1];
837: y[idy+2] = x[2];
838: y[idy+3] = x[3];
839: y[idy+4] = x[4];
840: x += 5;
841: }
842: break;
843: case ADD_VALUES:
844: case ADD_ALL_VALUES:
845: for (i=0; i<n; i++) {
846: idy = *indicesy++;
847: y[idy] += x[0];
848: y[idy+1] += x[1];
849: y[idy+2] += x[2];
850: y[idy+3] += x[3];
851: y[idy+4] += x[4];
852: x += 5;
853: }
854: break;
855: #if !defined(PETSC_USE_COMPLEX)
856: case MAX_VALUES:
857: for (i=0; i<n; i++) {
858: idy = *indicesy++;
859: y[idy] = PetscMax(y[idy],x[0]);
860: y[idy+1] = PetscMax(y[idy+1],x[1]);
861: y[idy+2] = PetscMax(y[idy+2],x[2]);
862: y[idy+3] = PetscMax(y[idy+3],x[3]);
863: y[idy+4] = PetscMax(y[idy+4],x[4]);
864: x += 5;
865: }
866: #else
867: case MAX_VALUES:
868: #endif
869: case NOT_SET_VALUES:
870: break;
871: default:
872: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
873: }
874: return(0);
875: }
877: PETSC_STATIC_INLINE PetscErrorCode Scatter_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
878: {
879: PetscInt i,idx,idy;
882: switch (addv) {
883: case INSERT_VALUES:
884: case INSERT_ALL_VALUES:
885: for (i=0; i<n; i++) {
886: idx = *indicesx++;
887: idy = *indicesy++;
888: y[idy] = x[idx];
889: y[idy+1] = x[idx+1];
890: y[idy+2] = x[idx+2];
891: y[idy+3] = x[idx+3];
892: y[idy+4] = x[idx+4];
893: }
894: break;
895: case ADD_VALUES:
896: case ADD_ALL_VALUES:
897: for (i=0; i<n; i++) {
898: idx = *indicesx++;
899: idy = *indicesy++;
900: y[idy] += x[idx];
901: y[idy+1] += x[idx+1];
902: y[idy+2] += x[idx+2];
903: y[idy+3] += x[idx+3];
904: y[idy+4] += x[idx+4];
905: }
906: break;
907: #if !defined(PETSC_USE_COMPLEX)
908: case MAX_VALUES:
909: for (i=0; i<n; i++) {
910: idx = *indicesx++;
911: idy = *indicesy++;
912: y[idy] = PetscMax(y[idy],x[idx]);
913: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
914: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
915: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
916: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
917: }
918: #else
919: case MAX_VALUES:
920: #endif
921: case NOT_SET_VALUES:
922: break;
923: default:
924: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
925: }
926: return(0);
927: }
928: /* ----------------------------------------------------------------------------------------------- */
929: PETSC_STATIC_INLINE void Pack_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
930: {
931: PetscInt i,idx;
933: for (i=0; i<n; i++) {
934: idx = *indicesx++;
935: y[0] = x[idx];
936: y[1] = x[idx+1];
937: y[2] = x[idx+2];
938: y[3] = x[idx+3];
939: y[4] = x[idx+4];
940: y[5] = x[idx+5];
941: y += 6;
942: }
943: }
945: PETSC_STATIC_INLINE PetscErrorCode UnPack_6(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
946: {
947: PetscInt i,idy;
950: switch (addv) {
951: case INSERT_VALUES:
952: case INSERT_ALL_VALUES:
953: for (i=0; i<n; i++) {
954: idy = *indicesy++;
955: y[idy] = x[0];
956: y[idy+1] = x[1];
957: y[idy+2] = x[2];
958: y[idy+3] = x[3];
959: y[idy+4] = x[4];
960: y[idy+5] = x[5];
961: x += 6;
962: }
963: break;
964: case ADD_VALUES:
965: case ADD_ALL_VALUES:
966: for (i=0; i<n; i++) {
967: idy = *indicesy++;
968: y[idy] += x[0];
969: y[idy+1] += x[1];
970: y[idy+2] += x[2];
971: y[idy+3] += x[3];
972: y[idy+4] += x[4];
973: y[idy+5] += x[5];
974: x += 6;
975: }
976: break;
977: #if !defined(PETSC_USE_COMPLEX)
978: case MAX_VALUES:
979: for (i=0; i<n; i++) {
980: idy = *indicesy++;
981: y[idy] = PetscMax(y[idy],x[0]);
982: y[idy+1] = PetscMax(y[idy+1],x[1]);
983: y[idy+2] = PetscMax(y[idy+2],x[2]);
984: y[idy+3] = PetscMax(y[idy+3],x[3]);
985: y[idy+4] = PetscMax(y[idy+4],x[4]);
986: y[idy+5] = PetscMax(y[idy+5],x[5]);
987: x += 6;
988: }
989: #else
990: case MAX_VALUES:
991: #endif
992: case NOT_SET_VALUES:
993: break;
994: default:
995: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
996: }
997: return(0);
998: }
1000: PETSC_STATIC_INLINE PetscErrorCode Scatter_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1001: {
1002: PetscInt i,idx,idy;
1005: switch (addv) {
1006: case INSERT_VALUES:
1007: case INSERT_ALL_VALUES:
1008: for (i=0; i<n; i++) {
1009: idx = *indicesx++;
1010: idy = *indicesy++;
1011: y[idy] = x[idx];
1012: y[idy+1] = x[idx+1];
1013: y[idy+2] = x[idx+2];
1014: y[idy+3] = x[idx+3];
1015: y[idy+4] = x[idx+4];
1016: y[idy+5] = x[idx+5];
1017: }
1018: break;
1019: case ADD_VALUES:
1020: case ADD_ALL_VALUES:
1021: for (i=0; i<n; i++) {
1022: idx = *indicesx++;
1023: idy = *indicesy++;
1024: y[idy] += x[idx];
1025: y[idy+1] += x[idx+1];
1026: y[idy+2] += x[idx+2];
1027: y[idy+3] += x[idx+3];
1028: y[idy+4] += x[idx+4];
1029: y[idy+5] += x[idx+5];
1030: }
1031: break;
1032: #if !defined(PETSC_USE_COMPLEX)
1033: case MAX_VALUES:
1034: for (i=0; i<n; i++) {
1035: idx = *indicesx++;
1036: idy = *indicesy++;
1037: y[idy] = PetscMax(y[idy],x[idx]);
1038: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1039: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1040: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1041: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1042: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1043: }
1044: #else
1045: case MAX_VALUES:
1046: #endif
1047: case NOT_SET_VALUES:
1048: break;
1049: default:
1050: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1051: }
1052: return(0);
1053: }
1054: /* ----------------------------------------------------------------------------------------------- */
1055: PETSC_STATIC_INLINE void Pack_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1056: {
1057: PetscInt i,idx;
1059: for (i=0; i<n; i++) {
1060: idx = *indicesx++;
1061: y[0] = x[idx];
1062: y[1] = x[idx+1];
1063: y[2] = x[idx+2];
1064: y[3] = x[idx+3];
1065: y[4] = x[idx+4];
1066: y[5] = x[idx+5];
1067: y[6] = x[idx+6];
1068: y += 7;
1069: }
1070: }
1072: PETSC_STATIC_INLINE PetscErrorCode UnPack_7(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1073: {
1074: PetscInt i,idy;
1077: switch (addv) {
1078: case INSERT_VALUES:
1079: case INSERT_ALL_VALUES:
1080: for (i=0; i<n; i++) {
1081: idy = *indicesy++;
1082: y[idy] = x[0];
1083: y[idy+1] = x[1];
1084: y[idy+2] = x[2];
1085: y[idy+3] = x[3];
1086: y[idy+4] = x[4];
1087: y[idy+5] = x[5];
1088: y[idy+6] = x[6];
1089: x += 7;
1090: }
1091: break;
1092: case ADD_VALUES:
1093: case ADD_ALL_VALUES:
1094: for (i=0; i<n; i++) {
1095: idy = *indicesy++;
1096: y[idy] += x[0];
1097: y[idy+1] += x[1];
1098: y[idy+2] += x[2];
1099: y[idy+3] += x[3];
1100: y[idy+4] += x[4];
1101: y[idy+5] += x[5];
1102: y[idy+6] += x[6];
1103: x += 7;
1104: }
1105: break;
1106: #if !defined(PETSC_USE_COMPLEX)
1107: case MAX_VALUES:
1108: for (i=0; i<n; i++) {
1109: idy = *indicesy++;
1110: y[idy] = PetscMax(y[idy],x[0]);
1111: y[idy+1] = PetscMax(y[idy+1],x[1]);
1112: y[idy+2] = PetscMax(y[idy+2],x[2]);
1113: y[idy+3] = PetscMax(y[idy+3],x[3]);
1114: y[idy+4] = PetscMax(y[idy+4],x[4]);
1115: y[idy+5] = PetscMax(y[idy+5],x[5]);
1116: y[idy+6] = PetscMax(y[idy+6],x[6]);
1117: x += 7;
1118: }
1119: #else
1120: case MAX_VALUES:
1121: #endif
1122: case NOT_SET_VALUES:
1123: break;
1124: default:
1125: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1126: }
1127: return(0);
1128: }
1130: PETSC_STATIC_INLINE PetscErrorCode Scatter_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1131: {
1132: PetscInt i,idx,idy;
1135: switch (addv) {
1136: case INSERT_VALUES:
1137: case INSERT_ALL_VALUES:
1138: for (i=0; i<n; i++) {
1139: idx = *indicesx++;
1140: idy = *indicesy++;
1141: y[idy] = x[idx];
1142: y[idy+1] = x[idx+1];
1143: y[idy+2] = x[idx+2];
1144: y[idy+3] = x[idx+3];
1145: y[idy+4] = x[idx+4];
1146: y[idy+5] = x[idx+5];
1147: y[idy+6] = x[idx+6];
1148: }
1149: break;
1150: case ADD_VALUES:
1151: case ADD_ALL_VALUES:
1152: for (i=0; i<n; i++) {
1153: idx = *indicesx++;
1154: idy = *indicesy++;
1155: y[idy] += x[idx];
1156: y[idy+1] += x[idx+1];
1157: y[idy+2] += x[idx+2];
1158: y[idy+3] += x[idx+3];
1159: y[idy+4] += x[idx+4];
1160: y[idy+5] += x[idx+5];
1161: y[idy+6] += x[idx+6];
1162: }
1163: break;
1164: #if !defined(PETSC_USE_COMPLEX)
1165: case MAX_VALUES:
1166: for (i=0; i<n; i++) {
1167: idx = *indicesx++;
1168: idy = *indicesy++;
1169: y[idy] = PetscMax(y[idy],x[idx]);
1170: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1171: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1172: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1173: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1174: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1175: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1176: }
1177: #else
1178: case MAX_VALUES:
1179: #endif
1180: case NOT_SET_VALUES:
1181: break;
1182: default:
1183: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1184: }
1185: return(0);
1186: }
1187: /* ----------------------------------------------------------------------------------------------- */
1188: PETSC_STATIC_INLINE void Pack_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1189: {
1190: PetscInt i,idx;
1192: for (i=0; i<n; i++) {
1193: idx = *indicesx++;
1194: y[0] = x[idx];
1195: y[1] = x[idx+1];
1196: y[2] = x[idx+2];
1197: y[3] = x[idx+3];
1198: y[4] = x[idx+4];
1199: y[5] = x[idx+5];
1200: y[6] = x[idx+6];
1201: y[7] = x[idx+7];
1202: y += 8;
1203: }
1204: }
1206: PETSC_STATIC_INLINE PetscErrorCode UnPack_8(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1207: {
1208: PetscInt i,idy;
1211: switch (addv) {
1212: case INSERT_VALUES:
1213: case INSERT_ALL_VALUES:
1214: for (i=0; i<n; i++) {
1215: idy = *indicesy++;
1216: y[idy] = x[0];
1217: y[idy+1] = x[1];
1218: y[idy+2] = x[2];
1219: y[idy+3] = x[3];
1220: y[idy+4] = x[4];
1221: y[idy+5] = x[5];
1222: y[idy+6] = x[6];
1223: y[idy+7] = x[7];
1224: x += 8;
1225: }
1226: break;
1227: case ADD_VALUES:
1228: case ADD_ALL_VALUES:
1229: for (i=0; i<n; i++) {
1230: idy = *indicesy++;
1231: y[idy] += x[0];
1232: y[idy+1] += x[1];
1233: y[idy+2] += x[2];
1234: y[idy+3] += x[3];
1235: y[idy+4] += x[4];
1236: y[idy+5] += x[5];
1237: y[idy+6] += x[6];
1238: y[idy+7] += x[7];
1239: x += 8;
1240: }
1241: break;
1242: #if !defined(PETSC_USE_COMPLEX)
1243: case MAX_VALUES:
1244: for (i=0; i<n; i++) {
1245: idy = *indicesy++;
1246: y[idy] = PetscMax(y[idy],x[0]);
1247: y[idy+1] = PetscMax(y[idy+1],x[1]);
1248: y[idy+2] = PetscMax(y[idy+2],x[2]);
1249: y[idy+3] = PetscMax(y[idy+3],x[3]);
1250: y[idy+4] = PetscMax(y[idy+4],x[4]);
1251: y[idy+5] = PetscMax(y[idy+5],x[5]);
1252: y[idy+6] = PetscMax(y[idy+6],x[6]);
1253: y[idy+7] = PetscMax(y[idy+7],x[7]);
1254: x += 8;
1255: }
1256: #else
1257: case MAX_VALUES:
1258: #endif
1259: case NOT_SET_VALUES:
1260: break;
1261: default:
1262: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1263: }
1264: return(0);
1265: }
1267: PETSC_STATIC_INLINE PetscErrorCode Scatter_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1268: {
1269: PetscInt i,idx,idy;
1272: switch (addv) {
1273: case INSERT_VALUES:
1274: case INSERT_ALL_VALUES:
1275: for (i=0; i<n; i++) {
1276: idx = *indicesx++;
1277: idy = *indicesy++;
1278: y[idy] = x[idx];
1279: y[idy+1] = x[idx+1];
1280: y[idy+2] = x[idx+2];
1281: y[idy+3] = x[idx+3];
1282: y[idy+4] = x[idx+4];
1283: y[idy+5] = x[idx+5];
1284: y[idy+6] = x[idx+6];
1285: y[idy+7] = x[idx+7];
1286: }
1287: break;
1288: case ADD_VALUES:
1289: case ADD_ALL_VALUES:
1290: for (i=0; i<n; i++) {
1291: idx = *indicesx++;
1292: idy = *indicesy++;
1293: y[idy] += x[idx];
1294: y[idy+1] += x[idx+1];
1295: y[idy+2] += x[idx+2];
1296: y[idy+3] += x[idx+3];
1297: y[idy+4] += x[idx+4];
1298: y[idy+5] += x[idx+5];
1299: y[idy+6] += x[idx+6];
1300: y[idy+7] += x[idx+7];
1301: }
1302: break;
1303: #if !defined(PETSC_USE_COMPLEX)
1304: case MAX_VALUES:
1305: for (i=0; i<n; i++) {
1306: idx = *indicesx++;
1307: idy = *indicesy++;
1308: y[idy] = PetscMax(y[idy],x[idx]);
1309: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1310: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1311: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1312: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1313: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1314: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1315: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1316: }
1317: #else
1318: case MAX_VALUES:
1319: #endif
1320: case NOT_SET_VALUES:
1321: break;
1322: default:
1323: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1324: }
1325: return(0);
1326: }
1328: PETSC_STATIC_INLINE void Pack_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1329: {
1330: PetscInt i,idx;
1332: for (i=0; i<n; i++) {
1333: idx = *indicesx++;
1334: y[0] = x[idx];
1335: y[1] = x[idx+1];
1336: y[2] = x[idx+2];
1337: y[3] = x[idx+3];
1338: y[4] = x[idx+4];
1339: y[5] = x[idx+5];
1340: y[6] = x[idx+6];
1341: y[7] = x[idx+7];
1342: y[8] = x[idx+8];
1343: y += 9;
1344: }
1345: }
1347: PETSC_STATIC_INLINE PetscErrorCode UnPack_9(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1348: {
1349: PetscInt i,idy;
1352: switch (addv) {
1353: case INSERT_VALUES:
1354: case INSERT_ALL_VALUES:
1355: for (i=0; i<n; i++) {
1356: idy = *indicesy++;
1357: y[idy] = x[0];
1358: y[idy+1] = x[1];
1359: y[idy+2] = x[2];
1360: y[idy+3] = x[3];
1361: y[idy+4] = x[4];
1362: y[idy+5] = x[5];
1363: y[idy+6] = x[6];
1364: y[idy+7] = x[7];
1365: y[idy+8] = x[8];
1366: x += 9;
1367: }
1368: break;
1369: case ADD_VALUES:
1370: case ADD_ALL_VALUES:
1371: for (i=0; i<n; i++) {
1372: idy = *indicesy++;
1373: y[idy] += x[0];
1374: y[idy+1] += x[1];
1375: y[idy+2] += x[2];
1376: y[idy+3] += x[3];
1377: y[idy+4] += x[4];
1378: y[idy+5] += x[5];
1379: y[idy+6] += x[6];
1380: y[idy+7] += x[7];
1381: y[idy+8] += x[8];
1382: x += 9;
1383: }
1384: break;
1385: #if !defined(PETSC_USE_COMPLEX)
1386: case MAX_VALUES:
1387: for (i=0; i<n; i++) {
1388: idy = *indicesy++;
1389: y[idy] = PetscMax(y[idy],x[0]);
1390: y[idy+1] = PetscMax(y[idy+1],x[1]);
1391: y[idy+2] = PetscMax(y[idy+2],x[2]);
1392: y[idy+3] = PetscMax(y[idy+3],x[3]);
1393: y[idy+4] = PetscMax(y[idy+4],x[4]);
1394: y[idy+5] = PetscMax(y[idy+5],x[5]);
1395: y[idy+6] = PetscMax(y[idy+6],x[6]);
1396: y[idy+7] = PetscMax(y[idy+7],x[7]);
1397: y[idy+8] = PetscMax(y[idy+8],x[8]);
1398: x += 9;
1399: }
1400: #else
1401: case MAX_VALUES:
1402: #endif
1403: case NOT_SET_VALUES:
1404: break;
1405: default:
1406: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1407: }
1408: return(0);
1409: }
1411: PETSC_STATIC_INLINE PetscErrorCode Scatter_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1412: {
1413: PetscInt i,idx,idy;
1416: switch (addv) {
1417: case INSERT_VALUES:
1418: case INSERT_ALL_VALUES:
1419: for (i=0; i<n; i++) {
1420: idx = *indicesx++;
1421: idy = *indicesy++;
1422: y[idy] = x[idx];
1423: y[idy+1] = x[idx+1];
1424: y[idy+2] = x[idx+2];
1425: y[idy+3] = x[idx+3];
1426: y[idy+4] = x[idx+4];
1427: y[idy+5] = x[idx+5];
1428: y[idy+6] = x[idx+6];
1429: y[idy+7] = x[idx+7];
1430: y[idy+8] = x[idx+8];
1431: }
1432: break;
1433: case ADD_VALUES:
1434: case ADD_ALL_VALUES:
1435: for (i=0; i<n; i++) {
1436: idx = *indicesx++;
1437: idy = *indicesy++;
1438: y[idy] += x[idx];
1439: y[idy+1] += x[idx+1];
1440: y[idy+2] += x[idx+2];
1441: y[idy+3] += x[idx+3];
1442: y[idy+4] += x[idx+4];
1443: y[idy+5] += x[idx+5];
1444: y[idy+6] += x[idx+6];
1445: y[idy+7] += x[idx+7];
1446: y[idy+8] += x[idx+8];
1447: }
1448: break;
1449: #if !defined(PETSC_USE_COMPLEX)
1450: case MAX_VALUES:
1451: for (i=0; i<n; i++) {
1452: idx = *indicesx++;
1453: idy = *indicesy++;
1454: y[idy] = PetscMax(y[idy],x[idx]);
1455: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1456: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1457: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1458: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1459: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1460: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1461: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1462: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1463: }
1464: #else
1465: case MAX_VALUES:
1466: #endif
1467: case NOT_SET_VALUES:
1468: break;
1469: default:
1470: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1471: }
1472: return(0);
1473: }
1475: PETSC_STATIC_INLINE void Pack_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1476: {
1477: PetscInt i,idx;
1479: for (i=0; i<n; i++) {
1480: idx = *indicesx++;
1481: y[0] = x[idx];
1482: y[1] = x[idx+1];
1483: y[2] = x[idx+2];
1484: y[3] = x[idx+3];
1485: y[4] = x[idx+4];
1486: y[5] = x[idx+5];
1487: y[6] = x[idx+6];
1488: y[7] = x[idx+7];
1489: y[8] = x[idx+8];
1490: y[9] = x[idx+9];
1491: y += 10;
1492: }
1493: }
1495: PETSC_STATIC_INLINE PetscErrorCode UnPack_10(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1496: {
1497: PetscInt i,idy;
1500: switch (addv) {
1501: case INSERT_VALUES:
1502: case INSERT_ALL_VALUES:
1503: for (i=0; i<n; i++) {
1504: idy = *indicesy++;
1505: y[idy] = x[0];
1506: y[idy+1] = x[1];
1507: y[idy+2] = x[2];
1508: y[idy+3] = x[3];
1509: y[idy+4] = x[4];
1510: y[idy+5] = x[5];
1511: y[idy+6] = x[6];
1512: y[idy+7] = x[7];
1513: y[idy+8] = x[8];
1514: y[idy+9] = x[9];
1515: x += 10;
1516: }
1517: break;
1518: case ADD_VALUES:
1519: case ADD_ALL_VALUES:
1520: for (i=0; i<n; i++) {
1521: idy = *indicesy++;
1522: y[idy] += x[0];
1523: y[idy+1] += x[1];
1524: y[idy+2] += x[2];
1525: y[idy+3] += x[3];
1526: y[idy+4] += x[4];
1527: y[idy+5] += x[5];
1528: y[idy+6] += x[6];
1529: y[idy+7] += x[7];
1530: y[idy+8] += x[8];
1531: y[idy+9] += x[9];
1532: x += 10;
1533: }
1534: break;
1535: #if !defined(PETSC_USE_COMPLEX)
1536: case MAX_VALUES:
1537: for (i=0; i<n; i++) {
1538: idy = *indicesy++;
1539: y[idy] = PetscMax(y[idy],x[0]);
1540: y[idy+1] = PetscMax(y[idy+1],x[1]);
1541: y[idy+2] = PetscMax(y[idy+2],x[2]);
1542: y[idy+3] = PetscMax(y[idy+3],x[3]);
1543: y[idy+4] = PetscMax(y[idy+4],x[4]);
1544: y[idy+5] = PetscMax(y[idy+5],x[5]);
1545: y[idy+6] = PetscMax(y[idy+6],x[6]);
1546: y[idy+7] = PetscMax(y[idy+7],x[7]);
1547: y[idy+8] = PetscMax(y[idy+8],x[8]);
1548: y[idy+9] = PetscMax(y[idy+9],x[9]);
1549: x += 10;
1550: }
1551: #else
1552: case MAX_VALUES:
1553: #endif
1554: case NOT_SET_VALUES:
1555: break;
1556: default:
1557: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1558: }
1559: return(0);
1560: }
1562: PETSC_STATIC_INLINE PetscErrorCode Scatter_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1563: {
1564: PetscInt i,idx,idy;
1567: switch (addv) {
1568: case INSERT_VALUES:
1569: case INSERT_ALL_VALUES:
1570: for (i=0; i<n; i++) {
1571: idx = *indicesx++;
1572: idy = *indicesy++;
1573: y[idy] = x[idx];
1574: y[idy+1] = x[idx+1];
1575: y[idy+2] = x[idx+2];
1576: y[idy+3] = x[idx+3];
1577: y[idy+4] = x[idx+4];
1578: y[idy+5] = x[idx+5];
1579: y[idy+6] = x[idx+6];
1580: y[idy+7] = x[idx+7];
1581: y[idy+8] = x[idx+8];
1582: y[idy+9] = x[idx+9];
1583: }
1584: break;
1585: case ADD_VALUES:
1586: case ADD_ALL_VALUES:
1587: for (i=0; i<n; i++) {
1588: idx = *indicesx++;
1589: idy = *indicesy++;
1590: y[idy] += x[idx];
1591: y[idy+1] += x[idx+1];
1592: y[idy+2] += x[idx+2];
1593: y[idy+3] += x[idx+3];
1594: y[idy+4] += x[idx+4];
1595: y[idy+5] += x[idx+5];
1596: y[idy+6] += x[idx+6];
1597: y[idy+7] += x[idx+7];
1598: y[idy+8] += x[idx+8];
1599: y[idy+9] += x[idx+9];
1600: }
1601: break;
1602: #if !defined(PETSC_USE_COMPLEX)
1603: case MAX_VALUES:
1604: for (i=0; i<n; i++) {
1605: idx = *indicesx++;
1606: idy = *indicesy++;
1607: y[idy] = PetscMax(y[idy],x[idx]);
1608: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1609: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1610: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1611: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1612: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1613: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1614: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1615: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1616: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1617: }
1618: #else
1619: case MAX_VALUES:
1620: #endif
1621: case NOT_SET_VALUES:
1622: break;
1623: default:
1624: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1625: }
1626: return(0);
1627: }
1629: PETSC_STATIC_INLINE void Pack_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1630: {
1631: PetscInt i,idx;
1633: for (i=0; i<n; i++) {
1634: idx = *indicesx++;
1635: y[0] = x[idx];
1636: y[1] = x[idx+1];
1637: y[2] = x[idx+2];
1638: y[3] = x[idx+3];
1639: y[4] = x[idx+4];
1640: y[5] = x[idx+5];
1641: y[6] = x[idx+6];
1642: y[7] = x[idx+7];
1643: y[8] = x[idx+8];
1644: y[9] = x[idx+9];
1645: y[10] = x[idx+10];
1646: y += 11;
1647: }
1648: }
1650: PETSC_STATIC_INLINE PetscErrorCode UnPack_11(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1651: {
1652: PetscInt i,idy;
1655: switch (addv) {
1656: case INSERT_VALUES:
1657: case INSERT_ALL_VALUES:
1658: for (i=0; i<n; i++) {
1659: idy = *indicesy++;
1660: y[idy] = x[0];
1661: y[idy+1] = x[1];
1662: y[idy+2] = x[2];
1663: y[idy+3] = x[3];
1664: y[idy+4] = x[4];
1665: y[idy+5] = x[5];
1666: y[idy+6] = x[6];
1667: y[idy+7] = x[7];
1668: y[idy+8] = x[8];
1669: y[idy+9] = x[9];
1670: y[idy+10] = x[10];
1671: x += 11;
1672: }
1673: break;
1674: case ADD_VALUES:
1675: case ADD_ALL_VALUES:
1676: for (i=0; i<n; i++) {
1677: idy = *indicesy++;
1678: y[idy] += x[0];
1679: y[idy+1] += x[1];
1680: y[idy+2] += x[2];
1681: y[idy+3] += x[3];
1682: y[idy+4] += x[4];
1683: y[idy+5] += x[5];
1684: y[idy+6] += x[6];
1685: y[idy+7] += x[7];
1686: y[idy+8] += x[8];
1687: y[idy+9] += x[9];
1688: y[idy+10] += x[10];
1689: x += 11;
1690: }
1691: break;
1692: #if !defined(PETSC_USE_COMPLEX)
1693: case MAX_VALUES:
1694: for (i=0; i<n; i++) {
1695: idy = *indicesy++;
1696: y[idy] = PetscMax(y[idy],x[0]);
1697: y[idy+1] = PetscMax(y[idy+1],x[1]);
1698: y[idy+2] = PetscMax(y[idy+2],x[2]);
1699: y[idy+3] = PetscMax(y[idy+3],x[3]);
1700: y[idy+4] = PetscMax(y[idy+4],x[4]);
1701: y[idy+5] = PetscMax(y[idy+5],x[5]);
1702: y[idy+6] = PetscMax(y[idy+6],x[6]);
1703: y[idy+7] = PetscMax(y[idy+7],x[7]);
1704: y[idy+8] = PetscMax(y[idy+8],x[8]);
1705: y[idy+9] = PetscMax(y[idy+9],x[9]);
1706: y[idy+10] = PetscMax(y[idy+10],x[10]);
1707: x += 11;
1708: }
1709: #else
1710: case MAX_VALUES:
1711: #endif
1712: case NOT_SET_VALUES:
1713: break;
1714: default:
1715: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1716: }
1717: return(0);
1718: }
1720: PETSC_STATIC_INLINE PetscErrorCode Scatter_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1721: {
1722: PetscInt i,idx,idy;
1725: switch (addv) {
1726: case INSERT_VALUES:
1727: case INSERT_ALL_VALUES:
1728: for (i=0; i<n; i++) {
1729: idx = *indicesx++;
1730: idy = *indicesy++;
1731: y[idy] = x[idx];
1732: y[idy+1] = x[idx+1];
1733: y[idy+2] = x[idx+2];
1734: y[idy+3] = x[idx+3];
1735: y[idy+4] = x[idx+4];
1736: y[idy+5] = x[idx+5];
1737: y[idy+6] = x[idx+6];
1738: y[idy+7] = x[idx+7];
1739: y[idy+8] = x[idx+8];
1740: y[idy+9] = x[idx+9];
1741: y[idy+10] = x[idx+10];
1742: }
1743: break;
1744: case ADD_VALUES:
1745: case ADD_ALL_VALUES:
1746: for (i=0; i<n; i++) {
1747: idx = *indicesx++;
1748: idy = *indicesy++;
1749: y[idy] += x[idx];
1750: y[idy+1] += x[idx+1];
1751: y[idy+2] += x[idx+2];
1752: y[idy+3] += x[idx+3];
1753: y[idy+4] += x[idx+4];
1754: y[idy+5] += x[idx+5];
1755: y[idy+6] += x[idx+6];
1756: y[idy+7] += x[idx+7];
1757: y[idy+8] += x[idx+8];
1758: y[idy+9] += x[idx+9];
1759: y[idy+10] += x[idx+10];
1760: }
1761: break;
1762: #if !defined(PETSC_USE_COMPLEX)
1763: case MAX_VALUES:
1764: for (i=0; i<n; i++) {
1765: idx = *indicesx++;
1766: idy = *indicesy++;
1767: y[idy] = PetscMax(y[idy],x[idx]);
1768: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1769: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1770: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1771: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1772: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1773: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1774: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1775: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1776: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1777: y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1778: }
1779: #else
1780: case MAX_VALUES:
1781: #endif
1782: case NOT_SET_VALUES:
1783: break;
1784: default:
1785: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1786: }
1787: return(0);
1788: }
1790: /* ----------------------------------------------------------------------------------------------- */
1791: PETSC_STATIC_INLINE void Pack_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1792: {
1793: PetscInt i,idx;
1795: for (i=0; i<n; i++) {
1796: idx = *indicesx++;
1797: y[0] = x[idx];
1798: y[1] = x[idx+1];
1799: y[2] = x[idx+2];
1800: y[3] = x[idx+3];
1801: y[4] = x[idx+4];
1802: y[5] = x[idx+5];
1803: y[6] = x[idx+6];
1804: y[7] = x[idx+7];
1805: y[8] = x[idx+8];
1806: y[9] = x[idx+9];
1807: y[10] = x[idx+10];
1808: y[11] = x[idx+11];
1809: y += 12;
1810: }
1811: }
1813: PETSC_STATIC_INLINE PetscErrorCode UnPack_12(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1814: {
1815: PetscInt i,idy;
1818: switch (addv) {
1819: case INSERT_VALUES:
1820: case INSERT_ALL_VALUES:
1821: for (i=0; i<n; i++) {
1822: idy = *indicesy++;
1823: y[idy] = x[0];
1824: y[idy+1] = x[1];
1825: y[idy+2] = x[2];
1826: y[idy+3] = x[3];
1827: y[idy+4] = x[4];
1828: y[idy+5] = x[5];
1829: y[idy+6] = x[6];
1830: y[idy+7] = x[7];
1831: y[idy+8] = x[8];
1832: y[idy+9] = x[9];
1833: y[idy+10] = x[10];
1834: y[idy+11] = x[11];
1835: x += 12;
1836: }
1837: break;
1838: case ADD_VALUES:
1839: case ADD_ALL_VALUES:
1840: for (i=0; i<n; i++) {
1841: idy = *indicesy++;
1842: y[idy] += x[0];
1843: y[idy+1] += x[1];
1844: y[idy+2] += x[2];
1845: y[idy+3] += x[3];
1846: y[idy+4] += x[4];
1847: y[idy+5] += x[5];
1848: y[idy+6] += x[6];
1849: y[idy+7] += x[7];
1850: y[idy+8] += x[8];
1851: y[idy+9] += x[9];
1852: y[idy+10] += x[10];
1853: y[idy+11] += x[11];
1854: x += 12;
1855: }
1856: break;
1857: #if !defined(PETSC_USE_COMPLEX)
1858: case MAX_VALUES:
1859: for (i=0; i<n; i++) {
1860: idy = *indicesy++;
1861: y[idy] = PetscMax(y[idy],x[0]);
1862: y[idy+1] = PetscMax(y[idy+1],x[1]);
1863: y[idy+2] = PetscMax(y[idy+2],x[2]);
1864: y[idy+3] = PetscMax(y[idy+3],x[3]);
1865: y[idy+4] = PetscMax(y[idy+4],x[4]);
1866: y[idy+5] = PetscMax(y[idy+5],x[5]);
1867: y[idy+6] = PetscMax(y[idy+6],x[6]);
1868: y[idy+7] = PetscMax(y[idy+7],x[7]);
1869: y[idy+8] = PetscMax(y[idy+8],x[8]);
1870: y[idy+9] = PetscMax(y[idy+9],x[9]);
1871: y[idy+10] = PetscMax(y[idy+10],x[10]);
1872: y[idy+11] = PetscMax(y[idy+11],x[11]);
1873: x += 12;
1874: }
1875: #else
1876: case MAX_VALUES:
1877: #endif
1878: case NOT_SET_VALUES:
1879: break;
1880: default:
1881: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1882: }
1883: return(0);
1884: }
1886: PETSC_STATIC_INLINE PetscErrorCode Scatter_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1887: {
1888: PetscInt i,idx,idy;
1891: switch (addv) {
1892: case INSERT_VALUES:
1893: case INSERT_ALL_VALUES:
1894: for (i=0; i<n; i++) {
1895: idx = *indicesx++;
1896: idy = *indicesy++;
1897: y[idy] = x[idx];
1898: y[idy+1] = x[idx+1];
1899: y[idy+2] = x[idx+2];
1900: y[idy+3] = x[idx+3];
1901: y[idy+4] = x[idx+4];
1902: y[idy+5] = x[idx+5];
1903: y[idy+6] = x[idx+6];
1904: y[idy+7] = x[idx+7];
1905: y[idy+8] = x[idx+8];
1906: y[idy+9] = x[idx+9];
1907: y[idy+10] = x[idx+10];
1908: y[idy+11] = x[idx+11];
1909: }
1910: break;
1911: case ADD_VALUES:
1912: case ADD_ALL_VALUES:
1913: for (i=0; i<n; i++) {
1914: idx = *indicesx++;
1915: idy = *indicesy++;
1916: y[idy] += x[idx];
1917: y[idy+1] += x[idx+1];
1918: y[idy+2] += x[idx+2];
1919: y[idy+3] += x[idx+3];
1920: y[idy+4] += x[idx+4];
1921: y[idy+5] += x[idx+5];
1922: y[idy+6] += x[idx+6];
1923: y[idy+7] += x[idx+7];
1924: y[idy+8] += x[idx+8];
1925: y[idy+9] += x[idx+9];
1926: y[idy+10] += x[idx+10];
1927: y[idy+11] += x[idx+11];
1928: }
1929: break;
1930: #if !defined(PETSC_USE_COMPLEX)
1931: case MAX_VALUES:
1932: for (i=0; i<n; i++) {
1933: idx = *indicesx++;
1934: idy = *indicesy++;
1935: y[idy] = PetscMax(y[idy],x[idx]);
1936: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1937: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1938: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1939: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1940: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1941: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1942: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1943: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1944: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1945: y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1946: y[idy+11] = PetscMax(y[idy+11],x[idx+11]);
1947: }
1948: #else
1949: case MAX_VALUES:
1950: #endif
1951: case NOT_SET_VALUES:
1952: break;
1953: default:
1954: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1955: }
1956: return(0);
1957: }
1959: /* ----------------------------------------------------------------------------------------------- */
1960: PETSC_STATIC_INLINE void Pack_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1961: {
1962: PetscInt i,idx;
1965: for (i=0; i<n; i++) {
1966: idx = *indicesx++;
1967: PetscArraycpy(y,x + idx,bs);
1968: y += bs;
1969: }
1970: }
1972: PETSC_STATIC_INLINE PetscErrorCode UnPack_bs(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1973: {
1974: PetscInt i,idy,j;
1978: switch (addv) {
1979: case INSERT_VALUES:
1980: case INSERT_ALL_VALUES:
1981: for (i=0; i<n; i++) {
1982: idy = *indicesy++;
1983: PetscArraycpy(y + idy,x,bs);
1984: x += bs;
1985: }
1986: break;
1987: case ADD_VALUES:
1988: case ADD_ALL_VALUES:
1989: for (i=0; i<n; i++) {
1990: idy = *indicesy++;
1991: for (j=0; j<bs; j++) y[idy+j] += x[j];
1992: x += bs;
1993: }
1994: break;
1995: #if !defined(PETSC_USE_COMPLEX)
1996: case MAX_VALUES:
1997: for (i=0; i<n; i++) {
1998: idy = *indicesy++;
1999: for (j=0; j<bs; j++) y[idy+j] = PetscMax(y[idy+j],x[j]);
2000: x += bs;
2001: }
2002: #else
2003: case MAX_VALUES:
2004: #endif
2005: case NOT_SET_VALUES:
2006: break;
2007: default:
2008: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2009: }
2010: return(0);
2011: }
2013: PETSC_STATIC_INLINE PetscErrorCode Scatter_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
2014: {
2015: PetscInt i,idx,idy,j;
2019: switch (addv) {
2020: case INSERT_VALUES:
2021: case INSERT_ALL_VALUES:
2022: for (i=0; i<n; i++) {
2023: idx = *indicesx++;
2024: idy = *indicesy++;
2025: PetscArraycpy(y + idy, x + idx,bs);
2026: }
2027: break;
2028: case ADD_VALUES:
2029: case ADD_ALL_VALUES:
2030: for (i=0; i<n; i++) {
2031: idx = *indicesx++;
2032: idy = *indicesy++;
2033: for (j=0; j<bs; j++) y[idy+j] += x[idx+j];
2034: }
2035: break;
2036: #if !defined(PETSC_USE_COMPLEX)
2037: case MAX_VALUES:
2038: for (i=0; i<n; i++) {
2039: idx = *indicesx++;
2040: idy = *indicesy++;
2041: for (j=0; j<bs; j++) y[idy+j] = PetscMax(y[idy+j],x[idx+j]);
2042: }
2043: #else
2044: case MAX_VALUES:
2045: #endif
2046: case NOT_SET_VALUES:
2047: break;
2048: default:
2049: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2050: }
2051: return(0);
2052: }
2054: /* Create the VecScatterBegin/End_<BS> routines for our chosen block sizes; see the note after the includes */
2055: #define BS 1
2056: #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2057: #define BS 2
2058: #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2059: #define BS 3
2060: #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2061: #define BS 4
2062: #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2063: #define BS 5
2064: #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2065: #define BS 6
2066: #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2067: #define BS 7
2068: #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2069: #define BS 8
2070: #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2071: #define BS 9
2072: #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2073: #define BS 10
2074: #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2075: #define BS 11
2076: #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2077: #define BS 12
2078: #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2079: #define BS bs
2080: #include <../src/vec/vscat/impls/mpi3/vpscat.h>
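/*
   Each #include of vpscat.h above instantiates VecScatterBegin_<BS>/
   VecScatterEnd_<BS> (and the MPI3Node variants) for one block size: the
   header presumably pastes the current value of BS into the function names
   and into calls to the matching Pack_/UnPack_/Scatter_ kernels, then
   undefines BS so the next inclusion can redefine it; a poor man's C
   template. The final inclusion, with BS defined to the runtime variable bs,
   generates the generic VecScatterBegin_bs/VecScatterEnd_bs fallback used
   for any other block size.
*/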
2082: /* ==========================================================================================*/
2083: /* create parallel to sequential scatter context */
2085: extern PetscErrorCode VecScatterCreateCommon_PtoS_MPI3(VecScatter_MPI_General*,VecScatter_MPI_General*,VecScatter);
2087: /*
2088: bs indicates how many elements there are in each block; normally this is 1 (see the note below).
2090: This routine also checks that the message sizes needed fit in a PetscMPIInt.
2091: */
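/*
   With block size bs, entry i of inidx[]/inidy[] names a block of bs
   consecutive scalars, so the first scalar of block i sits at offset bs*i in
   the vector array; for example with bs = 3, block index 2 covers scalar
   entries 6, 7 and 8. This is why the code below multiplies every incoming
   index by bs (via PetscIntMultError(), which also traps PetscInt overflow).
*/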
2092: #include <petscbt.h>
2093: PetscErrorCode VecScatterCreateLocal_PtoS_MPI3(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2094: {
2095: VecScatter_MPI_General *from,*to;
2096: PetscMPIInt size,rank,mrank,imdex,tag,n;
2097: PetscInt *source = NULL,*owners = NULL,nxr;
2098: PetscInt *lowner = NULL,*start = NULL,*lsharedowner = NULL,*sharedstart = NULL,lengthy,lengthx;
2099: PetscMPIInt *nprocs = NULL,nrecvs,nrecvshared;
2100: PetscInt i,j,idx = 0,nsends;
2101: PetscMPIInt *owner = NULL;
2102: PetscInt *starts = NULL,count,slen,slenshared;
2103: PetscInt *rvalues,*svalues,base,*values,nprocslocal,recvtotal,*rsvalues;
2104: PetscMPIInt *onodes1,*olengths1;
2105: MPI_Comm comm;
2106: MPI_Request *send_waits = NULL,*recv_waits = NULL;
2107: MPI_Status recv_status,*send_status;
2108: PetscErrorCode ierr;
2109: PetscShmComm scomm;
2110: PetscMPIInt jj;
2111: MPI_Info info;
2112: MPI_Comm mscomm;
2113: MPI_Win sharedoffsetwin; /* Window that owns sharedspaceoffset */
2114: PetscInt *sharedspaceoffset;
2115: VecScatterType type;
2116: PetscBool mpi3node;
2118: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2119: PetscObjectGetComm((PetscObject)xin,&comm);
2120: MPI_Comm_rank(comm,&rank);
2121: MPI_Comm_size(comm,&size);
2123: VecScatterGetType(ctx,&type);
2124: PetscStrcmp(type,"mpi3node",&mpi3node);
2126: PetscShmCommGet(comm,&scomm);
2127: PetscShmCommGetMpiShmComm(scomm,&mscomm);
2129: MPI_Info_create(&info);
2130: MPI_Info_set(info, "alloc_shared_noncontig", "true");
2132: if (mpi3node) {
2133: /* Check if (parallel) inidx has duplicate indices.
2134: The current VecScatterEndMPI3Node() (case StoP) writes the sequential vector into the parallel vector.
2135: Writing to the same shared location without locking
2136: leads to incorrect scattering; see src/vec/vscat/examples/runex2_5 and runex3_5, and the sketch below. */
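/*
   Sketch of the duplicate detection: every rank of the shared-memory
   communicator publishes nx and inidx[] in a shared window; rank 0 of the
   node then walks the other ranks' lists and marks each index in a PetscBT
   bit table sized to the global vector length. The first index whose bit is
   already set proves a duplicate and sets ctx->is_duplicate:

       if (PetscBTLookupSet(table,index)) { duplicate found, stop searching }

   Only node rank 0 ends up with the answer, which is why
   VecScatterEndMPI3Node() checks it there.
*/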
2138: PetscInt *mem,**optr;
2139: MPI_Win swin;
2140: PetscMPIInt msize;
2142: MPI_Comm_size(mscomm,&msize);
2143: MPI_Comm_rank(mscomm,&mrank);
2145: PetscMalloc1(msize,&optr);
2146: MPIU_Win_allocate_shared((nx+1)*sizeof(PetscInt),sizeof(PetscInt),MPI_INFO_NULL,mscomm,&mem,&swin);
2148: /* Write local nx and inidx into mem */
2149: mem[0] = nx;
2150: for (i=1; i<=nx; i++) mem[i] = inidx[i-1];
2151: MPI_Barrier(mscomm); /* sync shared memory */
2153: if (!mrank) {
2154: PetscBT table;
2155: /* sz and dsp_unit are not used. Replacing them with NULL would cause MPI_Win_shared_query() to crash */
2156: MPI_Aint sz;
2157: PetscMPIInt dsp_unit;
2158: PetscInt N = xin->map->N;
2160: PetscBTCreate(N,&table); /* may not be scalable */
2161: PetscBTMemzero(N,table);
2163: jj = 0;
2164: while (jj<msize && !ctx->is_duplicate) {
2165: if (jj == mrank) {jj++; continue;}
2166: MPIU_Win_shared_query(swin,jj,&sz,&dsp_unit,&optr[jj]);
2167: for (j=1; j<=optr[jj][0]; j++) { /* optr[jj][0] = nx */
2168: if (PetscBTLookupSet(table,optr[jj][j])) {
2169: ctx->is_duplicate = PETSC_TRUE; /* only mrank=0 has this info., will be checked in VecScatterEndMPI3Node() */
2170: break;
2171: }
2172: }
2173: jj++;
2174: }
2175: PetscBTDestroy(&table);
2176: }
2178: if (swin != MPI_WIN_NULL) {MPI_Win_free(&swin);}
2179: PetscFree(optr);
2180: }
2182: owners = xin->map->range;
2183: VecGetSize(yin,&lengthy);
2184: VecGetSize(xin,&lengthx);
2186: /* first count number of contributors to each processor */
2187: PetscMalloc2(size,&nprocs,nx,&owner);
2188: PetscArrayzero(nprocs,size);
2190: j = 0;
2191: nsends = 0;
2192: for (i=0; i<nx; i++) {
2193: PetscIntMultError(bs,inidx[i],&idx);
2194: if (idx < owners[j]) j = 0;
2195: for (; j<size; j++) {
2196: if (idx < owners[j+1]) {
2197: if (!nprocs[j]++) nsends++;
2198: owner[i] = j;
2199: break;
2200: }
2201: }
2202: if (j == size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"i=%D: block entry %D not owned by any process, upper bound %D",i,idx,owners[size]);
2203: }
2205: nprocslocal = nprocs[rank];
2206: nprocs[rank] = 0;
2207: if (nprocslocal) nsends--;
2208: /* inform other processors of number of messages and max length*/
2209: PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2210: PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2211: PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2212: recvtotal = 0;
2213: for (i=0; i<nrecvs; i++) {
2214: PetscIntSumError(recvtotal,olengths1[i],&recvtotal);
2215: }
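/*
   PetscGatherNumberOfMessages() tells this rank how many peers will send to
   it (nrecvs), and PetscGatherMessageLengths() reports who they are and how
   long each message is (onodes1[], olengths1[]); sorting by source rank
   keeps the receive bookkeeping deterministic. recvtotal is accumulated with
   PetscIntSumError() so a PetscInt overflow is caught rather than silently
   wrapping.
*/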
2217: /* post receives: */
2218: PetscMalloc3(recvtotal,&rvalues,nrecvs,&source,nrecvs,&recv_waits);
2219: count = 0;
2220: for (i=0; i<nrecvs; i++) {
2221: MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2222: count += olengths1[i];
2223: }
2225: /* do sends:
2226: 1) starts[i] gives the starting index in svalues for the entries destined
2227: for the ith processor (see the sketch below)
2228: */
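/*
   The send packing is a counting sort: nprocs[] already holds how many
   indices go to each rank, a prefix sum over nprocs[] yields starts[], and a
   second pass drops each index into svalues[] at starts[owner[i]]++. Because
   that pass destroys starts[], the prefix sum is recomputed before posting
   the MPI_Isend()s. The pattern, schematically:

       starts[0] = 0;
       for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];   (prefix sum)
       for (i=0; i<nx; i++)   svalues[starts[owner[i]]++] = ...;       (bucket fill)
*/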
2229: nxr = 0;
2230: for (i=0; i<nx; i++) {
2231: if (owner[i] != rank) nxr++;
2232: }
2233: PetscMalloc3(nxr,&svalues,nsends,&send_waits,size+1,&starts);
2235: starts[0] = 0;
2236: for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2237: for (i=0; i<nx; i++) {
2238: if (owner[i] != rank) svalues[starts[owner[i]]++] = bs*inidx[i];
2239: }
2240: starts[0] = 0;
2241: for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2242: count = 0;
2243: for (i=0; i<size; i++) {
2244: if (nprocs[i]) {
2245: MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);
2246: }
2247: }
2249: /* wait on receives; this is everyone to whom we need to deliver data */
2250: count = nrecvs;
2251: slen = 0;
2252: nrecvshared = 0;
2253: slenshared = 0;
2254: while (count) {
2255: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2256: /* unpack receives into our local space */
2257: MPI_Get_count(&recv_status,MPIU_INT,&n);
2258: PetscShmCommGlobalToLocal(scomm,onodes1[imdex],&jj);
2259: if (jj > -1) {
2260: PetscInfo3(NULL,"[%d] Sending values to shared memory partner %d, global rank %d\n",rank,jj,onodes1[imdex]);
2261: nrecvshared++;
2262: slenshared += n;
2263: }
2264: slen += n;
2265: count--;
2266: }
2268: if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);
2270: /* allocate entire send scatter context */
2271: PetscNewLog(ctx,&to);
2272: to->sharedwin = MPI_WIN_NULL;
2273: to->n = nrecvs-nrecvshared;
2275: PetscMalloc1(nrecvs-nrecvshared,&to->requests);
2276: PetscMalloc4(bs*(slen-slenshared),&to->values,slen-slenshared,&to->indices,nrecvs-nrecvshared+1,&to->starts,nrecvs-nrecvshared,&to->procs);
2277: PetscMalloc2(PetscMax(to->n,nsends),&to->sstatus,PetscMax(to->n,nsends),&to->rstatus);
2279: MPI_Comm_size(mscomm,&to->msize);
2280: PetscMalloc1(slenshared,&to->sharedspaceindices);
2281: PetscCalloc1(to->msize+1,&to->sharedspacestarts);
2283: ctx->todata = (void*)to;
2284: to->starts[0] = 0;
2285: to->sharedspacestarts[0] = 0;
2287: /* Allocate shared memory space for shared memory partner communication */
2288: MPIU_Win_allocate_shared(to->msize*sizeof(PetscInt),sizeof(PetscInt),info,mscomm,&sharedspaceoffset,&sharedoffsetwin);
2289: for (i=0; i<to->msize; i++) sharedspaceoffset[i] = -1; /* mark with -1 for later error checking */
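/*
   MPIU_Win_allocate_shared() wraps MPI_Win_allocate_shared(), which hands
   every rank of the on-node communicator memory that all of them can address
   directly; the "alloc_shared_noncontig" hint set above lets the MPI library
   place each rank's segment independently instead of in one contiguous slab.
   sharedspaceoffset[jj] will record where the data destined for on-node
   partner jj starts inside this rank's segment, with -1 meaning "no data for
   this partner".
*/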
2290: if (nrecvs) {
2291: /* move the data into the send scatter */
2292: base = owners[rank];
2293: rsvalues = rvalues;
2294: to->n = 0;
2295: for (i=0; i<nrecvs; i++) {
2296: values = rsvalues;
2297: PetscShmCommGlobalToLocal(scomm,(PetscMPIInt)onodes1[i],&jj);
2298: if (jj > -1) {
2299: to->sharedspacestarts[jj] = to->sharedcnt;
2300: to->sharedspacestarts[jj+1] = to->sharedcnt + olengths1[i];
2301: for (j=0; j<olengths1[i]; j++) to->sharedspaceindices[to->sharedcnt + j] = values[j] - base;
2302: sharedspaceoffset[jj] = to->sharedcnt;
2303: to->sharedcnt += olengths1[i];
2304: PetscInfo4(NULL,"[%d] Sending %d values to shared memory partner %d, global rank %d\n",rank,olengths1[i],jj,onodes1[i]);
2305: } else {
2306: to->starts[to->n+1] = to->starts[to->n] + olengths1[i];
2307: to->procs[to->n] = onodes1[i];
2308: for (j=0; j<olengths1[i]; j++) to->indices[to->starts[to->n] + j] = values[j] - base;
2309: to->n++;
2310: }
2311: rsvalues += olengths1[i];
2312: }
2313: }
2314: PetscFree(olengths1);
2315: PetscFree(onodes1);
2316: PetscFree3(rvalues,source,recv_waits);
2318: if (mpi3node) {
2319: /* to->sharedspace is used only as a flag */
2320: i = 0;
2321: if (to->sharedcnt) i = 1;
2322: MPIU_Win_allocate_shared(i*sizeof(PetscScalar),sizeof(PetscScalar),info,mscomm,&to->sharedspace,&to->sharedwin);
2323: } else { /* mpi3 */
2324: MPIU_Win_allocate_shared(bs*to->sharedcnt*sizeof(PetscScalar),sizeof(PetscScalar),info,mscomm,&to->sharedspace,&to->sharedwin);
2325: }
2326: if (to->sharedwin == MPI_WIN_NULL) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Win_allocate_shared() did not return a valid window");
2327: MPI_Info_free(&info);
2329: /* allocate entire receive scatter context */
2330: PetscNewLog(ctx,&from);
2331: from->sharedwin = MPI_WIN_NULL;
2332: from->msize = to->msize;
2333: /* we don't actually know the correct value for from->n at this point, so we use the upper bound */
2334: from->n = nsends;
2336: PetscMalloc1(nsends,&from->requests);
2337: PetscMalloc4((ny-nprocslocal)*bs,&from->values,ny-nprocslocal,&from->indices,nsends+1,&from->starts,from->n,&from->procs);
2338: ctx->fromdata = (void*)from;
2340: PetscCalloc1(to->msize+1,&from->sharedspacestarts);
2341: /* move data into receive scatter */
2342: PetscMalloc2(size,&lowner,nsends+1,&start);
2343: PetscMalloc2(size,&lsharedowner,to->msize+1,&sharedstart);
2344: count = 0; from->starts[0] = start[0] = 0;
2345: from->sharedcnt = 0;
2346: for (i=0; i<size; i++) {
2347: lsharedowner[i] = -1;
2348: lowner[i] = -1;
2349: if (nprocs[i]) {
2350: PetscShmCommGlobalToLocal(scomm,i,&jj);
2351: if (jj > -1) {
2352: from->sharedspacestarts[jj] = sharedstart[jj] = from->sharedcnt;
2353: from->sharedspacestarts[jj+1] = sharedstart[jj+1] = from->sharedspacestarts[jj] + nprocs[i];
2354: from->sharedcnt += nprocs[i];
2355: lsharedowner[i] = jj;
2356: } else {
2357: lowner[i] = count++;
2358: from->procs[count-1] = i;
2359: from->starts[count] = start[count] = start[count-1] + nprocs[i];
2360: }
2361: }
2362: }
2363: from->n = count;
2365: for (i=0; i<nx; i++) {
2366: if (owner[i] != rank && lowner[owner[i]] > -1) {
2367: from->indices[start[lowner[owner[i]]]++] = bs*inidy[i];
2368: if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2369: }
2370: }
2372: /* copy over appropriate parts of inidy[] into from->sharedspaceindices */
2373: if (mpi3node) {
2374: /* in addition, copy over appropriate parts of inidx[] into 2nd part of from->sharedspaceindices */
2375: PetscInt *sharedspaceindicesx;
2376: PetscMalloc1(2*from->sharedcnt,&from->sharedspaceindices);
2377: sharedspaceindicesx = from->sharedspaceindices + from->sharedcnt;
2378: for (i=0; i<nx; i++) {
2379: if (owner[i] != rank && lsharedowner[owner[i]] > -1) {
2380: if (sharedstart[lsharedowner[owner[i]]] > from->sharedcnt) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MEM,"Shared space index start exceeds shared space count");
2382: from->sharedspaceindices[sharedstart[lsharedowner[owner[i]]]] = bs*inidy[i];
2383: sharedspaceindicesx[sharedstart[lsharedowner[owner[i]]]] = bs*inidx[i]-owners[owner[i]]; /* local inidx */
2384: sharedstart[lsharedowner[owner[i]]]++;
2385: }
2386: }
2387: } else { /* mpi3 */
2388: PetscMalloc1(from->sharedcnt,&from->sharedspaceindices);
2389: for (i=0; i<nx; i++) {
2390: if (owner[i] != rank && lsharedowner[owner[i]] > -1) {
2391: if (sharedstart[lsharedowner[owner[i]]] > from->sharedcnt) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MEM,"Shared space index start exceeds shared space count");
2392: from->sharedspaceindices[sharedstart[lsharedowner[owner[i]]]++] = bs*inidy[i];
2393: }
2394: }
2395: }
2397: PetscFree2(lowner,start);
2398: PetscFree2(lsharedowner,sharedstart);
2399: PetscFree2(nprocs,owner);
2401: /* wait on sends */
2402: if (nsends) {
2403: PetscMalloc1(nsends,&send_status);
2404: MPI_Waitall(nsends,send_waits,send_status);
2405: PetscFree(send_status);
2406: }
2407: PetscFree3(svalues,send_waits,starts);
2409: if (nprocslocal) {
2410: PetscInt nt = from->local.n = to->local.n = nprocslocal;
2411: /* we have a scatter to ourselves */
2412: PetscMalloc1(nt,&to->local.vslots);
2413: PetscMalloc1(nt,&from->local.vslots);
2414: nt = 0;
2415: for (i=0; i<nx; i++) {
2416: idx = bs*inidx[i];
2417: if (idx >= owners[rank] && idx < owners[rank+1]) {
2418: to->local.vslots[nt] = idx - owners[rank];
2419: from->local.vslots[nt++] = bs*inidy[i];
2420: if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2421: }
2422: }
2423: PetscLogObjectMemory((PetscObject)ctx,2*nt*sizeof(PetscInt));
2424: } else {
2425: from->local.n = 0;
2426: from->local.vslots = 0;
2427: to->local.n = 0;
2428: to->local.vslots = 0;
2429: }
2431: /* Get the shared memory address for all processes we will be copying data from */
2432: PetscCalloc1(to->msize,&from->sharedspaces);
2433: PetscCalloc1(to->msize,&from->sharedspacesoffset);
2434: MPI_Comm_rank(mscomm,&mrank);
2435: for (jj=0; jj<to->msize; jj++) {
2436: MPI_Aint isize;
2437: PetscMPIInt disp_unit;
2438: PetscInt *ptr;
2439: MPIU_Win_shared_query(to->sharedwin,jj,&isize,&disp_unit,&from->sharedspaces[jj]);
2440: MPIU_Win_shared_query(sharedoffsetwin,jj,&isize,&disp_unit,&ptr);
2441: from->sharedspacesoffset[jj] = ptr[mrank];
2442: }
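/*
   MPIU_Win_shared_query() wraps MPI_Win_shared_query(), which turns a shared
   window plus a partner's node rank into a pointer that can be dereferenced
   directly, roughly:

       MPI_Aint    sz;
       PetscMPIInt unit;
       PetscScalar *ptr;
       MPI_Win_shared_query(win,partner,&sz,&unit,&ptr);   (ptr -> partner's segment)

   so the scatter can later read a partner's values with plain loads instead
   of messages; from->sharedspacesoffset[jj] records where this rank's
   portion begins inside partner jj's segment.
*/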
2443: MPI_Win_free(&sharedoffsetwin);
2445: if (mpi3node && !from->sharedspace) {
2446: /* compute from->notdone to be used by VecScatterEndMPI3Node() */
2447: PetscInt notdone = 0;
2448: for (i=0; i<from->msize; i++) {
2449: if (from->sharedspacesoffset && from->sharedspacesoffset[i] > -1) {
2450: notdone++;
2451: }
2452: }
2453: from->notdone = notdone;
2454: }
2456: from->local.nonmatching_computed = PETSC_FALSE;
2457: from->local.n_nonmatching = 0;
2458: from->local.slots_nonmatching = 0;
2459: to->local.nonmatching_computed = PETSC_FALSE;
2460: to->local.n_nonmatching = 0;
2461: to->local.slots_nonmatching = 0;
2463: from->format = VEC_SCATTER_MPI_GENERAL;
2464: to->format = VEC_SCATTER_MPI_GENERAL;
2465: from->bs = bs;
2466: to->bs = bs;
2468: VecScatterCreateCommon_PtoS_MPI3(from,to,ctx);
2469: return(0);
2470: }
2472: /*
2473: bs indicates how many elements there are in each block; normally this is 1.
2474: */
2475: PetscErrorCode VecScatterCreateCommon_PtoS_MPI3(VecScatter_MPI_General *from,VecScatter_MPI_General *to,VecScatter ctx)
2476: {
2477: MPI_Comm comm;
2478: PetscMPIInt tag = ((PetscObject)ctx)->tag, tagr;
2479: PetscInt bs = to->bs;
2480: PetscMPIInt size;
2481: PetscInt i, n;
2483: VecScatterType type;
2484: PetscBool mpi3node;
2487: PetscObjectGetComm((PetscObject)ctx,&comm);
2488: PetscObjectGetNewTag((PetscObject)ctx,&tagr);
2489: ctx->ops->destroy = VecScatterDestroy_PtoP_MPI3;
2491: MPI_Comm_size(comm,&size);
2492: /* check if the receives are ALL going into contiguous locations; if so, we can skip indexing */
2493: to->contiq = PETSC_FALSE;
2494: n = from->starts[from->n];
2495: from->contiq = PETSC_TRUE;
2496: for (i=1; i<n; i++) {
2497: if (from->indices[i] != from->indices[i-1] + bs) {
2498: from->contiq = PETSC_FALSE;
2499: break;
2500: }
2501: }
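/*
   contiq: if every received block lands exactly bs scalars after the
   previous one, the receive indices describe a single contiguous run and the
   unpack could in principle be one memcpy instead of an indexed loop; the
   VecScatterMemcpyPlanCreate_PtoP() call at the end of this routine performs
   the more general form of that optimization.
*/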
2503: ctx->ops->copy = VecScatterCopy_PtoP_AllToAll;
2504: {
2505: PetscInt *sstarts = to->starts, *rstarts = from->starts;
2506: PetscMPIInt *sprocs = to->procs, *rprocs = from->procs;
2507: MPI_Request *swaits = to->requests,*rwaits = from->requests;
2508: MPI_Request *rev_swaits,*rev_rwaits;
2509: PetscScalar *Ssvalues = to->values, *Srvalues = from->values;
2511: /* allocate additional wait variables for the "reverse" scatter */
2512: PetscMalloc1(to->n,&rev_rwaits);
2513: PetscMalloc1(from->n,&rev_swaits);
2514: to->rev_requests = rev_rwaits;
2515: from->rev_requests = rev_swaits;
2517: for (i=0; i<from->n; i++) {
2518: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2519: }
2521: for (i=0; i<to->n; i++) {
2522: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2523: }
2524: /* Register receives for scatter and reverse */
2525: for (i=0; i<from->n; i++) {
2526: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
2527: }
2528: for (i=0; i<to->n; i++) {
2529: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tagr,comm,rev_rwaits+i);
2530: }
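/*
   MPI_Send_init()/MPI_Recv_init() create persistent requests: buffer, count,
   peer and tag are bound once here, and each scatter then only has to
   activate and complete them, schematically:

       MPI_Startall(nrecvs,rwaits);                (re-arm the receives)
       MPI_Startall(nsends,swaits);                (re-arm the sends)
       ... pack, overlap computation, unpack ...
       MPI_Waitall(nsends,swaits,statuses);

   Two distinct tags are used so the forward scatter (tag) and the reverse
   scatter (tagr) can be in flight at the same time without their messages
   being confused.
*/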
2531: ctx->ops->copy = VecScatterCopy_PtoP_X;
2532: }
2533: PetscInfo1(ctx,"Using blocksize %D scatter\n",bs);
2535: #if defined(PETSC_USE_DEBUG)
2536: MPIU_Allreduce(&bs,&i,1,MPIU_INT,MPI_MIN,PetscObjectComm((PetscObject)ctx));
2537: MPIU_Allreduce(&bs,&n,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)ctx));
2538: if (bs!=i || bs!=n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Block size %D != %D or %D",bs,i,n);
2539: #endif
2541: VecScatterGetType(ctx,&type);
2542: PetscStrcmp(type,"mpi3node",&mpi3node);
2544: if (mpi3node) {
2545: switch (bs) {
2546: case 12:
2547: ctx->ops->begin = VecScatterBeginMPI3Node_12;
2548: ctx->ops->end = VecScatterEndMPI3Node_12;
2549: break;
2550: case 11:
2551: ctx->ops->begin = VecScatterBeginMPI3Node_11;
2552: ctx->ops->end = VecScatterEndMPI3Node_11;
2553: break;
2554: case 10:
2555: ctx->ops->begin = VecScatterBeginMPI3Node_10;
2556: ctx->ops->end = VecScatterEndMPI3Node_10;
2557: break;
2558: case 9:
2559: ctx->ops->begin = VecScatterBeginMPI3Node_9;
2560: ctx->ops->end = VecScatterEndMPI3Node_9;
2561: break;
2562: case 8:
2563: ctx->ops->begin = VecScatterBeginMPI3Node_8;
2564: ctx->ops->end = VecScatterEndMPI3Node_8;
2565: break;
2566: case 7:
2567: ctx->ops->begin = VecScatterBeginMPI3Node_7;
2568: ctx->ops->end = VecScatterEndMPI3Node_7;
2569: break;
2570: case 6:
2571: ctx->ops->begin = VecScatterBeginMPI3Node_6;
2572: ctx->ops->end = VecScatterEndMPI3Node_6;
2573: break;
2574: case 5:
2575: ctx->ops->begin = VecScatterBeginMPI3Node_5;
2576: ctx->ops->end = VecScatterEndMPI3Node_5;
2577: break;
2578: case 4:
2579: ctx->ops->begin = VecScatterBeginMPI3Node_4;
2580: ctx->ops->end = VecScatterEndMPI3Node_4;
2581: break;
2582: case 3:
2583: ctx->ops->begin = VecScatterBeginMPI3Node_3;
2584: ctx->ops->end = VecScatterEndMPI3Node_3;
2585: break;
2586: case 2:
2587: ctx->ops->begin = VecScatterBeginMPI3Node_2;
2588: ctx->ops->end = VecScatterEndMPI3Node_2;
2589: break;
2590: case 1:
2591: ctx->ops->begin = VecScatterBeginMPI3Node_1;
2592: ctx->ops->end = VecScatterEndMPI3Node_1;
2593: break;
2594: default:
2595: ctx->ops->begin = VecScatterBegin_bs;
2596: ctx->ops->end = VecScatterEnd_bs;
2597: }
2598: } else { /* !mpi3node */
2600: switch (bs) {
2601: case 12:
2602: ctx->ops->begin = VecScatterBegin_12;
2603: ctx->ops->end = VecScatterEnd_12;
2604: break;
2605: case 11:
2606: ctx->ops->begin = VecScatterBegin_11;
2607: ctx->ops->end = VecScatterEnd_11;
2608: break;
2609: case 10:
2610: ctx->ops->begin = VecScatterBegin_10;
2611: ctx->ops->end = VecScatterEnd_10;
2612: break;
2613: case 9:
2614: ctx->ops->begin = VecScatterBegin_9;
2615: ctx->ops->end = VecScatterEnd_9;
2616: break;
2617: case 8:
2618: ctx->ops->begin = VecScatterBegin_8;
2619: ctx->ops->end = VecScatterEnd_8;
2620: break;
2621: case 7:
2622: ctx->ops->begin = VecScatterBegin_7;
2623: ctx->ops->end = VecScatterEnd_7;
2624: break;
2625: case 6:
2626: ctx->ops->begin = VecScatterBegin_6;
2627: ctx->ops->end = VecScatterEnd_6;
2628: break;
2629: case 5:
2630: ctx->ops->begin = VecScatterBegin_5;
2631: ctx->ops->end = VecScatterEnd_5;
2632: break;
2633: case 4:
2634: ctx->ops->begin = VecScatterBegin_4;
2635: ctx->ops->end = VecScatterEnd_4;
2636: break;
2637: case 3:
2638: ctx->ops->begin = VecScatterBegin_3;
2639: ctx->ops->end = VecScatterEnd_3;
2640: break;
2641: case 2:
2642: ctx->ops->begin = VecScatterBegin_2;
2643: ctx->ops->end = VecScatterEnd_2;
2644: break;
2645: case 1:
2646: /* mpi3node is PETSC_FALSE in this branch, so no MPI3Node variant is needed */
2650: ctx->ops->begin = VecScatterBegin_1;
2651: ctx->ops->end = VecScatterEnd_1;
2653: break;
2654: default:
2655: ctx->ops->begin = VecScatterBegin_bs;
2656: ctx->ops->end = VecScatterEnd_bs;
2657: }
2658: }
2660: ctx->ops->view = VecScatterView_MPI;
2662: /* try to optimize PtoP vecscatter with memcpy's */
2663: VecScatterMemcpyPlanCreate_PtoP(to,from);
2664: return(0);
2665: }
2667: /* ------------------------------------------------------------------------------------*/
2668: /*
2669: Scatter from local Seq vectors to a parallel vector.
2670: Reverses the order of the arguments, calls VecScatterCreateLocal_PtoS_MPI3(), then
2671: reverses the result (see the note below).
2672: */
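/*
   Concretely: the StoP scatter (nx,inidx) -> (ny,inidy) is built as the PtoS
   scatter (ny,inidy) -> (nx,inidx), after which the to/from contexts and the
   forward/reverse persistent requests are swapped in place, so no separate
   StoP communication setup is required.
*/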
2673: PetscErrorCode VecScatterCreateLocal_StoP_MPI3(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2674: {
2675: PetscErrorCode ierr;
2676: MPI_Request *waits;
2677: VecScatter_MPI_General *to,*from;
2680: VecScatterCreateLocal_PtoS_MPI3(ny,inidy,nx,inidx,yin,xin,bs,ctx);
2681: to = (VecScatter_MPI_General*)ctx->fromdata;
2682: from = (VecScatter_MPI_General*)ctx->todata;
2683: ctx->todata = (void*)to;
2684: ctx->fromdata = (void*)from;
2685: /* these two are special, they are ALWAYS stored in the to struct */
2686: to->sstatus = from->sstatus;
2687: to->rstatus = from->rstatus;
2689: from->sstatus = 0;
2690: from->rstatus = 0;
2691: waits = from->rev_requests;
2692: from->rev_requests = from->requests;
2693: from->requests = waits;
2694: waits = to->rev_requests;
2695: to->rev_requests = to->requests;
2696: to->requests = waits;
2697: return(0);
2698: }
2700: /* ---------------------------------------------------------------------------------*/
2701: PetscErrorCode VecScatterCreateLocal_PtoP_MPI3(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2702: {
2704: PetscMPIInt size,rank,tag,imdex,n;
2705: PetscInt *owners = xin->map->range;
2706: PetscMPIInt *nprocs = NULL;
2707: PetscInt i,j,idx,nsends,*local_inidx = NULL,*local_inidy = NULL;
2708: PetscMPIInt *owner = NULL;
2709: PetscInt *starts = NULL,count,slen;
2710: PetscInt *rvalues = NULL,*svalues = NULL,base,*values = NULL,*rsvalues,recvtotal,lastidx;
2711: PetscMPIInt *onodes1,*olengths1,nrecvs;
2712: MPI_Comm comm;
2713: MPI_Request *send_waits = NULL,*recv_waits = NULL;
2714: MPI_Status recv_status,*send_status = NULL;
2715: PetscBool duplicate = PETSC_FALSE;
2716: #if defined(PETSC_USE_DEBUG)
2717: PetscBool found = PETSC_FALSE;
2718: #endif
2721: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2722: PetscObjectGetComm((PetscObject)xin,&comm);
2723: MPI_Comm_size(comm,&size);
2724: MPI_Comm_rank(comm,&rank);
2725: if (size == 1) {
2726: VecScatterCreateLocal_StoP_MPI3(nx,inidx,ny,inidy,xin,yin,bs,ctx);
2727: return(0);
2728: }
2730: /*
2731: Each process ships its inidx[j] and inidy[j] to the process that owns inidx[j];
2732: the owners then build the scatter with VecScatterCreateLocal_StoP_MPI3().
2733: */
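/*
   In other words, the general PtoP scatter is reduced to an StoP scatter:
   each (inidx[i],inidy[i]) pair is shipped to the rank owning inidx[i], so
   that afterwards every from-index is local to its receiver. The pairs
   travel interleaved, two PetscInts per entry,

       svalues[2*k]   = bs*inidx[i];   (from index, made rank-local on receipt)
       svalues[2*k+1] = bs*inidy[i];   (to index, kept global)

   which is why the counts and buffers below are all doubled.
*/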
2734: /* first count number of contributors to each processor */
2735: PetscMalloc3(size,&nprocs,nx,&owner,(size+1),&starts);
2736: PetscArrayzero(nprocs,size);
2738: lastidx = -1;
2739: j = 0;
2740: for (i=0; i<nx; i++) {
2741: /* if indices are NOT locally sorted, need to start search at the beginning */
2742: if (lastidx > (idx = bs*inidx[i])) j = 0;
2743: lastidx = idx;
2744: for (; j<size; j++) {
2745: if (idx >= owners[j] && idx < owners[j+1]) {
2746: nprocs[j]++;
2747: owner[i] = j;
2748: #if defined(PETSC_USE_DEBUG)
2749: found = PETSC_TRUE;
2750: #endif
2751: break;
2752: }
2753: }
2754: #if defined(PETSC_USE_DEBUG)
2755: if (!found) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2756: found = PETSC_FALSE;
2757: #endif
2758: }
2759: nsends = 0;
2760: for (i=0; i<size; i++) nsends += (nprocs[i] > 0);
2762: /* inform other processors of number of messages and max length*/
2763: PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2764: PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2765: PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2766: recvtotal = 0;
2767: for (i=0; i<nrecvs; i++) {
2768: PetscIntSumError(recvtotal,olengths1[i],&recvtotal);
2769: }
2771: /* post receives: */
2772: PetscMalloc5(2*recvtotal,&rvalues,2*nx,&svalues,nrecvs,&recv_waits,nsends,&send_waits,nsends,&send_status);
2774: count = 0;
2775: for (i=0; i<nrecvs; i++) {
2776: MPI_Irecv((rvalues+2*count),2*olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2777: PetscIntSumError(count,olengths1[i],&count);
2778: }
2779: PetscFree(onodes1);
2781: /* do sends:
2782: 1) starts[i] gives the starting index in svalues for the entries destined
2783: for the ith processor
2784: */
2785: starts[0] = 0;
2786: for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2787: for (i=0; i<nx; i++) {
2788: svalues[2*starts[owner[i]]] = bs*inidx[i];
2789: svalues[1 + 2*starts[owner[i]]++] = bs*inidy[i];
2790: }
2792: starts[0] = 0;
2793: for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2794: count = 0;
2795: for (i=0; i<size; i++) {
2796: if (nprocs[i]) {
2797: MPI_Isend(svalues+2*starts[i],2*nprocs[i],MPIU_INT,i,tag,comm,send_waits+count);
2798: count++;
2799: }
2800: }
2801: PetscFree3(nprocs,owner,starts);
2803: /* wait on receives */
2804: count = nrecvs;
2805: slen = 0;
2806: while (count) {
2807: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2808: /* unpack receives into our local space */
2809: MPI_Get_count(&recv_status,MPIU_INT,&n);
2810: slen += n/2;
2811: count--;
2812: }
2813: if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);
2815: PetscMalloc2(slen,&local_inidx,slen,&local_inidy);
2816: base = owners[rank];
2817: count = 0;
2818: rsvalues = rvalues;
2819: for (i=0; i<nrecvs; i++) {
2820: values = rsvalues;
2821: rsvalues += 2*olengths1[i];
2822: for (j=0; j<olengths1[i]; j++) {
2823: local_inidx[count] = values[2*j] - base;
2824: local_inidy[count++] = values[2*j+1];
2825: }
2826: }
2827: PetscFree(olengths1);
2829: /* wait on sends */
2830: if (nsends) {MPI_Waitall(nsends,send_waits,send_status);}
2831: PetscFree5(rvalues,svalues,recv_waits,send_waits,send_status);
2833: /*
2834: Duplicates should be sorted out and removed from local_inidx[],local_inidy[]; the
2835: pass below does so but is compiled out by default (define do_it_slow to enable it). */
2836: #if defined(do_it_slow)
2837: /* sort on the from index */
2838: PetscSortIntWithArray(slen,local_inidx,local_inidy);
2839: start = 0;
2840: while (start < slen) {
2841: count = start+1;
2842: last = local_inidx[start];
2843: while (count < slen && last == local_inidx[count]) count++;
2844: if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
2845: /* sort on to index */
2846: PetscSortInt(count-start,local_inidy+start);
2847: }
2848: /* remove duplicates; not most efficient way, but probably good enough */
2849: i = start;
2850: while (i < count-1) {
2851: if (local_inidy[i] != local_inidy[i+1]) i++;
2852: else { /* found a duplicate */
2853: duplicate = PETSC_TRUE;
2854: for (j=i; j<slen-1; j++) {
2855: local_inidx[j] = local_inidx[j+1];
2856: local_inidy[j] = local_inidy[j+1];
2857: }
2858: slen--;
2859: count--;
2860: }
2861: }
2862: start = count;
2863: }
2864: #endif
2865: if (duplicate) {
2866: PetscInfo(ctx,"Duplicate from/to indices passed to VecScatterCreate(); they are ignored\n");
2867: }
2868: VecScatterCreateLocal_StoP_MPI3(slen,local_inidx,slen,local_inidy,xin,yin,bs,ctx);
2869: PetscFree2(local_inidx,local_inidy);
2870: return(0);
2871: }
2873: PetscErrorCode VecScatterSetUp_MPI3(VecScatter ctx)
2874: {
2878: VecScatterSetUp_vectype_private(ctx,VecScatterCreateLocal_PtoS_MPI3,VecScatterCreateLocal_StoP_MPI3,VecScatterCreateLocal_PtoP_MPI3);
2879: return(0);
2880: }
2882: PetscErrorCode VecScatterCreate_MPI3(VecScatter ctx)
2883: {
2884: PetscErrorCode ierr;
2887: ctx->ops->setup = VecScatterSetUp_MPI3;
2888: PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERMPI3);
2889: PetscInfo(ctx,"Using MPI3 for vector scatter\n");
2890: return(0);
2891: }
2893: PetscErrorCode VecScatterCreate_MPI3Node(VecScatter ctx)
2894: {
2895: PetscErrorCode ierr;
2898: ctx->ops->setup = VecScatterSetUp_MPI3;
2899: PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERMPI3NODE);
2900: PetscInfo(ctx,"Using MPI3NODE for vector scatter\n");
2901: return(0);
2902: }