Actual source code: vpscat.c
petsc-3.11.4 2019-09-28
1: /*
2: Defines parallel vector scatters.
3: */
5: #include <petsc/private/vecscatterimpl.h>
8: PetscErrorCode VecScatterView_MPI(VecScatter ctx,PetscViewer viewer)
9: {
10: VecScatter_MPI_General *to =(VecScatter_MPI_General*)ctx->todata;
11: VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
12: PetscErrorCode ierr;
13: PetscInt i;
14: PetscMPIInt rank;
15: PetscViewerFormat format;
16: PetscBool iascii;
19: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
20: if (iascii) {
21: MPI_Comm_rank(PetscObjectComm((PetscObject)ctx),&rank);
22: PetscViewerGetFormat(viewer,&format);
23: if (format == PETSC_VIEWER_ASCII_INFO) {
24: PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;
26: MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
27: MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
28: itmp = to->starts[to->n];
29: MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
30: itmp = from->starts[from->n];
31: MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
32: MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)ctx));
34: PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
35: PetscViewerASCIIPrintf(viewer," Blocksize %D\n",to->bs);
36: PetscViewerASCIIPrintf(viewer," Maximum number sends %D\n",nsend_max);
37: PetscViewerASCIIPrintf(viewer," Maximum number receives %D\n",nrecv_max);
38: PetscViewerASCIIPrintf(viewer," Maximum data sent %D\n",(int)(lensend_max*to->bs*sizeof(PetscScalar)));
39: PetscViewerASCIIPrintf(viewer," Maximum data received %D\n",(int)(lenrecv_max*to->bs*sizeof(PetscScalar)));
40: PetscViewerASCIIPrintf(viewer," Total data sent %D\n",(int)(alldata*to->bs*sizeof(PetscScalar)));
41: } else {
42: PetscViewerASCIIPrintf(viewer," VecScatter Blocksize %D\n",to->bs);
43: PetscViewerASCIIPushSynchronized(viewer);
44: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);
45: if (to->n) {
46: for (i=0; i<to->n; i++) {
47: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length = %D to whom %d\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
48: }
49: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Now the indices for all remote sends (in order by process sent to)\n",rank);
50: for (i=0; i<to->starts[to->n]; i++) {
51: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,to->indices[i]);
52: }
53: }
55: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);
56: if (from->n) {
57: for (i=0; i<from->n; i++) {
58: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %d\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
59: }
61: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Now the indices for all remote receives (in order by process received from)\n",rank);
62: for (i=0; i<from->starts[from->n]; i++) {
63: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,from->indices[i]);
64: }
65: }
66: if (to->local.n) {
67: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Indices for local part of scatter\n",rank);
68: for (i=0; i<to->local.n; i++) { /* the to and from have the opposite meaning from what you would expect */
69: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] From %D to %D \n",rank,to->local.vslots[i],from->local.vslots[i]);
70: }
71: }
73: for (i=0; i<to->msize; i++) {
74: if (to->sharedspacestarts[i+1] > to->sharedspacestarts[i]) {
75: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Via shared memory to local memory partner %d count %d\n",rank,i,to->sharedspacestarts[i+1]-to->sharedspacestarts[i]);
76: }
77: }
78: for (i=0; i<from->msize; i++) {
79: if (from->sharedspacestarts[i+1] > from->sharedspacestarts[i]) {
80: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Via shared memory from local memory partner %d count %d\n",rank,i,from->sharedspacestarts[i+1]-from->sharedspacestarts[i]);
81: }
82: }
84: PetscViewerFlush(viewer);
85: PetscViewerASCIIPopSynchronized(viewer);
87: PetscViewerASCIIPrintf(viewer,"Method used to implement the VecScatter: ");
88: }
89: }
90: return(0);
91: }
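
/* Illustrative usage sketch (not part of this file; function name is
   hypothetical): the PETSC_VIEWER_ASCII_INFO branch above is reached by
   pushing that format before viewing a scatter. */
static PetscErrorCode ViewScatterStatsSketch(VecScatter ctx)
{
  PetscErrorCode ierr;
  ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
  ierr = VecScatterView(ctx,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  return 0;
}
/* The same output can normally be requested at runtime with
   -vecscatter_view ::ascii_info. */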
93: /* -----------------------------------------------------------------------------------*/
94: /*
95: The next routine determines what part of the local part of the scatter is an
96: exact copy of values into their current location. We check this here and
97: then know that we need not perform that portion of the scatter when the vector is
98: scattering to itself with INSERT_VALUES.
100: This is currently not used but would speed up, for example DMLocalToLocalBegin/End()
102: */
103: PetscErrorCode VecScatterLocalOptimize_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from)
104: {
105: PetscInt n = to->n,n_nonmatching = 0,i,*to_slots = to->vslots,*from_slots = from->vslots;
107: PetscInt *nto_slots,*nfrom_slots,j = 0;
110: for (i=0; i<n; i++) {
111: if (to_slots[i] != from_slots[i]) n_nonmatching++;
112: }
114: if (!n_nonmatching) {
115: to->nonmatching_computed = PETSC_TRUE;
116: to->n_nonmatching = from->n_nonmatching = 0;
117: PetscInfo1(scatter,"Reduced %D to 0\n", n);
118: } else if (n_nonmatching == n) {
119: to->nonmatching_computed = PETSC_FALSE;
120: PetscInfo(scatter,"All values non-matching\n");
121: } else {
122: to->nonmatching_computed= PETSC_TRUE;
123: to->n_nonmatching = from->n_nonmatching = n_nonmatching;
125: PetscMalloc1(n_nonmatching,&nto_slots);
126: PetscMalloc1(n_nonmatching,&nfrom_slots);
128: to->slots_nonmatching = nto_slots;
129: from->slots_nonmatching = nfrom_slots;
130: for (i=0; i<n; i++) {
131: if (to_slots[i] != from_slots[i]) {
132: nto_slots[j] = to_slots[i];
133: nfrom_slots[j] = from_slots[i];
134: j++;
135: }
136: }
137: PetscInfo2(scatter,"Reduced %D to %D\n",n,n_nonmatching);
138: }
139: return(0);
140: }
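
/* Worked example (hypothetical data): with to->vslots = {0,5,9} and
   from->vslots = {0,7,9}, entries 0 and 9 are already exact copies into their
   current locations, so only the pair (5,7) must still be moved; the routine
   records n_nonmatching = 1 with slots_nonmatching = {5} (to) and {7} (from). */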
142: /* --------------------------------------------------------------------------------------*/
144: /* -------------------------------------------------------------------------------------*/
145: PetscErrorCode VecScatterDestroy_PtoP_MPI3(VecScatter ctx)
146: {
147: VecScatter_MPI_General *to = (VecScatter_MPI_General*)ctx->todata;
148: VecScatter_MPI_General *from = (VecScatter_MPI_General*)ctx->fromdata;
149: PetscErrorCode ierr;
150: PetscInt i;
153: /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
154: if (to->requests) {
155: for (i=0; i<to->n; i++) {
156: MPI_Request_free(to->requests + i);
157: }
158: }
159: if (to->rev_requests) {
160: for (i=0; i<to->n; i++) {
161: MPI_Request_free(to->rev_requests + i);
162: }
163: }
164: if (from->requests) {
165: for (i=0; i<from->n; i++) {
166: MPI_Request_free(from->requests + i);
167: }
168: }
169: if (from->rev_requests) {
170: for (i=0; i<from->n; i++) {
171: MPI_Request_free(from->rev_requests + i);
172: }
173: }
174: if (to->sharedwin != MPI_WIN_NULL) {MPI_Win_free(&to->sharedwin);}
175: if (from->sharedwin != MPI_WIN_NULL) {MPI_Win_free(&from->sharedwin);}
176: PetscFree(to->sharedspaces);
177: PetscFree(to->sharedspacesoffset);
178: PetscFree(to->sharedspaceindices);
179: PetscFree(to->sharedspacestarts);
181: PetscFree(from->sharedspaceindices);
182: PetscFree(from->sharedspaces);
183: PetscFree(from->sharedspacesoffset);
184: PetscFree(from->sharedspacestarts);
186: PetscFree(to->local.vslots);
187: PetscFree(from->local.vslots);
188: PetscFree(to->local.slots_nonmatching);
189: PetscFree(from->local.slots_nonmatching);
190: PetscFree(to->rev_requests);
191: PetscFree(from->rev_requests);
192: PetscFree(to->requests);
193: PetscFree(from->requests);
194: PetscFree4(to->values,to->indices,to->starts,to->procs);
195: PetscFree2(to->sstatus,to->rstatus);
196: PetscFree4(from->values,from->indices,from->starts,from->procs);
197: VecScatterMemcpyPlanDestroy_PtoP(to,from);
198: PetscFree(from);
199: PetscFree(to);
200: return(0);
201: }
203: /* --------------------------------------------------------------------------------------*/
205: PetscErrorCode VecScatterCopy_PtoP_X(VecScatter in,VecScatter out)
206: {
207: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
208: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
209: PetscErrorCode ierr;
210: PetscInt ny,bs = in_from->bs,jj;
211: PetscShmComm scomm;
212: MPI_Comm mscomm;
213: MPI_Info info;
216: out->ops->begin = in->ops->begin;
217: out->ops->end = in->ops->end;
218: out->ops->copy = in->ops->copy;
219: out->ops->destroy = in->ops->destroy;
220: out->ops->view = in->ops->view;
222: /* allocate entire send scatter context */
223: PetscNewLog(out,&out_to);
224: out_to->sharedwin = MPI_WIN_NULL;
225: PetscNewLog(out,&out_from);
226: out_from->sharedwin = MPI_WIN_NULL;
228: ny = in_to->starts[in_to->n];
229: out_to->n = in_to->n;
230: out_to->format = in_to->format;
232: PetscMalloc1(out_to->n,&out_to->requests);
233: PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
234: PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
235: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
236: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
237: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
239: out->todata = (void*)out_to;
240: out_to->local.n = in_to->local.n;
241: out_to->local.nonmatching_computed = PETSC_FALSE;
242: out_to->local.n_nonmatching = 0;
243: out_to->local.slots_nonmatching = 0;
244: if (in_to->local.n) {
245: PetscMalloc1(in_to->local.n,&out_to->local.vslots);
246: PetscMalloc1(in_from->local.n,&out_from->local.vslots);
247: PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
248: PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
249: } else {
250: out_to->local.vslots = 0;
251: out_from->local.vslots = 0;
252: }
254: /* allocate entire receive context */
255: VecScatterMemcpyPlanCopy(&in_to->local.memcpy_plan,&out_to->local.memcpy_plan);
256: VecScatterMemcpyPlanCopy(&in_from->local.memcpy_plan,&out_from->local.memcpy_plan);
257: VecScatterMemcpyPlanCopy(&in_to->memcpy_plan,&out_to->memcpy_plan);
258: VecScatterMemcpyPlanCopy(&in_from->memcpy_plan,&out_from->memcpy_plan);
260: out_from->format = in_from->format;
261: ny = in_from->starts[in_from->n];
262: out_from->n = in_from->n;
264: PetscMalloc1(out_from->n,&out_from->requests);
265: PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
266: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
267: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
268: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
270: out->fromdata = (void*)out_from;
271: out_from->local.n = in_from->local.n;
272: out_from->local.nonmatching_computed = PETSC_FALSE;
273: out_from->local.n_nonmatching = 0;
274: out_from->local.slots_nonmatching = 0;
276: /*
277: set up the request arrays for use with MPI_Send_init() and MPI_Recv_init()
278: */
279: {
280: PetscMPIInt tag;
281: MPI_Comm comm;
282: PetscInt *sstarts = out_to->starts, *rstarts = out_from->starts;
283: PetscMPIInt *sprocs = out_to->procs, *rprocs = out_from->procs;
284: PetscInt i;
285: MPI_Request *swaits = out_to->requests,*rwaits = out_from->requests;
286: MPI_Request *rev_swaits,*rev_rwaits;
287: PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;
289: PetscMalloc1(in_to->n,&out_to->rev_requests);
290: PetscMalloc1(in_from->n,&out_from->rev_requests);
292: rev_rwaits = out_to->rev_requests;
293: rev_swaits = out_from->rev_requests;
295: out_from->bs = out_to->bs = bs;
296: tag = ((PetscObject)out)->tag;
297: PetscObjectGetComm((PetscObject)out,&comm);
299: /* Register the receives that you will use later (sends for scatter reverse) */
300: for (i=0; i<out_from->n; i++) {
301: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
302: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
303: }
305: for (i=0; i<out_to->n; i++) {
306: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
307: }
309: /* Register receives for scatter reverse */
310: for (i=0; i<out_to->n; i++) {
311: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
312: }
313: }
315: /* since the to and from data structures are not symmetric for shared memory copies we ensure they are always listed in "standard" form */
316: if (!in_to->sharedwin) {
317: VecScatter_MPI_General *totmp = in_to,*out_totmp = out_to;
318: in_to = in_from;
319: in_from = totmp;
320: out_to = out_from;
321: out_from = out_totmp;
322: }
324: /* copy the to parts for the shared memory copies between processes */
325: out_to->sharedcnt = in_to->sharedcnt;
326: out_to->msize = in_to->msize;
327: PetscMalloc1(out_to->msize+1,&out_to->sharedspacestarts);
328: PetscMemcpy(out_to->sharedspacestarts,in_to->sharedspacestarts,(out_to->msize+1)*sizeof(PetscInt));
329: PetscMalloc1(out_to->sharedcnt,&out_to->sharedspaceindices);
330: PetscMemcpy(out_to->sharedspaceindices,in_to->sharedspaceindices,out_to->sharedcnt*sizeof(PetscInt));
332: PetscShmCommGet(PetscObjectComm((PetscObject)in),&scomm);
333: PetscShmCommGetMpiShmComm(scomm,&mscomm);
334: MPI_Info_create(&info);
335: MPI_Info_set(info, "alloc_shared_noncontig", "true");
336: MPIU_Win_allocate_shared(bs*out_to->sharedcnt*sizeof(PetscScalar),sizeof(PetscScalar),info,mscomm,&out_to->sharedspace,&out_to->sharedwin);
337: MPI_Info_free(&info);
339: /* copy the from parts for the shared memory copies between processes */
340: out_from->sharedcnt = in_from->sharedcnt;
341: out_from->msize = in_from->msize;
342: PetscMalloc1(out_from->msize+1,&out_from->sharedspacestarts);
343: PetscMemcpy(out_from->sharedspacestarts,in_from->sharedspacestarts,(out_from->msize+1)*sizeof(PetscInt));
344: PetscMalloc1(out_from->sharedcnt,&out_from->sharedspaceindices);
345: PetscMemcpy(out_from->sharedspaceindices,in_from->sharedspaceindices,out_from->sharedcnt*sizeof(PetscInt));
346: PetscMalloc1(out_from->msize,&out_from->sharedspacesoffset);
347: PetscMemcpy(out_from->sharedspacesoffset,in_from->sharedspacesoffset,out_from->msize*sizeof(PetscInt));
348: PetscCalloc1(out_from->msize,&out_from->sharedspaces);
349: for (jj=0; jj<out_from->msize; jj++) {
350: MPI_Aint isize;
351: PetscMPIInt disp_unit;
352: MPIU_Win_shared_query(out_to->sharedwin,jj,&isize,&disp_unit,&out_from->sharedspaces[jj]);
353: }
354: return(0);
355: }
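
/* Sketch of the persistent-request lifecycle set up above (plain MPI; the
   function, buffer, and count names are hypothetical): MPI_Send_init() and
   MPI_Recv_init() register a communication once, each scatter round merely
   starts and completes it, and VecScatterDestroy_PtoP_MPI3() frees it. */
static void PersistentSendSketch(double *buf,int count,int dest,int tag,MPI_Comm comm,int nrounds)
{
  MPI_Request req;
  int         it;
  MPI_Send_init(buf,count,MPI_DOUBLE,dest,tag,comm,&req);   /* register once */
  for (it=0; it<nrounds; it++) {
    MPI_Start(&req);                                        /* begin one round */
    MPI_Wait(&req,MPI_STATUS_IGNORE);                       /* complete it */
  }
  MPI_Request_free(&req);                                   /* matches the frees in the destroy routine */
}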
357: PetscErrorCode VecScatterCopy_PtoP_AllToAll(VecScatter in,VecScatter out)
358: {
359: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
360: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
361: PetscErrorCode ierr;
362: PetscInt ny,bs = in_from->bs;
363: PetscMPIInt size;
366: MPI_Comm_size(PetscObjectComm((PetscObject)in),&size);
368: out->ops->begin = in->ops->begin;
369: out->ops->end = in->ops->end;
370: out->ops->copy = in->ops->copy;
371: out->ops->destroy = in->ops->destroy;
372: out->ops->view = in->ops->view;
374: /* allocate entire send scatter context */
375: PetscNewLog(out,&out_to);
376: out_to->sharedwin = MPI_WIN_NULL;
377: PetscNewLog(out,&out_from);
378: out_from->sharedwin = MPI_WIN_NULL;
380: ny = in_to->starts[in_to->n];
381: out_to->n = in_to->n;
382: out_to->format = in_to->format;
384: PetscMalloc1(out_to->n,&out_to->requests);
385: PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
386: PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
387: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
388: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
389: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
391: out->todata = (void*)out_to;
392: out_to->local.n = in_to->local.n;
393: out_to->local.nonmatching_computed = PETSC_FALSE;
394: out_to->local.n_nonmatching = 0;
395: out_to->local.slots_nonmatching = 0;
396: if (in_to->local.n) {
397: PetscMalloc1(in_to->local.n,&out_to->local.vslots);
398: PetscMalloc1(in_from->local.n,&out_from->local.vslots);
399: PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
400: PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
401: } else {
402: out_to->local.vslots = 0;
403: out_from->local.vslots = 0;
404: }
406: /* allocate entire receive context */
407: VecScatterMemcpyPlanCopy_PtoP(in_to,in_from,out_to,out_from);
409: out_from->format = in_from->format;
410: ny = in_from->starts[in_from->n];
411: out_from->n = in_from->n;
413: PetscMalloc1(out_from->n,&out_from->requests);
414: PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
415: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
416: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
417: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
419: out->fromdata = (void*)out_from;
420: out_from->local.n = in_from->local.n;
421: out_from->local.nonmatching_computed = PETSC_FALSE;
422: out_from->local.n_nonmatching = 0;
423: out_from->local.slots_nonmatching = 0;
425: return(0);
426: }
427: /* --------------------------------------------------------------------------------------------------
428: Packs the message data into send buffers and unpacks it from receive buffers.
430: These kernels could be generated automatically.
432: Fortran kernels etc. could be used.
433: */
434: PETSC_STATIC_INLINE void Pack_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
435: {
436: PetscInt i;
437: for (i=0; i<n; i++) y[i] = x[indicesx[i]];
438: }
440: PETSC_STATIC_INLINE PetscErrorCode UnPack_1(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
441: {
442: PetscInt i;
445: switch (addv) {
446: case INSERT_VALUES:
447: case INSERT_ALL_VALUES:
448: for (i=0; i<n; i++) y[indicesy[i]] = x[i];
449: break;
450: case ADD_VALUES:
451: case ADD_ALL_VALUES:
452: for (i=0; i<n; i++) y[indicesy[i]] += x[i];
453: break;
454: #if !defined(PETSC_USE_COMPLEX)
455: case MAX_VALUES:
456: for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[i]);
457: #else
458: case MAX_VALUES:
459: #endif
460: case NOT_SET_VALUES:
461: break;
462: default:
463: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
464: }
465: return(0);
466: }
468: PETSC_STATIC_INLINE PetscErrorCode Scatter_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
469: {
470: PetscInt i;
473: switch (addv) {
474: case INSERT_VALUES:
475: case INSERT_ALL_VALUES:
476: for (i=0; i<n; i++) y[indicesy[i]] = x[indicesx[i]];
477: break;
478: case ADD_VALUES:
479: case ADD_ALL_VALUES:
480: for (i=0; i<n; i++) y[indicesy[i]] += x[indicesx[i]];
481: break;
482: #if !defined(PETSC_USE_COMPLEX)
483: case MAX_VALUES:
484: for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[indicesx[i]]);
485: #else
486: case MAX_VALUES:
487: #endif
488: case NOT_SET_VALUES:
489: break;
490: default:
491: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
492: }
493: return(0);
494: }
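
/* Minimal driver illustrating the bs = 1 kernels above (hypothetical, for
   exposition only): Pack_1() gathers x[indices[i]] into a contiguous buffer,
   and UnPack_1() scatters the buffer back according to the InsertMode. */
static PetscErrorCode PackUnpackSketch(void)
{
  const PetscInt idx[2] = {3,1};
  PetscScalar    x[4]   = {10,20,30,40},buf[2];
  Pack_1(2,idx,x,buf,1);                      /* buf = {40,20} */
  return UnPack_1(2,buf,idx,x,ADD_VALUES,1);  /* x becomes {10,40,30,80} */
}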
496: /* ----------------------------------------------------------------------------------------------- */
497: PETSC_STATIC_INLINE void Pack_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
498: {
499: PetscInt i,idx;
501: for (i=0; i<n; i++) {
502: idx = *indicesx++;
503: y[0] = x[idx];
504: y[1] = x[idx+1];
505: y += 2;
506: }
507: }
509: PETSC_STATIC_INLINE PetscErrorCode UnPack_2(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
510: {
511: PetscInt i,idy;
514: switch (addv) {
515: case INSERT_VALUES:
516: case INSERT_ALL_VALUES:
517: for (i=0; i<n; i++) {
518: idy = *indicesy++;
519: y[idy] = x[0];
520: y[idy+1] = x[1];
521: x += 2;
522: }
523: break;
524: case ADD_VALUES:
525: case ADD_ALL_VALUES:
526: for (i=0; i<n; i++) {
527: idy = *indicesy++;
528: y[idy] += x[0];
529: y[idy+1] += x[1];
530: x += 2;
531: }
532: break;
533: #if !defined(PETSC_USE_COMPLEX)
534: case MAX_VALUES:
535: for (i=0; i<n; i++) {
536: idy = *indicesy++;
537: y[idy] = PetscMax(y[idy],x[0]);
538: y[idy+1] = PetscMax(y[idy+1],x[1]);
539: x += 2;
540: }
541: #else
542: case MAX_VALUES:
543: #endif
544: case NOT_SET_VALUES:
545: break;
546: default:
547: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
548: }
549: return(0);
550: }
552: PETSC_STATIC_INLINE PetscErrorCode Scatter_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
553: {
554: PetscInt i,idx,idy;
557: switch (addv) {
558: case INSERT_VALUES:
559: case INSERT_ALL_VALUES:
560: for (i=0; i<n; i++) {
561: idx = *indicesx++;
562: idy = *indicesy++;
563: y[idy] = x[idx];
564: y[idy+1] = x[idx+1];
565: }
566: break;
567: case ADD_VALUES:
568: case ADD_ALL_VALUES:
569: for (i=0; i<n; i++) {
570: idx = *indicesx++;
571: idy = *indicesy++;
572: y[idy] += x[idx];
573: y[idy+1] += x[idx+1];
574: }
575: break;
576: #if !defined(PETSC_USE_COMPLEX)
577: case MAX_VALUES:
578: for (i=0; i<n; i++) {
579: idx = *indicesx++;
580: idy = *indicesy++;
581: y[idy] = PetscMax(y[idy],x[idx]);
582: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
583: }
584: #else
585: case MAX_VALUES:
586: #endif
587: case NOT_SET_VALUES:
588: break;
589: default:
590: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
591: }
592: return(0);
593: }
594: /* ----------------------------------------------------------------------------------------------- */
595: PETSC_STATIC_INLINE void Pack_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
596: {
597: PetscInt i,idx;
599: for (i=0; i<n; i++) {
600: idx = *indicesx++;
601: y[0] = x[idx];
602: y[1] = x[idx+1];
603: y[2] = x[idx+2];
604: y += 3;
605: }
606: }
607: PETSC_STATIC_INLINE PetscErrorCode UnPack_3(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
608: {
609: PetscInt i,idy;
612: switch (addv) {
613: case INSERT_VALUES:
614: case INSERT_ALL_VALUES:
615: for (i=0; i<n; i++) {
616: idy = *indicesy++;
617: y[idy] = x[0];
618: y[idy+1] = x[1];
619: y[idy+2] = x[2];
620: x += 3;
621: }
622: break;
623: case ADD_VALUES:
624: case ADD_ALL_VALUES:
625: for (i=0; i<n; i++) {
626: idy = *indicesy++;
627: y[idy] += x[0];
628: y[idy+1] += x[1];
629: y[idy+2] += x[2];
630: x += 3;
631: }
632: break;
633: #if !defined(PETSC_USE_COMPLEX)
634: case MAX_VALUES:
635: for (i=0; i<n; i++) {
636: idy = *indicesy++;
637: y[idy] = PetscMax(y[idy],x[0]);
638: y[idy+1] = PetscMax(y[idy+1],x[1]);
639: y[idy+2] = PetscMax(y[idy+2],x[2]);
640: x += 3;
641: }
642: #else
643: case MAX_VALUES:
644: #endif
645: case NOT_SET_VALUES:
646: break;
647: default:
648: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
649: }
650: return(0);
651: }
653: PETSC_STATIC_INLINE PetscErrorCode Scatter_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
654: {
655: PetscInt i,idx,idy;
658: switch (addv) {
659: case INSERT_VALUES:
660: case INSERT_ALL_VALUES:
661: for (i=0; i<n; i++) {
662: idx = *indicesx++;
663: idy = *indicesy++;
664: y[idy] = x[idx];
665: y[idy+1] = x[idx+1];
666: y[idy+2] = x[idx+2];
667: }
668: break;
669: case ADD_VALUES:
670: case ADD_ALL_VALUES:
671: for (i=0; i<n; i++) {
672: idx = *indicesx++;
673: idy = *indicesy++;
674: y[idy] += x[idx];
675: y[idy+1] += x[idx+1];
676: y[idy+2] += x[idx+2];
677: }
678: break;
679: #if !defined(PETSC_USE_COMPLEX)
680: case MAX_VALUES:
681: for (i=0; i<n; i++) {
682: idx = *indicesx++;
683: idy = *indicesy++;
684: y[idy] = PetscMax(y[idy],x[idx]);
685: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
686: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
687: }
688: #else
689: case MAX_VALUES:
690: #endif
691: case NOT_SET_VALUES:
692: break;
693: default:
694: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
695: }
696: return(0);
697: }
698: /* ----------------------------------------------------------------------------------------------- */
699: PETSC_STATIC_INLINE void Pack_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
700: {
701: PetscInt i,idx;
703: for (i=0; i<n; i++) {
704: idx = *indicesx++;
705: y[0] = x[idx];
706: y[1] = x[idx+1];
707: y[2] = x[idx+2];
708: y[3] = x[idx+3];
709: y += 4;
710: }
711: }
712: PETSC_STATIC_INLINE PetscErrorCode UnPack_4(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
713: {
714: PetscInt i,idy;
717: switch (addv) {
718: case INSERT_VALUES:
719: case INSERT_ALL_VALUES:
720: for (i=0; i<n; i++) {
721: idy = *indicesy++;
722: y[idy] = x[0];
723: y[idy+1] = x[1];
724: y[idy+2] = x[2];
725: y[idy+3] = x[3];
726: x += 4;
727: }
728: break;
729: case ADD_VALUES:
730: case ADD_ALL_VALUES:
731: for (i=0; i<n; i++) {
732: idy = *indicesy++;
733: y[idy] += x[0];
734: y[idy+1] += x[1];
735: y[idy+2] += x[2];
736: y[idy+3] += x[3];
737: x += 4;
738: }
739: break;
740: #if !defined(PETSC_USE_COMPLEX)
741: case MAX_VALUES:
742: for (i=0; i<n; i++) {
743: idy = *indicesy++;
744: y[idy] = PetscMax(y[idy],x[0]);
745: y[idy+1] = PetscMax(y[idy+1],x[1]);
746: y[idy+2] = PetscMax(y[idy+2],x[2]);
747: y[idy+3] = PetscMax(y[idy+3],x[3]);
748: x += 4;
749: }
750: #else
751: case MAX_VALUES:
752: #endif
753: case NOT_SET_VALUES:
754: break;
755: default:
756: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
757: }
758: return(0);
759: }
761: PETSC_STATIC_INLINE PetscErrorCode Scatter_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
762: {
763: PetscInt i,idx,idy;
766: switch (addv) {
767: case INSERT_VALUES:
768: case INSERT_ALL_VALUES:
769: for (i=0; i<n; i++) {
770: idx = *indicesx++;
771: idy = *indicesy++;
772: y[idy] = x[idx];
773: y[idy+1] = x[idx+1];
774: y[idy+2] = x[idx+2];
775: y[idy+3] = x[idx+3];
776: }
777: break;
778: case ADD_VALUES:
779: case ADD_ALL_VALUES:
780: for (i=0; i<n; i++) {
781: idx = *indicesx++;
782: idy = *indicesy++;
783: y[idy] += x[idx];
784: y[idy+1] += x[idx+1];
785: y[idy+2] += x[idx+2];
786: y[idy+3] += x[idx+3];
787: }
788: break;
789: #if !defined(PETSC_USE_COMPLEX)
790: case MAX_VALUES:
791: for (i=0; i<n; i++) {
792: idx = *indicesx++;
793: idy = *indicesy++;
794: y[idy] = PetscMax(y[idy],x[idx]);
795: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
796: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
797: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
798: }
799: #else
800: case MAX_VALUES:
801: #endif
802: case NOT_SET_VALUES:
803: break;
804: default:
805: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
806: }
807: return(0);
808: }
809: /* ----------------------------------------------------------------------------------------------- */
810: PETSC_STATIC_INLINE void Pack_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
811: {
812: PetscInt i,idx;
814: for (i=0; i<n; i++) {
815: idx = *indicesx++;
816: y[0] = x[idx];
817: y[1] = x[idx+1];
818: y[2] = x[idx+2];
819: y[3] = x[idx+3];
820: y[4] = x[idx+4];
821: y += 5;
822: }
823: }
825: PETSC_STATIC_INLINE PetscErrorCode UnPack_5(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
826: {
827: PetscInt i,idy;
830: switch (addv) {
831: case INSERT_VALUES:
832: case INSERT_ALL_VALUES:
833: for (i=0; i<n; i++) {
834: idy = *indicesy++;
835: y[idy] = x[0];
836: y[idy+1] = x[1];
837: y[idy+2] = x[2];
838: y[idy+3] = x[3];
839: y[idy+4] = x[4];
840: x += 5;
841: }
842: break;
843: case ADD_VALUES:
844: case ADD_ALL_VALUES:
845: for (i=0; i<n; i++) {
846: idy = *indicesy++;
847: y[idy] += x[0];
848: y[idy+1] += x[1];
849: y[idy+2] += x[2];
850: y[idy+3] += x[3];
851: y[idy+4] += x[4];
852: x += 5;
853: }
854: break;
855: #if !defined(PETSC_USE_COMPLEX)
856: case MAX_VALUES:
857: for (i=0; i<n; i++) {
858: idy = *indicesy++;
859: y[idy] = PetscMax(y[idy],x[0]);
860: y[idy+1] = PetscMax(y[idy+1],x[1]);
861: y[idy+2] = PetscMax(y[idy+2],x[2]);
862: y[idy+3] = PetscMax(y[idy+3],x[3]);
863: y[idy+4] = PetscMax(y[idy+4],x[4]);
864: x += 5;
865: }
866: #else
867: case MAX_VALUES:
868: #endif
869: case NOT_SET_VALUES:
870: break;
871: default:
872: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
873: }
874: return(0);
875: }
877: PETSC_STATIC_INLINE PetscErrorCode Scatter_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
878: {
879: PetscInt i,idx,idy;
882: switch (addv) {
883: case INSERT_VALUES:
884: case INSERT_ALL_VALUES:
885: for (i=0; i<n; i++) {
886: idx = *indicesx++;
887: idy = *indicesy++;
888: y[idy] = x[idx];
889: y[idy+1] = x[idx+1];
890: y[idy+2] = x[idx+2];
891: y[idy+3] = x[idx+3];
892: y[idy+4] = x[idx+4];
893: }
894: break;
895: case ADD_VALUES:
896: case ADD_ALL_VALUES:
897: for (i=0; i<n; i++) {
898: idx = *indicesx++;
899: idy = *indicesy++;
900: y[idy] += x[idx];
901: y[idy+1] += x[idx+1];
902: y[idy+2] += x[idx+2];
903: y[idy+3] += x[idx+3];
904: y[idy+4] += x[idx+4];
905: }
906: break;
907: #if !defined(PETSC_USE_COMPLEX)
908: case MAX_VALUES:
909: for (i=0; i<n; i++) {
910: idx = *indicesx++;
911: idy = *indicesy++;
912: y[idy] = PetscMax(y[idy],x[idx]);
913: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
914: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
915: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
916: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
917: }
918: #else
919: case MAX_VALUES:
920: #endif
921: case NOT_SET_VALUES:
922: break;
923: default:
924: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
925: }
926: return(0);
927: }
928: /* ----------------------------------------------------------------------------------------------- */
929: PETSC_STATIC_INLINE void Pack_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
930: {
931: PetscInt i,idx;
933: for (i=0; i<n; i++) {
934: idx = *indicesx++;
935: y[0] = x[idx];
936: y[1] = x[idx+1];
937: y[2] = x[idx+2];
938: y[3] = x[idx+3];
939: y[4] = x[idx+4];
940: y[5] = x[idx+5];
941: y += 6;
942: }
943: }
945: PETSC_STATIC_INLINE PetscErrorCode UnPack_6(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
946: {
947: PetscInt i,idy;
950: switch (addv) {
951: case INSERT_VALUES:
952: case INSERT_ALL_VALUES:
953: for (i=0; i<n; i++) {
954: idy = *indicesy++;
955: y[idy] = x[0];
956: y[idy+1] = x[1];
957: y[idy+2] = x[2];
958: y[idy+3] = x[3];
959: y[idy+4] = x[4];
960: y[idy+5] = x[5];
961: x += 6;
962: }
963: break;
964: case ADD_VALUES:
965: case ADD_ALL_VALUES:
966: for (i=0; i<n; i++) {
967: idy = *indicesy++;
968: y[idy] += x[0];
969: y[idy+1] += x[1];
970: y[idy+2] += x[2];
971: y[idy+3] += x[3];
972: y[idy+4] += x[4];
973: y[idy+5] += x[5];
974: x += 6;
975: }
976: break;
977: #if !defined(PETSC_USE_COMPLEX)
978: case MAX_VALUES:
979: for (i=0; i<n; i++) {
980: idy = *indicesy++;
981: y[idy] = PetscMax(y[idy],x[0]);
982: y[idy+1] = PetscMax(y[idy+1],x[1]);
983: y[idy+2] = PetscMax(y[idy+2],x[2]);
984: y[idy+3] = PetscMax(y[idy+3],x[3]);
985: y[idy+4] = PetscMax(y[idy+4],x[4]);
986: y[idy+5] = PetscMax(y[idy+5],x[5]);
987: x += 6;
988: }
989: #else
990: case MAX_VALUES:
991: #endif
992: case NOT_SET_VALUES:
993: break;
994: default:
995: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
996: }
997: return(0);
998: }
1000: PETSC_STATIC_INLINE PetscErrorCode Scatter_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1001: {
1002: PetscInt i,idx,idy;
1005: switch (addv) {
1006: case INSERT_VALUES:
1007: case INSERT_ALL_VALUES:
1008: for (i=0; i<n; i++) {
1009: idx = *indicesx++;
1010: idy = *indicesy++;
1011: y[idy] = x[idx];
1012: y[idy+1] = x[idx+1];
1013: y[idy+2] = x[idx+2];
1014: y[idy+3] = x[idx+3];
1015: y[idy+4] = x[idx+4];
1016: y[idy+5] = x[idx+5];
1017: }
1018: break;
1019: case ADD_VALUES:
1020: case ADD_ALL_VALUES:
1021: for (i=0; i<n; i++) {
1022: idx = *indicesx++;
1023: idy = *indicesy++;
1024: y[idy] += x[idx];
1025: y[idy+1] += x[idx+1];
1026: y[idy+2] += x[idx+2];
1027: y[idy+3] += x[idx+3];
1028: y[idy+4] += x[idx+4];
1029: y[idy+5] += x[idx+5];
1030: }
1031: break;
1032: #if !defined(PETSC_USE_COMPLEX)
1033: case MAX_VALUES:
1034: for (i=0; i<n; i++) {
1035: idx = *indicesx++;
1036: idy = *indicesy++;
1037: y[idy] = PetscMax(y[idy],x[idx]);
1038: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1039: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1040: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1041: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1042: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1043: }
1044: #else
1045: case MAX_VALUES:
1046: #endif
1047: case NOT_SET_VALUES:
1048: break;
1049: default:
1050: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1051: }
1052: return(0);
1053: }
1054: /* ----------------------------------------------------------------------------------------------- */
1055: PETSC_STATIC_INLINE void Pack_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1056: {
1057: PetscInt i,idx;
1059: for (i=0; i<n; i++) {
1060: idx = *indicesx++;
1061: y[0] = x[idx];
1062: y[1] = x[idx+1];
1063: y[2] = x[idx+2];
1064: y[3] = x[idx+3];
1065: y[4] = x[idx+4];
1066: y[5] = x[idx+5];
1067: y[6] = x[idx+6];
1068: y += 7;
1069: }
1070: }
1072: PETSC_STATIC_INLINE PetscErrorCode UnPack_7(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1073: {
1074: PetscInt i,idy;
1077: switch (addv) {
1078: case INSERT_VALUES:
1079: case INSERT_ALL_VALUES:
1080: for (i=0; i<n; i++) {
1081: idy = *indicesy++;
1082: y[idy] = x[0];
1083: y[idy+1] = x[1];
1084: y[idy+2] = x[2];
1085: y[idy+3] = x[3];
1086: y[idy+4] = x[4];
1087: y[idy+5] = x[5];
1088: y[idy+6] = x[6];
1089: x += 7;
1090: }
1091: break;
1092: case ADD_VALUES:
1093: case ADD_ALL_VALUES:
1094: for (i=0; i<n; i++) {
1095: idy = *indicesy++;
1096: y[idy] += x[0];
1097: y[idy+1] += x[1];
1098: y[idy+2] += x[2];
1099: y[idy+3] += x[3];
1100: y[idy+4] += x[4];
1101: y[idy+5] += x[5];
1102: y[idy+6] += x[6];
1103: x += 7;
1104: }
1105: break;
1106: #if !defined(PETSC_USE_COMPLEX)
1107: case MAX_VALUES:
1108: for (i=0; i<n; i++) {
1109: idy = *indicesy++;
1110: y[idy] = PetscMax(y[idy],x[0]);
1111: y[idy+1] = PetscMax(y[idy+1],x[1]);
1112: y[idy+2] = PetscMax(y[idy+2],x[2]);
1113: y[idy+3] = PetscMax(y[idy+3],x[3]);
1114: y[idy+4] = PetscMax(y[idy+4],x[4]);
1115: y[idy+5] = PetscMax(y[idy+5],x[5]);
1116: y[idy+6] = PetscMax(y[idy+6],x[6]);
1117: x += 7;
1118: }
1119: #else
1120: case MAX_VALUES:
1121: #endif
1122: case NOT_SET_VALUES:
1123: break;
1124: default:
1125: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1126: }
1127: return(0);
1128: }
1130: PETSC_STATIC_INLINE PetscErrorCode Scatter_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1131: {
1132: PetscInt i,idx,idy;
1135: switch (addv) {
1136: case INSERT_VALUES:
1137: case INSERT_ALL_VALUES:
1138: for (i=0; i<n; i++) {
1139: idx = *indicesx++;
1140: idy = *indicesy++;
1141: y[idy] = x[idx];
1142: y[idy+1] = x[idx+1];
1143: y[idy+2] = x[idx+2];
1144: y[idy+3] = x[idx+3];
1145: y[idy+4] = x[idx+4];
1146: y[idy+5] = x[idx+5];
1147: y[idy+6] = x[idx+6];
1148: }
1149: break;
1150: case ADD_VALUES:
1151: case ADD_ALL_VALUES:
1152: for (i=0; i<n; i++) {
1153: idx = *indicesx++;
1154: idy = *indicesy++;
1155: y[idy] += x[idx];
1156: y[idy+1] += x[idx+1];
1157: y[idy+2] += x[idx+2];
1158: y[idy+3] += x[idx+3];
1159: y[idy+4] += x[idx+4];
1160: y[idy+5] += x[idx+5];
1161: y[idy+6] += x[idx+6];
1162: }
1163: break;
1164: #if !defined(PETSC_USE_COMPLEX)
1165: case MAX_VALUES:
1166: for (i=0; i<n; i++) {
1167: idx = *indicesx++;
1168: idy = *indicesy++;
1169: y[idy] = PetscMax(y[idy],x[idx]);
1170: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1171: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1172: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1173: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1174: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1175: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1176: }
1177: #else
1178: case MAX_VALUES:
1179: #endif
1180: case NOT_SET_VALUES:
1181: break;
1182: default:
1183: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1184: }
1185: return(0);
1186: }
1187: /* ----------------------------------------------------------------------------------------------- */
1188: PETSC_STATIC_INLINE void Pack_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1189: {
1190: PetscInt i,idx;
1192: for (i=0; i<n; i++) {
1193: idx = *indicesx++;
1194: y[0] = x[idx];
1195: y[1] = x[idx+1];
1196: y[2] = x[idx+2];
1197: y[3] = x[idx+3];
1198: y[4] = x[idx+4];
1199: y[5] = x[idx+5];
1200: y[6] = x[idx+6];
1201: y[7] = x[idx+7];
1202: y += 8;
1203: }
1204: }
1206: PETSC_STATIC_INLINE PetscErrorCode UnPack_8(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1207: {
1208: PetscInt i,idy;
1211: switch (addv) {
1212: case INSERT_VALUES:
1213: case INSERT_ALL_VALUES:
1214: for (i=0; i<n; i++) {
1215: idy = *indicesy++;
1216: y[idy] = x[0];
1217: y[idy+1] = x[1];
1218: y[idy+2] = x[2];
1219: y[idy+3] = x[3];
1220: y[idy+4] = x[4];
1221: y[idy+5] = x[5];
1222: y[idy+6] = x[6];
1223: y[idy+7] = x[7];
1224: x += 8;
1225: }
1226: break;
1227: case ADD_VALUES:
1228: case ADD_ALL_VALUES:
1229: for (i=0; i<n; i++) {
1230: idy = *indicesy++;
1231: y[idy] += x[0];
1232: y[idy+1] += x[1];
1233: y[idy+2] += x[2];
1234: y[idy+3] += x[3];
1235: y[idy+4] += x[4];
1236: y[idy+5] += x[5];
1237: y[idy+6] += x[6];
1238: y[idy+7] += x[7];
1239: x += 8;
1240: }
1241: break;
1242: #if !defined(PETSC_USE_COMPLEX)
1243: case MAX_VALUES:
1244: for (i=0; i<n; i++) {
1245: idy = *indicesy++;
1246: y[idy] = PetscMax(y[idy],x[0]);
1247: y[idy+1] = PetscMax(y[idy+1],x[1]);
1248: y[idy+2] = PetscMax(y[idy+2],x[2]);
1249: y[idy+3] = PetscMax(y[idy+3],x[3]);
1250: y[idy+4] = PetscMax(y[idy+4],x[4]);
1251: y[idy+5] = PetscMax(y[idy+5],x[5]);
1252: y[idy+6] = PetscMax(y[idy+6],x[6]);
1253: y[idy+7] = PetscMax(y[idy+7],x[7]);
1254: x += 8;
1255: }
1256: #else
1257: case MAX_VALUES:
1258: #endif
1259: case NOT_SET_VALUES:
1260: break;
1261: default:
1262: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1263: }
1264: return(0);
1265: }
1267: PETSC_STATIC_INLINE PetscErrorCode Scatter_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1268: {
1269: PetscInt i,idx,idy;
1272: switch (addv) {
1273: case INSERT_VALUES:
1274: case INSERT_ALL_VALUES:
1275: for (i=0; i<n; i++) {
1276: idx = *indicesx++;
1277: idy = *indicesy++;
1278: y[idy] = x[idx];
1279: y[idy+1] = x[idx+1];
1280: y[idy+2] = x[idx+2];
1281: y[idy+3] = x[idx+3];
1282: y[idy+4] = x[idx+4];
1283: y[idy+5] = x[idx+5];
1284: y[idy+6] = x[idx+6];
1285: y[idy+7] = x[idx+7];
1286: }
1287: break;
1288: case ADD_VALUES:
1289: case ADD_ALL_VALUES:
1290: for (i=0; i<n; i++) {
1291: idx = *indicesx++;
1292: idy = *indicesy++;
1293: y[idy] += x[idx];
1294: y[idy+1] += x[idx+1];
1295: y[idy+2] += x[idx+2];
1296: y[idy+3] += x[idx+3];
1297: y[idy+4] += x[idx+4];
1298: y[idy+5] += x[idx+5];
1299: y[idy+6] += x[idx+6];
1300: y[idy+7] += x[idx+7];
1301: }
1302: break;
1303: #if !defined(PETSC_USE_COMPLEX)
1304: case MAX_VALUES:
1305: for (i=0; i<n; i++) {
1306: idx = *indicesx++;
1307: idy = *indicesy++;
1308: y[idy] = PetscMax(y[idy],x[idx]);
1309: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1310: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1311: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1312: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1313: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1314: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1315: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1316: }
1317: #else
1318: case MAX_VALUES:
1319: #endif
1320: case NOT_SET_VALUES:
1321: break;
1322: default:
1323: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1324: }
1325: return(0);
1326: }
1328: PETSC_STATIC_INLINE void Pack_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1329: {
1330: PetscInt i,idx;
1332: for (i=0; i<n; i++) {
1333: idx = *indicesx++;
1334: y[0] = x[idx];
1335: y[1] = x[idx+1];
1336: y[2] = x[idx+2];
1337: y[3] = x[idx+3];
1338: y[4] = x[idx+4];
1339: y[5] = x[idx+5];
1340: y[6] = x[idx+6];
1341: y[7] = x[idx+7];
1342: y[8] = x[idx+8];
1343: y += 9;
1344: }
1345: }
1347: PETSC_STATIC_INLINE PetscErrorCode UnPack_9(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1348: {
1349: PetscInt i,idy;
1352: switch (addv) {
1353: case INSERT_VALUES:
1354: case INSERT_ALL_VALUES:
1355: for (i=0; i<n; i++) {
1356: idy = *indicesy++;
1357: y[idy] = x[0];
1358: y[idy+1] = x[1];
1359: y[idy+2] = x[2];
1360: y[idy+3] = x[3];
1361: y[idy+4] = x[4];
1362: y[idy+5] = x[5];
1363: y[idy+6] = x[6];
1364: y[idy+7] = x[7];
1365: y[idy+8] = x[8];
1366: x += 9;
1367: }
1368: break;
1369: case ADD_VALUES:
1370: case ADD_ALL_VALUES:
1371: for (i=0; i<n; i++) {
1372: idy = *indicesy++;
1373: y[idy] += x[0];
1374: y[idy+1] += x[1];
1375: y[idy+2] += x[2];
1376: y[idy+3] += x[3];
1377: y[idy+4] += x[4];
1378: y[idy+5] += x[5];
1379: y[idy+6] += x[6];
1380: y[idy+7] += x[7];
1381: y[idy+8] += x[8];
1382: x += 9;
1383: }
1384: break;
1385: #if !defined(PETSC_USE_COMPLEX)
1386: case MAX_VALUES:
1387: for (i=0; i<n; i++) {
1388: idy = *indicesy++;
1389: y[idy] = PetscMax(y[idy],x[0]);
1390: y[idy+1] = PetscMax(y[idy+1],x[1]);
1391: y[idy+2] = PetscMax(y[idy+2],x[2]);
1392: y[idy+3] = PetscMax(y[idy+3],x[3]);
1393: y[idy+4] = PetscMax(y[idy+4],x[4]);
1394: y[idy+5] = PetscMax(y[idy+5],x[5]);
1395: y[idy+6] = PetscMax(y[idy+6],x[6]);
1396: y[idy+7] = PetscMax(y[idy+7],x[7]);
1397: y[idy+8] = PetscMax(y[idy+8],x[8]);
1398: x += 9;
1399: }
1400: #else
1401: case MAX_VALUES:
1402: #endif
1403: case NOT_SET_VALUES:
1404: break;
1405: default:
1406: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1407: }
1408: return(0);
1409: }
1411: PETSC_STATIC_INLINE PetscErrorCode Scatter_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1412: {
1413: PetscInt i,idx,idy;
1416: switch (addv) {
1417: case INSERT_VALUES:
1418: case INSERT_ALL_VALUES:
1419: for (i=0; i<n; i++) {
1420: idx = *indicesx++;
1421: idy = *indicesy++;
1422: y[idy] = x[idx];
1423: y[idy+1] = x[idx+1];
1424: y[idy+2] = x[idx+2];
1425: y[idy+3] = x[idx+3];
1426: y[idy+4] = x[idx+4];
1427: y[idy+5] = x[idx+5];
1428: y[idy+6] = x[idx+6];
1429: y[idy+7] = x[idx+7];
1430: y[idy+8] = x[idx+8];
1431: }
1432: break;
1433: case ADD_VALUES:
1434: case ADD_ALL_VALUES:
1435: for (i=0; i<n; i++) {
1436: idx = *indicesx++;
1437: idy = *indicesy++;
1438: y[idy] += x[idx];
1439: y[idy+1] += x[idx+1];
1440: y[idy+2] += x[idx+2];
1441: y[idy+3] += x[idx+3];
1442: y[idy+4] += x[idx+4];
1443: y[idy+5] += x[idx+5];
1444: y[idy+6] += x[idx+6];
1445: y[idy+7] += x[idx+7];
1446: y[idy+8] += x[idx+8];
1447: }
1448: break;
1449: #if !defined(PETSC_USE_COMPLEX)
1450: case MAX_VALUES:
1451: for (i=0; i<n; i++) {
1452: idx = *indicesx++;
1453: idy = *indicesy++;
1454: y[idy] = PetscMax(y[idy],x[idx]);
1455: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1456: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1457: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1458: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1459: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1460: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1461: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1462: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1463: }
1464: #else
1465: case MAX_VALUES:
1466: #endif
1467: case NOT_SET_VALUES:
1468: break;
1469: default:
1470: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1471: }
1472: return(0);
1473: }
1475: PETSC_STATIC_INLINE void Pack_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1476: {
1477: PetscInt i,idx;
1479: for (i=0; i<n; i++) {
1480: idx = *indicesx++;
1481: y[0] = x[idx];
1482: y[1] = x[idx+1];
1483: y[2] = x[idx+2];
1484: y[3] = x[idx+3];
1485: y[4] = x[idx+4];
1486: y[5] = x[idx+5];
1487: y[6] = x[idx+6];
1488: y[7] = x[idx+7];
1489: y[8] = x[idx+8];
1490: y[9] = x[idx+9];
1491: y += 10;
1492: }
1493: }
1495: PETSC_STATIC_INLINE PetscErrorCode UnPack_10(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1496: {
1497: PetscInt i,idy;
1500: switch (addv) {
1501: case INSERT_VALUES:
1502: case INSERT_ALL_VALUES:
1503: for (i=0; i<n; i++) {
1504: idy = *indicesy++;
1505: y[idy] = x[0];
1506: y[idy+1] = x[1];
1507: y[idy+2] = x[2];
1508: y[idy+3] = x[3];
1509: y[idy+4] = x[4];
1510: y[idy+5] = x[5];
1511: y[idy+6] = x[6];
1512: y[idy+7] = x[7];
1513: y[idy+8] = x[8];
1514: y[idy+9] = x[9];
1515: x += 10;
1516: }
1517: break;
1518: case ADD_VALUES:
1519: case ADD_ALL_VALUES:
1520: for (i=0; i<n; i++) {
1521: idy = *indicesy++;
1522: y[idy] += x[0];
1523: y[idy+1] += x[1];
1524: y[idy+2] += x[2];
1525: y[idy+3] += x[3];
1526: y[idy+4] += x[4];
1527: y[idy+5] += x[5];
1528: y[idy+6] += x[6];
1529: y[idy+7] += x[7];
1530: y[idy+8] += x[8];
1531: y[idy+9] += x[9];
1532: x += 10;
1533: }
1534: break;
1535: #if !defined(PETSC_USE_COMPLEX)
1536: case MAX_VALUES:
1537: for (i=0; i<n; i++) {
1538: idy = *indicesy++;
1539: y[idy] = PetscMax(y[idy],x[0]);
1540: y[idy+1] = PetscMax(y[idy+1],x[1]);
1541: y[idy+2] = PetscMax(y[idy+2],x[2]);
1542: y[idy+3] = PetscMax(y[idy+3],x[3]);
1543: y[idy+4] = PetscMax(y[idy+4],x[4]);
1544: y[idy+5] = PetscMax(y[idy+5],x[5]);
1545: y[idy+6] = PetscMax(y[idy+6],x[6]);
1546: y[idy+7] = PetscMax(y[idy+7],x[7]);
1547: y[idy+8] = PetscMax(y[idy+8],x[8]);
1548: y[idy+9] = PetscMax(y[idy+9],x[9]);
1549: x += 10;
1550: }
1551: #else
1552: case MAX_VALUES:
1553: #endif
1554: case NOT_SET_VALUES:
1555: break;
1556: default:
1557: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1558: }
1559: return(0);
1560: }
1562: PETSC_STATIC_INLINE PetscErrorCode Scatter_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1563: {
1564: PetscInt i,idx,idy;
1567: switch (addv) {
1568: case INSERT_VALUES:
1569: case INSERT_ALL_VALUES:
1570: for (i=0; i<n; i++) {
1571: idx = *indicesx++;
1572: idy = *indicesy++;
1573: y[idy] = x[idx];
1574: y[idy+1] = x[idx+1];
1575: y[idy+2] = x[idx+2];
1576: y[idy+3] = x[idx+3];
1577: y[idy+4] = x[idx+4];
1578: y[idy+5] = x[idx+5];
1579: y[idy+6] = x[idx+6];
1580: y[idy+7] = x[idx+7];
1581: y[idy+8] = x[idx+8];
1582: y[idy+9] = x[idx+9];
1583: }
1584: break;
1585: case ADD_VALUES:
1586: case ADD_ALL_VALUES:
1587: for (i=0; i<n; i++) {
1588: idx = *indicesx++;
1589: idy = *indicesy++;
1590: y[idy] += x[idx];
1591: y[idy+1] += x[idx+1];
1592: y[idy+2] += x[idx+2];
1593: y[idy+3] += x[idx+3];
1594: y[idy+4] += x[idx+4];
1595: y[idy+5] += x[idx+5];
1596: y[idy+6] += x[idx+6];
1597: y[idy+7] += x[idx+7];
1598: y[idy+8] += x[idx+8];
1599: y[idy+9] += x[idx+9];
1600: }
1601: break;
1602: #if !defined(PETSC_USE_COMPLEX)
1603: case MAX_VALUES:
1604: for (i=0; i<n; i++) {
1605: idx = *indicesx++;
1606: idy = *indicesy++;
1607: y[idy] = PetscMax(y[idy],x[idx]);
1608: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1609: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1610: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1611: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1612: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1613: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1614: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1615: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1616: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1617: }
1618: #else
1619: case MAX_VALUES:
1620: #endif
1621: case NOT_SET_VALUES:
1622: break;
1623: default:
1624: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1625: }
1626: return(0);
1627: }
1629: PETSC_STATIC_INLINE void Pack_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1630: {
1631: PetscInt i,idx;
1633: for (i=0; i<n; i++) {
1634: idx = *indicesx++;
1635: y[0] = x[idx];
1636: y[1] = x[idx+1];
1637: y[2] = x[idx+2];
1638: y[3] = x[idx+3];
1639: y[4] = x[idx+4];
1640: y[5] = x[idx+5];
1641: y[6] = x[idx+6];
1642: y[7] = x[idx+7];
1643: y[8] = x[idx+8];
1644: y[9] = x[idx+9];
1645: y[10] = x[idx+10];
1646: y += 11;
1647: }
1648: }
1650: PETSC_STATIC_INLINE PetscErrorCode UnPack_11(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1651: {
1652: PetscInt i,idy;
1655: switch (addv) {
1656: case INSERT_VALUES:
1657: case INSERT_ALL_VALUES:
1658: for (i=0; i<n; i++) {
1659: idy = *indicesy++;
1660: y[idy] = x[0];
1661: y[idy+1] = x[1];
1662: y[idy+2] = x[2];
1663: y[idy+3] = x[3];
1664: y[idy+4] = x[4];
1665: y[idy+5] = x[5];
1666: y[idy+6] = x[6];
1667: y[idy+7] = x[7];
1668: y[idy+8] = x[8];
1669: y[idy+9] = x[9];
1670: y[idy+10] = x[10];
1671: x += 11;
1672: }
1673: break;
1674: case ADD_VALUES:
1675: case ADD_ALL_VALUES:
1676: for (i=0; i<n; i++) {
1677: idy = *indicesy++;
1678: y[idy] += x[0];
1679: y[idy+1] += x[1];
1680: y[idy+2] += x[2];
1681: y[idy+3] += x[3];
1682: y[idy+4] += x[4];
1683: y[idy+5] += x[5];
1684: y[idy+6] += x[6];
1685: y[idy+7] += x[7];
1686: y[idy+8] += x[8];
1687: y[idy+9] += x[9];
1688: y[idy+10] += x[10];
1689: x += 11;
1690: }
1691: break;
1692: #if !defined(PETSC_USE_COMPLEX)
1693: case MAX_VALUES:
1694: for (i=0; i<n; i++) {
1695: idy = *indicesy++;
1696: y[idy] = PetscMax(y[idy],x[0]);
1697: y[idy+1] = PetscMax(y[idy+1],x[1]);
1698: y[idy+2] = PetscMax(y[idy+2],x[2]);
1699: y[idy+3] = PetscMax(y[idy+3],x[3]);
1700: y[idy+4] = PetscMax(y[idy+4],x[4]);
1701: y[idy+5] = PetscMax(y[idy+5],x[5]);
1702: y[idy+6] = PetscMax(y[idy+6],x[6]);
1703: y[idy+7] = PetscMax(y[idy+7],x[7]);
1704: y[idy+8] = PetscMax(y[idy+8],x[8]);
1705: y[idy+9] = PetscMax(y[idy+9],x[9]);
1706: y[idy+10] = PetscMax(y[idy+10],x[10]);
1707: x += 11;
1708: }
1709: #else
1710: case MAX_VALUES:
1711: #endif
1712: case NOT_SET_VALUES:
1713: break;
1714: default:
1715: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1716: }
1717: return(0);
1718: }
1720: PETSC_STATIC_INLINE PetscErrorCode Scatter_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1721: {
1722: PetscInt i,idx,idy;
1725: switch (addv) {
1726: case INSERT_VALUES:
1727: case INSERT_ALL_VALUES:
1728: for (i=0; i<n; i++) {
1729: idx = *indicesx++;
1730: idy = *indicesy++;
1731: y[idy] = x[idx];
1732: y[idy+1] = x[idx+1];
1733: y[idy+2] = x[idx+2];
1734: y[idy+3] = x[idx+3];
1735: y[idy+4] = x[idx+4];
1736: y[idy+5] = x[idx+5];
1737: y[idy+6] = x[idx+6];
1738: y[idy+7] = x[idx+7];
1739: y[idy+8] = x[idx+8];
1740: y[idy+9] = x[idx+9];
1741: y[idy+10] = x[idx+10];
1742: }
1743: break;
1744: case ADD_VALUES:
1745: case ADD_ALL_VALUES:
1746: for (i=0; i<n; i++) {
1747: idx = *indicesx++;
1748: idy = *indicesy++;
1749: y[idy] += x[idx];
1750: y[idy+1] += x[idx+1];
1751: y[idy+2] += x[idx+2];
1752: y[idy+3] += x[idx+3];
1753: y[idy+4] += x[idx+4];
1754: y[idy+5] += x[idx+5];
1755: y[idy+6] += x[idx+6];
1756: y[idy+7] += x[idx+7];
1757: y[idy+8] += x[idx+8];
1758: y[idy+9] += x[idx+9];
1759: y[idy+10] += x[idx+10];
1760: }
1761: break;
1762: #if !defined(PETSC_USE_COMPLEX)
1763: case MAX_VALUES:
1764: for (i=0; i<n; i++) {
1765: idx = *indicesx++;
1766: idy = *indicesy++;
1767: y[idy] = PetscMax(y[idy],x[idx]);
1768: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1769: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1770: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1771: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1772: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1773: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1774: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1775: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1776: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1777: y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1778: }
1779: #else
1780: case MAX_VALUES:
1781: #endif
1782: case NOT_SET_VALUES:
1783: break;
1784: default:
1785: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1786: }
1787: return(0);
1788: }
1790: /* ----------------------------------------------------------------------------------------------- */
1791: PETSC_STATIC_INLINE void Pack_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1792: {
1793: PetscInt i,idx;
1795: for (i=0; i<n; i++) {
1796: idx = *indicesx++;
1797: y[0] = x[idx];
1798: y[1] = x[idx+1];
1799: y[2] = x[idx+2];
1800: y[3] = x[idx+3];
1801: y[4] = x[idx+4];
1802: y[5] = x[idx+5];
1803: y[6] = x[idx+6];
1804: y[7] = x[idx+7];
1805: y[8] = x[idx+8];
1806: y[9] = x[idx+9];
1807: y[10] = x[idx+10];
1808: y[11] = x[idx+11];
1809: y += 12;
1810: }
1811: }
1813: PETSC_STATIC_INLINE PetscErrorCode UnPack_12(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1814: {
1815: PetscInt i,idy;
1818: switch (addv) {
1819: case INSERT_VALUES:
1820: case INSERT_ALL_VALUES:
1821: for (i=0; i<n; i++) {
1822: idy = *indicesy++;
1823: y[idy] = x[0];
1824: y[idy+1] = x[1];
1825: y[idy+2] = x[2];
1826: y[idy+3] = x[3];
1827: y[idy+4] = x[4];
1828: y[idy+5] = x[5];
1829: y[idy+6] = x[6];
1830: y[idy+7] = x[7];
1831: y[idy+8] = x[8];
1832: y[idy+9] = x[9];
1833: y[idy+10] = x[10];
1834: y[idy+11] = x[11];
1835: x += 12;
1836: }
1837: break;
1838: case ADD_VALUES:
1839: case ADD_ALL_VALUES:
1840: for (i=0; i<n; i++) {
1841: idy = *indicesy++;
1842: y[idy] += x[0];
1843: y[idy+1] += x[1];
1844: y[idy+2] += x[2];
1845: y[idy+3] += x[3];
1846: y[idy+4] += x[4];
1847: y[idy+5] += x[5];
1848: y[idy+6] += x[6];
1849: y[idy+7] += x[7];
1850: y[idy+8] += x[8];
1851: y[idy+9] += x[9];
1852: y[idy+10] += x[10];
1853: y[idy+11] += x[11];
1854: x += 12;
1855: }
1856: break;
1857: #if !defined(PETSC_USE_COMPLEX)
1858: case MAX_VALUES:
1859: for (i=0; i<n; i++) {
1860: idy = *indicesy++;
1861: y[idy] = PetscMax(y[idy],x[0]);
1862: y[idy+1] = PetscMax(y[idy+1],x[1]);
1863: y[idy+2] = PetscMax(y[idy+2],x[2]);
1864: y[idy+3] = PetscMax(y[idy+3],x[3]);
1865: y[idy+4] = PetscMax(y[idy+4],x[4]);
1866: y[idy+5] = PetscMax(y[idy+5],x[5]);
1867: y[idy+6] = PetscMax(y[idy+6],x[6]);
1868: y[idy+7] = PetscMax(y[idy+7],x[7]);
1869: y[idy+8] = PetscMax(y[idy+8],x[8]);
1870: y[idy+9] = PetscMax(y[idy+9],x[9]);
1871: y[idy+10] = PetscMax(y[idy+10],x[10]);
1872: y[idy+11] = PetscMax(y[idy+11],x[11]);
1873: x += 12;
1874: }
1875: #else
1876: case MAX_VALUES:
1877: #endif
1878: case NOT_SET_VALUES:
1879: break;
1880: default:
1881: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1882: }
1883: return(0);
1884: }
1886: PETSC_STATIC_INLINE PetscErrorCode Scatter_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1887: {
1888: PetscInt i,idx,idy;
1891: switch (addv) {
1892: case INSERT_VALUES:
1893: case INSERT_ALL_VALUES:
1894: for (i=0; i<n; i++) {
1895: idx = *indicesx++;
1896: idy = *indicesy++;
1897: y[idy] = x[idx];
1898: y[idy+1] = x[idx+1];
1899: y[idy+2] = x[idx+2];
1900: y[idy+3] = x[idx+3];
1901: y[idy+4] = x[idx+4];
1902: y[idy+5] = x[idx+5];
1903: y[idy+6] = x[idx+6];
1904: y[idy+7] = x[idx+7];
1905: y[idy+8] = x[idx+8];
1906: y[idy+9] = x[idx+9];
1907: y[idy+10] = x[idx+10];
1908: y[idy+11] = x[idx+11];
1909: }
1910: break;
1911: case ADD_VALUES:
1912: case ADD_ALL_VALUES:
1913: for (i=0; i<n; i++) {
1914: idx = *indicesx++;
1915: idy = *indicesy++;
1916: y[idy] += x[idx];
1917: y[idy+1] += x[idx+1];
1918: y[idy+2] += x[idx+2];
1919: y[idy+3] += x[idx+3];
1920: y[idy+4] += x[idx+4];
1921: y[idy+5] += x[idx+5];
1922: y[idy+6] += x[idx+6];
1923: y[idy+7] += x[idx+7];
1924: y[idy+8] += x[idx+8];
1925: y[idy+9] += x[idx+9];
1926: y[idy+10] += x[idx+10];
1927: y[idy+11] += x[idx+11];
1928: }
1929: break;
1930: #if !defined(PETSC_USE_COMPLEX)
1931: case MAX_VALUES:
1932: for (i=0; i<n; i++) {
1933: idx = *indicesx++;
1934: idy = *indicesy++;
1935: y[idy] = PetscMax(y[idy],x[idx]);
1936: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1937: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1938: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1939: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1940: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1941: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1942: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1943: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1944: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1945: y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1946: y[idy+11] = PetscMax(y[idy+11],x[idx+11]);
1947: }
1948: #else
1949: case MAX_VALUES:
1950: #endif
1951: case NOT_SET_VALUES:
1952: break;
1953: default:
1954: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1955: }
1956: return(0);
1957: }
1959: /* ----------------------------------------------------------------------------------------------- */
1960: PETSC_STATIC_INLINE void Pack_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1961: {
1962: PetscInt i,idx;
1964: for (i=0; i<n; i++) {
1965: idx = *indicesx++;
1966: PetscMemcpy(y,x + idx,bs*sizeof(PetscScalar));
1967: y += bs;
1968: }
1969: }
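/* A minimal usage sketch of Pack_bs (illustrative only, not part of the library):
   with bs = 3, packing the two blocks that start at x[0] and x[6] produces a
   contiguous buffer y = {x[0],x[1],x[2],x[6],x[7],x[8]}. */
#if 0
  PetscScalar x[9]   = {0,1,2,3,4,5,6,7,8};
  PetscScalar y[6];
  PetscInt    idx[2] = {0,6};
  Pack_bs(2,idx,x,y,3);   /* y = {0,1,2,6,7,8} */
#endif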
1971: PETSC_STATIC_INLINE PetscErrorCode UnPack_bs(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1972: {
1973: PetscInt i,idy,j;
1976: switch (addv) {
1977: case INSERT_VALUES:
1978: case INSERT_ALL_VALUES:
1979: for (i=0; i<n; i++) {
1980: idy = *indicesy++;
1981: PetscMemcpy(y + idy,x,bs*sizeof(PetscScalar));
1982: x += bs;
1983: }
1984: break;
1985: case ADD_VALUES:
1986: case ADD_ALL_VALUES:
1987: for (i=0; i<n; i++) {
1988: idy = *indicesy++;
1989: for (j=0; j<bs; j++) y[idy+j] += x[j];
1990: x += bs;
1991: }
1992: break;
1993: #if !defined(PETSC_USE_COMPLEX)
1994: case MAX_VALUES:
1995: for (i=0; i<n; i++) {
1996: idy = *indicesy++;
1997: for (j=0; j<bs; j++) y[idy+j] = PetscMax(y[idy+j],x[j]);
1998: x += bs;
1999: }
2000: #else
2001: case MAX_VALUES:
2002: #endif
2003: case NOT_SET_VALUES:
2004: break;
2005: default:
2006: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2007: }
2008: return(0);
2009: }
2011: PETSC_STATIC_INLINE PetscErrorCode Scatter_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
2012: {
2013: PetscInt i,idx,idy,j;
2016: switch (addv) {
2017: case INSERT_VALUES:
2018: case INSERT_ALL_VALUES:
2019: for (i=0; i<n; i++) {
2020: idx = *indicesx++;
2021: idy = *indicesy++;
2022: PetscMemcpy(y + idy, x + idx,bs*sizeof(PetscScalar));
2023: }
2024: break;
2025: case ADD_VALUES:
2026: case ADD_ALL_VALUES:
2027: for (i=0; i<n; i++) {
2028: idx = *indicesx++;
2029: idy = *indicesy++;
2030: for (j=0; j<bs; j++) y[idy+j] += x[idx+j];
2031: }
2032: break;
2033: #if !defined(PETSC_USE_COMPLEX)
2034: case MAX_VALUES:
2035: for (i=0; i<n; i++) {
2036: idx = *indicesx++;
2037: idy = *indicesy++;
2038: for (j=0; j<bs; j++) y[idy+j] = PetscMax(y[idy+j],x[idx+j]);
2039: }
2040: #else
2041: case MAX_VALUES:
2042: #endif
2043: case NOT_SET_VALUES:
2044: break;
2045: default:
2046: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2047: }
2048: return(0);
2049: }
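/* Pack_bs/UnPack_bs/Scatter_bs are the generic fallbacks for an arbitrary block
   size; the unrolled _1 through _12 variants above are manual specializations
   of the same three operations for the common small block sizes. */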
2051: /* Create the VecScatterBegin/End_P for our chosen block sizes */
2052: #define BS 1
2053: #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2054: #define BS 2
2055: #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2056: #define BS 3
2057: #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2058: #define BS 4
2059: #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2060: #define BS 5
2061: #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2062: #define BS 6
2063: #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2064: #define BS 7
2065: #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2066: #define BS 8
2067: #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2068: #define BS 9
2069: #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2070: #define BS 10
2071: #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2072: #define BS 11
2073: #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2074: #define BS 12
2075: #include <../src/vec/vscat/impls/mpi3/vpscat.h>
2076: #define BS bs
2077: #include <../src/vec/vscat/impls/mpi3/vpscat.h>
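/* The define/include pairs above are PETSc's preprocessor-template idiom:
   vpscat.h presumably pastes BS into the routine names (VecScatterBegin_<BS>,
   VecScatterEnd_<BS>, ...) and #undefs BS at the end so the next inclusion can
   redefine it. A hedged sketch of the idiom (illustrative; the real vpscat.h
   is more involved): */
#if 0
/* template header, included once per BS value */
#define PETSCMAP1_a(a,b) a ## _ ## b
#define PETSCMAP1_b(a,b) PETSCMAP1_a(a,b)
#define PETSCMAP1(a)     PETSCMAP1_b(a,BS)
PetscErrorCode PETSCMAP1(VecScatterBegin)(VecScatter,Vec,Vec,InsertMode,ScatterMode);
#undef PETSCMAP1_a
#undef PETSCMAP1_b
#undef PETSCMAP1
#undef BS
#endif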
2079: /* ==========================================================================================*/
2080: /* create parallel to sequential scatter context */
2082: extern PetscErrorCode VecScatterCreateCommon_PtoS_MPI3(VecScatter_MPI_General*,VecScatter_MPI_General*,VecScatter);
2084: /*
2085: bs indicates how many elements there are in each block; normally this is 1.
2087: Contains a check that PetscMPIInt can handle the sizes needed.
2088: */
2089: #include <petscbt.h>
2090: PetscErrorCode VecScatterCreateLocal_PtoS_MPI3(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2091: {
2092: VecScatter_MPI_General *from,*to;
2093: PetscMPIInt size,rank,mrank,imdex,tag,n;
2094: PetscInt *source = NULL,*owners = NULL,nxr;
2095: PetscInt *lowner = NULL,*start = NULL,*lsharedowner = NULL,*sharedstart = NULL,lengthy,lengthx;
2096: PetscMPIInt *nprocs = NULL,nrecvs,nrecvshared;
2097: PetscInt i,j,idx = 0,nsends;
2098: PetscMPIInt *owner = NULL;
2099: PetscInt *starts = NULL,count,slen,slenshared;
2100: PetscInt *rvalues,*svalues,base,*values,nprocslocal,recvtotal,*rsvalues;
2101: PetscMPIInt *onodes1,*olengths1;
2102: MPI_Comm comm;
2103: MPI_Request *send_waits = NULL,*recv_waits = NULL;
2104: MPI_Status recv_status,*send_status;
2105: PetscErrorCode ierr;
2106: PetscShmComm scomm;
2107: PetscMPIInt jj;
2108: MPI_Info info;
2109: MPI_Comm mscomm;
2110: MPI_Win sharedoffsetwin; /* Window that owns sharedspaceoffset */
2111: PetscInt *sharedspaceoffset;
2112: VecScatterType type;
2113: PetscBool mpi3node;
2115: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2116: PetscObjectGetComm((PetscObject)xin,&comm);
2117: MPI_Comm_rank(comm,&rank);
2118: MPI_Comm_size(comm,&size);
2120: VecScatterGetType(ctx,&type);
2121: PetscStrcmp(type,"mpi3node",&mpi3node);
2123: PetscShmCommGet(comm,&scomm);
2124: PetscShmCommGetMpiShmComm(scomm,&mscomm);
2126: MPI_Info_create(&info);
2127: MPI_Info_set(info, "alloc_shared_noncontig", "true");
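/* "alloc_shared_noncontig" tells MPI that the shared window segments need not
   be laid out contiguously across ranks, which lets the implementation place
   each rank's segment separately (e.g. on its own page) for better locality. */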
2129: if (mpi3node) {
2130: /* Check whether the (parallel) inidx has duplicate indices.
2131: The current VecScatterEndMPI3Node() (StoP case) writes the sequential vector into the parallel vector;
2132: concurrent writes to the same shared location without locking
2133: lead to incorrect scattering. See src/vec/vscat/examples/runex2_5 and runex3_5 */
2135: PetscInt *mem,**optr;
2136: MPI_Win swin;
2137: PetscMPIInt msize;
2139: MPI_Comm_size(mscomm,&msize);
2140: MPI_Comm_rank(mscomm,&mrank);
2142: PetscMalloc1(msize,&optr);
2143: MPIU_Win_allocate_shared((nx+1)*sizeof(PetscInt),sizeof(PetscInt),MPI_INFO_NULL,mscomm,&mem,&swin);
2145: /* Write local nx and inidx into mem */
2146: mem[0] = nx;
2147: for (i=1; i<=nx; i++) mem[i] = inidx[i-1];
2148: MPI_Barrier(mscomm); /* sync shared memory */
2150: if (!mrank) {
2151: PetscBT table;
2152: /* sz and dsp_unit are not used, but passing NULL for them would cause MPI_Win_shared_query() to crash */
2153: MPI_Aint sz;
2154: PetscMPIInt dsp_unit;
2155: PetscInt N = xin->map->N;
2157: PetscBTCreate(N,&table); /* may not be scalable */
2158: PetscBTMemzero(N,table);
2160: jj = 0;
2161: while (jj<msize && !ctx->is_duplicate) {
2162: if (jj == mrank) {jj++; continue;}
2163: MPIU_Win_shared_query(swin,jj,&sz,&dsp_unit,&optr[jj]);
2164: for (j=1; j<=optr[jj][0]; j++) { /* optr[jj][0] = nx */
2165: if (PetscBTLookupSet(table,optr[jj][j])) {
2166: ctx->is_duplicate = PETSC_TRUE; /* only mrank=0 has this information; it is checked in VecScatterEndMPI3Node() */
2167: break;
2168: }
2169: }
2170: jj++;
2171: }
2172: PetscBTDestroy(&table);
2173: }
2175: if (swin != MPI_WIN_NULL) {MPI_Win_free(&swin);}
2176: PetscFree(optr);
2177: }
2179: owners = xin->map->range;
2180: VecGetSize(yin,&lengthy);
2181: VecGetSize(xin,&lengthx);
2183: /* first count number of contributors to each processor */
2184: PetscMalloc2(size,&nprocs,nx,&owner);
2185: PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
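/* For each of our block indices, find the owning process (owner[i]), count how
   many indices go to each process (nprocs[]), and count the number of distinct
   destinations (nsends); the inner search resumes from the previous j because
   inidx is typically close to sorted. */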
2187: j = 0;
2188: nsends = 0;
2189: for (i=0; i<nx; i++) {
2190: PetscIntMultError(bs,inidx[i],&idx);
2191: if (idx < owners[j]) j = 0;
2192: for (; j<size; j++) {
2193: if (idx < owners[j+1]) {
2194: if (!nprocs[j]++) nsends++;
2195: owner[i] = j;
2196: break;
2197: }
2198: }
2199: if (j == size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"i=%D: block entry %D not owned by any process; upper bound %D",i,idx,owners[size]);
2200: }
2202: nprocslocal = nprocs[rank];
2203: nprocs[rank] = 0;
2204: if (nprocslocal) nsends--;
2205: /* inform other processors of the number of messages and the maximum length */
2206: PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2207: PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2208: PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2209: recvtotal = 0;
2210: for (i=0; i<nrecvs; i++) {
2211: PetscIntSumError(recvtotal,olengths1[i],&recvtotal);
2212: }
2214: /* post receives: */
2215: PetscMalloc3(recvtotal,&rvalues,nrecvs,&source,nrecvs,&recv_waits);
2216: count = 0;
2217: for (i=0; i<nrecvs; i++) {
2218: MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2219: count += olengths1[i];
2220: }
2222: /* do sends:
2223: 1) starts[i] gives the starting index in svalues for the entries going to
2224: the ith processor
2225: */
2226: nxr = 0;
2227: for (i=0; i<nx; i++) {
2228: if (owner[i] != rank) nxr++;
2229: }
2230: PetscMalloc3(nxr,&svalues,nsends,&send_waits,size+1,&starts);
2232: starts[0] = 0;
2233: for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2234: for (i=0; i<nx; i++) {
2235: if (owner[i] != rank) svalues[starts[owner[i]]++] = bs*inidx[i];
2236: }
2237: starts[0] = 0;
2238: for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2239: count = 0;
2240: for (i=0; i<size; i++) {
2241: if (nprocs[i]) {
2242: MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);
2243: }
2244: }
2246: /* wait on receives; these are the processes to which we must deliver data */
2247: count = nrecvs;
2248: slen = 0;
2249: nrecvshared = 0;
2250: slenshared = 0;
2251: while (count) {
2252: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2253: /* unpack receives into our local space */
2254: MPI_Get_count(&recv_status,MPIU_INT,&n);
2255: PetscShmCommGlobalToLocal(scomm,onodes1[imdex],&jj);
2256: if (jj > -1) {
2257: PetscInfo3(NULL,"[%d] Sending values to shared memory partner %d, global rank %d\n",rank,jj,onodes1[imdex]);
2258: nrecvshared++;
2259: slenshared += n;
2260: }
2261: slen += n;
2262: count--;
2263: }
2265: if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);
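/* At this point the rendezvous is complete: every process knows how many index
   messages it received (nrecvs), from whom (onodes1), how long each one is
   (olengths1), and which of those senders are on-node shared-memory partners
   (nrecvshared, slenshared); rvalues holds all received block indices. */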
2267: /* allocate entire send scatter context */
2268: PetscNewLog(ctx,&to);
2269: to->sharedwin = MPI_WIN_NULL;
2270: to->n = nrecvs-nrecvshared;
2272: PetscMalloc1(nrecvs-nrecvshared,&to->requests);
2273: PetscMalloc4(bs*(slen-slenshared),&to->values,slen-slenshared,&to->indices,nrecvs-nrecvshared+1,&to->starts,nrecvs-nrecvshared,&to->procs);
2274: PetscMalloc2(PetscMax(to->n,nsends),&to->sstatus,PetscMax(to->n,nsends),&to->rstatus);
2276: MPI_Comm_size(mscomm,&to->msize);
2277: PetscMalloc1(slenshared,&to->sharedspaceindices);
2278: PetscCalloc1(to->msize+1,&to->sharedspacestarts);
2280: ctx->todata = (void*)to;
2281: to->starts[0] = 0;
2282: to->sharedspacestarts[0] = 0;
2284: /* Allocate shared memory space for shared memory partner communication */
2285: MPIU_Win_allocate_shared(to->msize*sizeof(PetscInt),sizeof(PetscInt),info,mscomm,&sharedspaceoffset,&sharedoffsetwin);
2286: for (i=0; i<to->msize; i++) sharedspaceoffset[i] = -1; /* mark with -1 for later error checking */
2287: if (nrecvs) {
2288: /* move the data into the send scatter */
2289: base = owners[rank];
2290: rsvalues = rvalues;
2291: to->n = 0;
2292: for (i=0; i<nrecvs; i++) {
2293: values = rsvalues;
2294: PetscShmCommGlobalToLocal(scomm,(PetscMPIInt)onodes1[i],&jj);
2295: if (jj > -1) {
2296: to->sharedspacestarts[jj] = to->sharedcnt;
2297: to->sharedspacestarts[jj+1] = to->sharedcnt + olengths1[i];
2298: for (j=0; j<olengths1[i]; j++) to->sharedspaceindices[to->sharedcnt + j] = values[j] - base;
2299: sharedspaceoffset[jj] = to->sharedcnt;
2300: to->sharedcnt += olengths1[i];
2301: PetscInfo4(NULL,"[%d] Sending %d values to shared memory partner %d, global rank %d\n",rank,olengths1[i],jj,onodes1[i]);
2302: } else {
2303: to->starts[to->n+1] = to->starts[to->n] + olengths1[i];
2304: to->procs[to->n] = onodes1[i];
2305: for (j=0; j<olengths1[i]; j++) to->indices[to->starts[to->n] + j] = values[j] - base;
2306: to->n++;
2307: }
2308: rsvalues += olengths1[i];
2309: }
2310: }
2311: PetscFree(olengths1);
2312: PetscFree(onodes1);
2313: PetscFree3(rvalues,source,recv_waits);
2315: if (mpi3node) {
2316: /* to->sharedspace is used only as a flag */
2317: i = 0;
2318: if (to->sharedcnt) i = 1;
2319: MPIU_Win_allocate_shared(i*sizeof(PetscScalar),sizeof(PetscScalar),info,mscomm,&to->sharedspace,&to->sharedwin);
2320: } else { /* mpi3 */
2321: MPIU_Win_allocate_shared(bs*to->sharedcnt*sizeof(PetscScalar),sizeof(PetscScalar),info,mscomm,&to->sharedspace,&to->sharedwin);
2322: }
2323: if (to->sharedwin == MPI_WIN_NULL) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_LIB,"MPI_Win_allocate_shared() did not return a valid window");
2324: MPI_Info_free(&info);
2326: /* allocate entire receive scatter context */
2327: PetscNewLog(ctx,&from);
2328: from->sharedwin = MPI_WIN_NULL;
2329: from->msize = to->msize;
2330: /* we don't actually know the correct value for from->n at this point, so we use the upper bound */
2331: from->n = nsends;
2333: PetscMalloc1(nsends,&from->requests);
2334: PetscMalloc4((ny-nprocslocal)*bs,&from->values,ny-nprocslocal,&from->indices,nsends+1,&from->starts,from->n,&from->procs);
2335: ctx->fromdata = (void*)from;
2337: PetscCalloc1(to->msize+1,&from->sharedspacestarts);
2338: /* move data into receive scatter */
2339: PetscMalloc2(size,&lowner,nsends+1,&start);
2340: PetscMalloc2(size,&lsharedowner,to->msize+1,&sharedstart);
2341: count = 0; from->starts[0] = start[0] = 0;
2342: from->sharedcnt = 0;
2343: for (i=0; i<size; i++) {
2344: lsharedowner[i] = -1;
2345: lowner[i] = -1;
2346: if (nprocs[i]) {
2347: PetscShmCommGlobalToLocal(scomm,i,&jj);
2348: if (jj > -1) {
2349: from->sharedspacestarts[jj] = sharedstart[jj] = from->sharedcnt;
2350: from->sharedspacestarts[jj+1] = sharedstart[jj+1] = from->sharedspacestarts[jj] + nprocs[i];
2351: from->sharedcnt += nprocs[i];
2352: lsharedowner[i] = jj;
2353: } else {
2354: lowner[i] = count++;
2355: from->procs[count-1] = i;
2356: from->starts[count] = start[count] = start[count-1] + nprocs[i];
2357: }
2358: }
2359: }
2360: from->n = count;
2362: for (i=0; i<nx; i++) {
2363: if (owner[i] != rank && lowner[owner[i]] > -1) {
2364: from->indices[start[lowner[owner[i]]]++] = bs*inidy[i];
2365: if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2366: }
2367: }
2369: /* copy over appropriate parts of inidy[] into from->sharedspaceindices */
2370: if (mpi3node) {
2371: /* in addition, copy over appropriate parts of inidx[] into 2nd part of from->sharedspaceindices */
2372: PetscInt *sharedspaceindicesx;
2373: PetscMalloc1(2*from->sharedcnt,&from->sharedspaceindices);
2374: sharedspaceindicesx = from->sharedspaceindices + from->sharedcnt;
2375: for (i=0; i<nx; i++) {
2376: if (owner[i] != rank && lsharedowner[owner[i]] > -1) {
2377: if (sharedstart[lsharedowner[owner[i]]] > from->sharedcnt) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MEM,"Shared space start index exceeds shared count");
2379: from->sharedspaceindices[sharedstart[lsharedowner[owner[i]]]] = bs*inidy[i];
2380: sharedspaceindicesx[sharedstart[lsharedowner[owner[i]]]] = bs*inidx[i]-owners[owner[i]]; /* local inidx */
2381: sharedstart[lsharedowner[owner[i]]]++;
2382: }
2383: }
2384: } else { /* mpi3 */
2385: PetscMalloc1(from->sharedcnt,&from->sharedspaceindices);
2386: for (i=0; i<nx; i++) {
2387: if (owner[i] != rank && lsharedowner[owner[i]] > -1) {
2388: if (sharedstart[lsharedowner[owner[i]]] > from->sharedcnt) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_MEM,"Shared space start index exceeds shared count");
2389: from->sharedspaceindices[sharedstart[lsharedowner[owner[i]]]++] = bs*inidy[i];
2390: }
2391: }
2392: }
2394: PetscFree2(lowner,start);
2395: PetscFree2(lsharedowner,sharedstart);
2396: PetscFree2(nprocs,owner);
2398: /* wait on sends */
2399: if (nsends) {
2400: PetscMalloc1(nsends,&send_status);
2401: MPI_Waitall(nsends,send_waits,send_status);
2402: PetscFree(send_status);
2403: }
2404: PetscFree3(svalues,send_waits,starts);
2406: if (nprocslocal) {
2407: PetscInt nt = from->local.n = to->local.n = nprocslocal;
2408: /* we have a scatter to ourselves */
2409: PetscMalloc1(nt,&to->local.vslots);
2410: PetscMalloc1(nt,&from->local.vslots);
2411: nt = 0;
2412: for (i=0; i<nx; i++) {
2413: idx = bs*inidx[i];
2414: if (idx >= owners[rank] && idx < owners[rank+1]) {
2415: to->local.vslots[nt] = idx - owners[rank];
2416: from->local.vslots[nt++] = bs*inidy[i];
2417: if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2418: }
2419: }
2420: PetscLogObjectMemory((PetscObject)ctx,2*nt*sizeof(PetscInt));
2421: } else {
2422: from->local.n = 0;
2423: from->local.vslots = 0;
2424: to->local.n = 0;
2425: to->local.vslots = 0;
2426: }
2428: /* Get the shared memory address for all processes we will be copying data from */
2429: PetscCalloc1(to->msize,&from->sharedspaces);
2430: PetscCalloc1(to->msize,&from->sharedspacesoffset);
2431: MPI_Comm_rank(mscomm,&mrank);
2432: for (jj=0; jj<to->msize; jj++) {
2433: MPI_Aint isize;
2434: PetscMPIInt disp_unit;
2435: PetscInt *ptr;
2436: MPIU_Win_shared_query(to->sharedwin,jj,&isize,&disp_unit,&from->sharedspaces[jj]);
2437: MPIU_Win_shared_query(sharedoffsetwin,jj,&isize,&disp_unit,&ptr);
2438: from->sharedspacesoffset[jj] = ptr[mrank];
2439: }
2440: MPI_Win_free(&sharedoffsetwin);
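/* MPI_Win_shared_query() returns a load/store-capable pointer into another
   on-node rank's window segment, so the scatter can later read that rank's
   packed values directly, without message passing. A hedged sketch of the
   access pattern for bs == 1 with INSERT_VALUES (illustrative names only; the
   real copy happens in the Begin/End routines generated from vpscat.h): */
#if 0
  PetscScalar    *partner = from->sharedspaces[jj] + from->sharedspacesoffset[jj];
  const PetscInt *sidx    = from->sharedspaceindices + from->sharedspacestarts[jj];
  PetscInt        k,len   = from->sharedspacestarts[jj+1] - from->sharedspacestarts[jj];
  for (k=0; k<len; k++) yarray[sidx[k]] = partner[k]; /* yarray: local array of the destination vector */
#endif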
2442: if (mpi3node && !from->sharedspace) {
2443: /* compute from->notdone, to be used by VecScatterEndMPI3Node() */
2444: PetscInt notdone = 0;
2445: for (i=0; i<from->msize; i++) {
2446: if (from->sharedspacesoffset && from->sharedspacesoffset[i] > -1) {
2447: notdone++;
2448: }
2449: }
2450: from->notdone = notdone;
2451: }
2453: from->local.nonmatching_computed = PETSC_FALSE;
2454: from->local.n_nonmatching = 0;
2455: from->local.slots_nonmatching = 0;
2456: to->local.nonmatching_computed = PETSC_FALSE;
2457: to->local.n_nonmatching = 0;
2458: to->local.slots_nonmatching = 0;
2460: from->format = VEC_SCATTER_MPI_GENERAL;
2461: to->format = VEC_SCATTER_MPI_GENERAL;
2462: from->bs = bs;
2463: to->bs = bs;
2465: VecScatterCreateCommon_PtoS_MPI3(from,to,ctx);
2466: return(0);
2467: }
2469: /*
2470: bs indicates how many elements there are in each block. Normally this would be 1.
2471: */
2472: PetscErrorCode VecScatterCreateCommon_PtoS_MPI3(VecScatter_MPI_General *from,VecScatter_MPI_General *to,VecScatter ctx)
2473: {
2474: MPI_Comm comm;
2475: PetscMPIInt tag = ((PetscObject)ctx)->tag, tagr;
2476: PetscInt bs = to->bs;
2477: PetscMPIInt size;
2478: PetscInt i, n;
2480: VecScatterType type;
2481: PetscBool mpi3node;
2484: PetscObjectGetComm((PetscObject)ctx,&comm);
2485: PetscObjectGetNewTag((PetscObject)ctx,&tagr);
2486: ctx->ops->destroy = VecScatterDestroy_PtoP_MPI3;
2488: MPI_Comm_size(comm,&size);
2489: /* check if the receives are ALL going into contiguous locations; if so, we can skip indexing */
2490: to->contiq = PETSC_FALSE;
2491: n = from->starts[from->n];
2492: from->contiq = PETSC_TRUE;
2493: for (i=1; i<n; i++) {
2494: if (from->indices[i] != from->indices[i-1] + bs) {
2495: from->contiq = PETSC_FALSE;
2496: break;
2497: }
2498: }
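/* from->contiq is presumably used by the generated Begin/End routines to
   replace indexed unpacking with straight memory copies when all receive slots
   form consecutive blocks. */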
2500: ctx->ops->copy = VecScatterCopy_PtoP_AllToAll;
2501: {
2502: PetscInt *sstarts = to->starts, *rstarts = from->starts;
2503: PetscMPIInt *sprocs = to->procs, *rprocs = from->procs;
2504: MPI_Request *swaits = to->requests,*rwaits = from->requests;
2505: MPI_Request *rev_swaits,*rev_rwaits;
2506: PetscScalar *Ssvalues = to->values, *Srvalues = from->values;
2508: /* allocate additional wait variables for the "reverse" scatter */
2509: PetscMalloc1(to->n,&rev_rwaits);
2510: PetscMalloc1(from->n,&rev_swaits);
2511: to->rev_requests = rev_rwaits;
2512: from->rev_requests = rev_swaits;
2514: for (i=0; i<from->n; i++) {
2515: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2516: }
2518: for (i=0; i<to->n; i++) {
2519: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2520: }
2521: /* Register receives for scatter and reverse */
2522: for (i=0; i<from->n; i++) {
2523: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
2524: }
2525: for (i=0; i<to->n; i++) {
2526: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tagr,comm,rev_rwaits+i);
2527: }
2528: ctx->ops->copy = VecScatterCopy_PtoP_X;
2529: }
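/* The persistent requests registered above (from->requests are receives,
   to->requests are sends) are presumably driven by the generated Begin/End
   routines with the standard start/complete pattern. A hedged sketch
   (illustrative only): */
#if 0
  MPI_Startall(from->n,from->requests);               /* post the receives      */
  /* ... pack send buffers with Pack_<bs>() ... */
  MPI_Startall(to->n,to->requests);                   /* start the sends        */
  MPI_Waitall(from->n,from->requests,to->rstatus);    /* complete the receives  */
  /* ... unpack into the destination vector with UnPack_<bs>() ... */
  MPI_Waitall(to->n,to->requests,to->sstatus);        /* complete the sends     */
#endif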
2530: PetscInfo1(ctx,"Using blocksize %D scatter\n",bs);
2532: #if defined(PETSC_USE_DEBUG)
2533: MPIU_Allreduce(&bs,&i,1,MPIU_INT,MPI_MIN,PetscObjectComm((PetscObject)ctx));
2534: MPIU_Allreduce(&bs,&n,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)ctx));
2535: if (bs!=i || bs!=n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Block size %D != %D or %D",bs,i,n);
2536: #endif
2538: VecScatterGetType(ctx,&type);
2539: PetscStrcmp(type,"mpi3node",&mpi3node);
2541: if (mpi3node) {
2542: switch (bs) {
2543: case 12:
2544: ctx->ops->begin = VecScatterBeginMPI3Node_12;
2545: ctx->ops->end = VecScatterEndMPI3Node_12;
2546: break;
2547: case 11:
2548: ctx->ops->begin = VecScatterBeginMPI3Node_11;
2549: ctx->ops->end = VecScatterEndMPI3Node_11;
2550: break;
2551: case 10:
2552: ctx->ops->begin = VecScatterBeginMPI3Node_10;
2553: ctx->ops->end = VecScatterEndMPI3Node_10;
2554: break;
2555: case 9:
2556: ctx->ops->begin = VecScatterBeginMPI3Node_9;
2557: ctx->ops->end = VecScatterEndMPI3Node_9;
2558: break;
2559: case 8:
2560: ctx->ops->begin = VecScatterBeginMPI3Node_8;
2561: ctx->ops->end = VecScatterEndMPI3Node_8;
2562: break;
2563: case 7:
2564: ctx->ops->begin = VecScatterBeginMPI3Node_7;
2565: ctx->ops->end = VecScatterEndMPI3Node_7;
2566: break;
2567: case 6:
2568: ctx->ops->begin = VecScatterBeginMPI3Node_6;
2569: ctx->ops->end = VecScatterEndMPI3Node_6;
2570: break;
2571: case 5:
2572: ctx->ops->begin = VecScatterBeginMPI3Node_5;
2573: ctx->ops->end = VecScatterEndMPI3Node_5;
2574: break;
2575: case 4:
2576: ctx->ops->begin = VecScatterBeginMPI3Node_4;
2577: ctx->ops->end = VecScatterEndMPI3Node_4;
2578: break;
2579: case 3:
2580: ctx->ops->begin = VecScatterBeginMPI3Node_3;
2581: ctx->ops->end = VecScatterEndMPI3Node_3;
2582: break;
2583: case 2:
2584: ctx->ops->begin = VecScatterBeginMPI3Node_2;
2585: ctx->ops->end = VecScatterEndMPI3Node_2;
2586: break;
2587: case 1:
2588: ctx->ops->begin = VecScatterBeginMPI3Node_1;
2589: ctx->ops->end = VecScatterEndMPI3Node_1;
2590: break;
2591: default:
2592: ctx->ops->begin = VecScatterBegin_bs;
2593: ctx->ops->end = VecScatterEnd_bs;
2594: }
2595: } else { /* !mpi3node */
2597: switch (bs) {
2598: case 12:
2599: ctx->ops->begin = VecScatterBegin_12;
2600: ctx->ops->end = VecScatterEnd_12;
2601: break;
2602: case 11:
2603: ctx->ops->begin = VecScatterBegin_11;
2604: ctx->ops->end = VecScatterEnd_11;
2605: break;
2606: case 10:
2607: ctx->ops->begin = VecScatterBegin_10;
2608: ctx->ops->end = VecScatterEnd_10;
2609: break;
2610: case 9:
2611: ctx->ops->begin = VecScatterBegin_9;
2612: ctx->ops->end = VecScatterEnd_9;
2613: break;
2614: case 8:
2615: ctx->ops->begin = VecScatterBegin_8;
2616: ctx->ops->end = VecScatterEnd_8;
2617: break;
2618: case 7:
2619: ctx->ops->begin = VecScatterBegin_7;
2620: ctx->ops->end = VecScatterEnd_7;
2621: break;
2622: case 6:
2623: ctx->ops->begin = VecScatterBegin_6;
2624: ctx->ops->end = VecScatterEnd_6;
2625: break;
2626: case 5:
2627: ctx->ops->begin = VecScatterBegin_5;
2628: ctx->ops->end = VecScatterEnd_5;
2629: break;
2630: case 4:
2631: ctx->ops->begin = VecScatterBegin_4;
2632: ctx->ops->end = VecScatterEnd_4;
2633: break;
2634: case 3:
2635: ctx->ops->begin = VecScatterBegin_3;
2636: ctx->ops->end = VecScatterEnd_3;
2637: break;
2638: case 2:
2639: ctx->ops->begin = VecScatterBegin_2;
2640: ctx->ops->end = VecScatterEnd_2;
2641: break;
2642: case 1:
2643: /* mpi3node is PETSC_FALSE in this branch, so only the plain variant applies */
2647: ctx->ops->begin = VecScatterBegin_1;
2648: ctx->ops->end = VecScatterEnd_1;
2650: break;
2651: default:
2652: ctx->ops->begin = VecScatterBegin_bs;
2653: ctx->ops->end = VecScatterEnd_bs;
2654: }
2655: }
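/* Any block size outside 1..12 falls through to the generic VecScatterBegin_bs/
   VecScatterEnd_bs instantiated above with BS == bs. */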
2657: ctx->ops->view = VecScatterView_MPI;
2659: /* try to optimize PtoP vecscatter with memcpy's */
2660: VecScatterMemcpyPlanCreate_PtoP(to,from);
2661: return(0);
2662: }
2664: /* ------------------------------------------------------------------------------------*/
2665: /*
2666: Scatter from local Seq vectors to a parallel vector.
2667: Reverses the order of the arguments, calls VecScatterCreateLocal_PtoS_MPI3(), then
2668: reverses the result.
2669: */
2670: PetscErrorCode VecScatterCreateLocal_StoP_MPI3(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2671: {
2672: PetscErrorCode ierr;
2673: MPI_Request *waits;
2674: VecScatter_MPI_General *to,*from;
2677: VecScatterCreateLocal_PtoS_MPI3(ny,inidy,nx,inidx,yin,xin,bs,ctx);
2678: to = (VecScatter_MPI_General*)ctx->fromdata;
2679: from = (VecScatter_MPI_General*)ctx->todata;
2680: ctx->todata = (void*)to;
2681: ctx->fromdata = (void*)from;
2682: /* these two are special; they are ALWAYS stored in the to struct */
2683: to->sstatus = from->sstatus;
2684: to->rstatus = from->rstatus;
2686: from->sstatus = 0;
2687: from->rstatus = 0;
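/* also swap the forward and reverse persistent requests so that they match the
   swapped to/from roles */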
2688: waits = from->rev_requests;
2689: from->rev_requests = from->requests;
2690: from->requests = waits;
2691: waits = to->rev_requests;
2692: to->rev_requests = to->requests;
2693: to->requests = waits;
2694: return(0);
2695: }
2697: /* ---------------------------------------------------------------------------------*/
2698: PetscErrorCode VecScatterCreateLocal_PtoP_MPI3(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2699: {
2701: PetscMPIInt size,rank,tag,imdex,n;
2702: PetscInt *owners = xin->map->range;
2703: PetscMPIInt *nprocs = NULL;
2704: PetscInt i,j,idx,nsends,*local_inidx = NULL,*local_inidy = NULL;
2705: PetscMPIInt *owner = NULL;
2706: PetscInt *starts = NULL,count,slen;
2707: PetscInt *rvalues = NULL,*svalues = NULL,base,*values = NULL,*rsvalues,recvtotal,lastidx;
2708: PetscMPIInt *onodes1,*olengths1,nrecvs;
2709: MPI_Comm comm;
2710: MPI_Request *send_waits = NULL,*recv_waits = NULL;
2711: MPI_Status recv_status,*send_status = NULL;
2712: PetscBool duplicate = PETSC_FALSE;
2713: #if defined(PETSC_USE_DEBUG)
2714: PetscBool found = PETSC_FALSE;
2715: #endif
2718: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2719: PetscObjectGetComm((PetscObject)xin,&comm);
2720: MPI_Comm_size(comm,&size);
2721: MPI_Comm_rank(comm,&rank);
2722: if (size == 1) {
2723: VecScatterCreateLocal_StoP_MPI3(nx,inidx,ny,inidy,xin,yin,bs,ctx);
2724: return(0);
2725: }
2727: /*
2728: Each processor ships off its inidx[j] and inidy[j] to the appropriate processor;
2729: the processors then call VecScatterCreateLocal_StoP_MPI3()
2730: */
2731: /* first count number of contributors to each processor */
2732: PetscMalloc3(size,&nprocs,nx,&owner,(size+1),&starts);
2733: PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
2735: lastidx = -1;
2736: j = 0;
2737: for (i=0; i<nx; i++) {
2738: /* if indices are NOT locally sorted, need to start search at the beginning */
2739: if (lastidx > (idx = bs*inidx[i])) j = 0;
2740: lastidx = idx;
2741: for (; j<size; j++) {
2742: if (idx >= owners[j] && idx < owners[j+1]) {
2743: nprocs[j]++;
2744: owner[i] = j;
2745: #if defined(PETSC_USE_DEBUG)
2746: found = PETSC_TRUE;
2747: #endif
2748: break;
2749: }
2750: }
2751: #if defined(PETSC_USE_DEBUG)
2752: if (!found) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2753: found = PETSC_FALSE;
2754: #endif
2755: }
2756: nsends = 0;
2757: for (i=0; i<size; i++) nsends += (nprocs[i] > 0);
2759: /* inform other processors of the number of messages and the maximum length */
2760: PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2761: PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2762: PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2763: recvtotal = 0;
2764: for (i=0; i<nrecvs; i++) {
2765: PetscIntSumError(recvtotal,olengths1[i],&recvtotal);
2766: }
2768: /* post receives: */
2769: PetscMalloc5(2*recvtotal,&rvalues,2*nx,&svalues,nrecvs,&recv_waits,nsends,&send_waits,nsends,&send_status);
2771: count = 0;
2772: for (i=0; i<nrecvs; i++) {
2773: MPI_Irecv((rvalues+2*count),2*olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2774: PetscIntSumError(count,olengths1[i],&count);
2775: }
2776: PetscFree(onodes1);
2778: /* do sends:
2779: 1) starts[i] gives the starting index in svalues for the entries going to
2780: the ith processor
2781: */
2782: starts[0]= 0;
2783: for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2784: for (i=0; i<nx; i++) {
2785: svalues[2*starts[owner[i]]] = bs*inidx[i];
2786: svalues[1 + 2*starts[owner[i]]++] = bs*inidy[i];
2787: }
2789: starts[0] = 0;
2790: for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2791: count = 0;
2792: for (i=0; i<size; i++) {
2793: if (nprocs[i]) {
2794: MPI_Isend(svalues+2*starts[i],2*nprocs[i],MPIU_INT,i,tag,comm,send_waits+count);
2795: count++;
2796: }
2797: }
2798: PetscFree3(nprocs,owner,starts);
2800: /* wait on receives */
2801: count = nrecvs;
2802: slen = 0;
2803: while (count) {
2804: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2805: /* unpack receives into our local space */
2806: MPI_Get_count(&recv_status,MPIU_INT,&n);
2807: slen += n/2;
2808: count--;
2809: }
2810: if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);
2812: PetscMalloc2(slen,&local_inidx,slen,&local_inidy);
2813: base = owners[rank];
2814: count = 0;
2815: rsvalues = rvalues;
2816: for (i=0; i<nrecvs; i++) {
2817: values = rsvalues;
2818: rsvalues += 2*olengths1[i];
2819: for (j=0; j<olengths1[i]; j++) {
2820: local_inidx[count] = values[2*j] - base;
2821: local_inidy[count++] = values[2*j+1];
2822: }
2823: }
2824: PetscFree(olengths1);
2826: /* wait on sends */
2827: if (nsends) {MPI_Waitall(nsends,send_waits,send_status);}
2828: PetscFree5(rvalues,svalues,recv_waits,send_waits,send_status);
2830: /*
2831: should sort and remove duplicates from local_inidx and local_inidy
2832: */
2833: #if defined(do_it_slow)
2834: /* sort on the from index */
2835: PetscSortIntWithArray(slen,local_inidx,local_inidy);
2836: start = 0;
2837: while (start < slen) {
2838: count = start+1;
2839: last = local_inidx[start];
2840: while (count < slen && last == local_inidx[count]) count++;
2841: if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
2842: /* sort on to index */
2843: PetscSortInt(count-start,local_inidy+start);
2844: }
2845: /* remove duplicates; not most efficient way, but probably good enough */
2846: i = start;
2847: while (i < count-1) {
2848: if (local_inidy[i] != local_inidy[i+1]) i++;
2849: else { /* found a duplicate */
2850: duplicate = PETSC_TRUE;
2851: for (j=i; j<slen-1; j++) {
2852: local_inidx[j] = local_inidx[j+1];
2853: local_inidy[j] = local_inidy[j+1];
2854: }
2855: slen--;
2856: count--;
2857: }
2858: }
2859: start = count;
2860: }
2861: #endif
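/* Unless do_it_slow is defined at compile time, the block above is compiled
   out, so duplicate stays PETSC_FALSE and the message below is never printed. */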
2862: if (duplicate) {
2863: PetscInfo(ctx,"Duplicate 'from' and 'to' indices passed in VecScatterCreate(); they are ignored\n");
2864: }
2865: VecScatterCreateLocal_StoP_MPI3(slen,local_inidx,slen,local_inidy,xin,yin,bs,ctx);
2866: PetscFree2(local_inidx,local_inidy);
2867: return(0);
2868: }
2870: PetscErrorCode VecScatterSetUp_MPI3(VecScatter ctx)
2871: {
2875: VecScatterSetUp_vectype_private(ctx,VecScatterCreateLocal_PtoS_MPI3,VecScatterCreateLocal_StoP_MPI3,VecScatterCreateLocal_PtoP_MPI3);
2876: return(0);
2877: }
2879: PetscErrorCode VecScatterCreate_MPI3(VecScatter ctx)
2880: {
2881: PetscErrorCode ierr;
2884: ctx->ops->setup = VecScatterSetUp_MPI3;
2885: PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERMPI3);
2886: PetscInfo(ctx,"Using MPI3 for vector scatter\n");
2887: return(0);
2888: }
2890: PetscErrorCode VecScatterCreate_MPI3Node(VecScatter ctx)
2891: {
2892: PetscErrorCode ierr;
2895: ctx->ops->setup = VecScatterSetUp_MPI3;
2896: PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERMPI3NODE);
2897: PetscInfo(ctx,"Using MPI3NODE for vector scatter\n");
2898: return(0);
2899: }
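/* A hedged usage sketch (illustrative only): selecting one of these
   implementations from application code. The MPI vector x, sequential vector
   y, and index sets isx/isy are assumed to exist already. */
#if 0
  VecScatter sc;
  VecScatterCreate(x,isx,y,isy,&sc);
  VecScatterSetType(sc,VECSCATTERMPI3);   /* or VECSCATTERMPI3NODE */
  VecScatterBegin(sc,x,y,INSERT_VALUES,SCATTER_FORWARD);
  VecScatterEnd(sc,x,y,INSERT_VALUES,SCATTER_FORWARD);
  VecScatterDestroy(&sc);
#endif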