Actual source code: vpscat_mpi1.c
petsc-3.9.4 2018-09-11
2: /*
3: Defines parallel vector scatters using MPI1.
4: */
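
/*
   A minimal sketch of how this machinery is typically driven from user code,
   assuming the standard PETSc VecScatter API; the function name and index
   sets below are hypothetical, not part of this file:

     #include <petscvec.h>

     PetscErrorCode example_scatter(Vec x,Vec y,IS ix,IS iy)
     {
       VecScatter     ctx;
       PetscErrorCode ierr;

       ierr = VecScatterCreate(x,ix,y,iy,&ctx);CHKERRQ(ierr);                        // builds the to/from contexts used below
       ierr = VecScatterBegin(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);  // posts sends/receives
       ierr = VecScatterEnd(ctx,x,y,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);    // completes and unpacks
       ierr = VecScatterDestroy(&ctx);CHKERRQ(ierr);
       return 0;
     }
*/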
6: #include <../src/vec/vec/impls/dvecimpl.h>
7: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
8: #include <petscsf.h>
10: PetscErrorCode VecScatterView_MPI_MPI1(VecScatter ctx,PetscViewer viewer)
11: {
12: VecScatter_MPI_General *to =(VecScatter_MPI_General*)ctx->todata;
13: VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
14: PetscErrorCode ierr;
15: PetscInt i;
16: PetscMPIInt rank;
17: PetscViewerFormat format;
18: PetscBool iascii;
21: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
22: if (iascii) {
23: MPI_Comm_rank(PetscObjectComm((PetscObject)ctx),&rank);
24: PetscViewerGetFormat(viewer,&format);
25: if (format == PETSC_VIEWER_ASCII_INFO) {
26: PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;
28: MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
29: MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
30: itmp = to->starts[to->n];   /* starts[] has n+1 entries; starts[n] is the total length */
31: MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
32: itmp = from->starts[from->n];
33: MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
34: MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)ctx));
36: PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
37: PetscViewerASCIIPrintf(viewer," Maximum number sends %D\n",nsend_max);
38: PetscViewerASCIIPrintf(viewer," Maximum number receives %D\n",nrecv_max);
39: PetscViewerASCIIPrintf(viewer," Maximum data sent %D\n",(PetscInt)(lensend_max*to->bs*sizeof(PetscScalar)));
40: PetscViewerASCIIPrintf(viewer," Maximum data received %D\n",(PetscInt)(lenrecv_max*to->bs*sizeof(PetscScalar)));
41: PetscViewerASCIIPrintf(viewer," Total data sent %D\n",(PetscInt)(alldata*to->bs*sizeof(PetscScalar)));
43: } else {
44: PetscViewerASCIIPushSynchronized(viewer);
45: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);
46: if (to->n) {
47: for (i=0; i<to->n; i++) {
48: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length = %D to whom %d\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
49: }
50: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote sends (in order by process sent to)\n");
51: for (i=0; i<to->starts[to->n]; i++) {
52: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,to->indices[i]);
53: }
54: }
56: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);
57: if (from->n) {
58: for (i=0; i<from->n; i++) {
59: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %d\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
60: }
62: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote receives (in order by process received from)\n");
63: for (i=0; i<from->starts[from->n]; i++) {
64: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,from->indices[i]);
65: }
66: }
67: if (to->local.n) {
68: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Indices for local part of scatter\n",rank);
69: for (i=0; i<to->local.n; i++) { /* the to and from have the opposite meaning from what you would expect */
70: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] From %D to %D \n",rank,to->local.vslots[i],from->local.vslots[i]);
71: }
72: }
74: PetscViewerFlush(viewer);
75: PetscViewerASCIIPopSynchronized(viewer);
76: }
77: }
78: return(0);
79: }
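
/*
   A sketch of how the two viewing branches above are reached: pushing
   PETSC_VIEWER_ASCII_INFO selects the statistics summary, any other format
   the per-rank index dump. Assumed typical usage, not code from this file:

     PetscViewer viewer = PETSC_VIEWER_STDOUT_WORLD;

     PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO);  // statistics branch
     VecScatterView(ctx,viewer);
     PetscViewerPopFormat(viewer);
*/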
81: /* -------------------------------------------------------------------------------------*/
82: PetscErrorCode VecScatterDestroy_PtoP_MPI1(VecScatter ctx)
83: {
84: VecScatter_MPI_General *to = (VecScatter_MPI_General*)ctx->todata;
85: VecScatter_MPI_General *from = (VecScatter_MPI_General*)ctx->fromdata;
86: PetscErrorCode ierr;
87: PetscInt i;
90: if (to->use_readyreceiver) {
91: /*
92: Since we have already posted sends we must cancel them before freeing
93: the requests
94: */
95: for (i=0; i<from->n; i++) {
96: MPI_Cancel(from->requests+i);
97: }
98: for (i=0; i<to->n; i++) {
99: MPI_Cancel(to->rev_requests+i);
100: }
101: MPI_Waitall(from->n,from->requests,to->rstatus);
102: MPI_Waitall(to->n,to->rev_requests,to->rstatus);
103: }
105: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
106: if (to->use_alltoallw) {
107: for (i=0; i<to->n; i++) {
108: MPI_Type_free(to->types+to->procs[i]);
109: }
110: PetscFree3(to->wcounts,to->wdispls,to->types);
111: if (!from->contiq) {
112: for (i=0; i<from->n; i++) {
113: MPI_Type_free(from->types+from->procs[i]);
114: }
115: }
116: PetscFree3(from->wcounts,from->wdispls,from->types);
117: }
118: #endif
120: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
121: if (to->use_window) {
122: MPI_Win_free(&from->window);
123: MPI_Win_free(&to->window);
124: PetscFree(from->winstarts);
125: PetscFree(to->winstarts);
126: }
127: #endif
129: if (to->use_alltoallv) {
130: PetscFree2(to->counts,to->displs);
131: PetscFree2(from->counts,from->displs);
132: }
134: /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
135: /*
136: IBM's PE version of MPI has a bug where freeing these requests corrupts
137: later message passing.
138: */
139: #if !defined(PETSC_HAVE_BROKEN_REQUEST_FREE)
140: if (!to->use_alltoallv && !to->use_window) { /* currently the to->requests etc are ALWAYS allocated even if not used */
141: if (to->requests) {
142: for (i=0; i<to->n; i++) {
143: MPI_Request_free(to->requests + i);
144: }
145: }
146: if (to->rev_requests) {
147: for (i=0; i<to->n; i++) {
148: MPI_Request_free(to->rev_requests + i);
149: }
150: }
151: }
152: /*
153: MPICH could not properly cancel requests, so in ready-receiver mode the
154: requests cannot be freed. This may be fixed by now; if not, wrap the
155: following code in if (!to->use_readyreceiver) { }
156: */
157: if (!to->use_alltoallv && !to->use_window) { /* currently the from->requests etc are ALWAYS allocated even if not used */
158: if (from->requests) {
159: for (i=0; i<from->n; i++) {
160: MPI_Request_free(from->requests + i);
161: }
162: }
164: if (from->rev_requests) {
165: for (i=0; i<from->n; i++) {
166: MPI_Request_free(from->rev_requests + i);
167: }
168: }
169: }
170: #endif
172: PetscFree(to->local.vslots);
173: PetscFree(from->local.vslots);
174: PetscFree2(to->counts,to->displs);
175: PetscFree2(from->counts,from->displs);
176: PetscFree(to->local.slots_nonmatching);
177: PetscFree(from->local.slots_nonmatching);
178: PetscFree(to->rev_requests);
179: PetscFree(from->rev_requests);
180: PetscFree(to->requests);
181: PetscFree(from->requests);
182: PetscFree4(to->values,to->indices,to->starts,to->procs);
183: PetscFree2(to->sstatus,to->rstatus);
184: PetscFree4(from->values,from->indices,from->starts,from->procs);
185: PetscFree(from);
186: PetscFree(to);
187: return(0);
188: }
192: /* --------------------------------------------------------------------------------------*/
193: /*
194: Special optimization to check whether the local part of the scatter is actually
195: a contiguous copy; if so, the scatter routines call PetscMemcpy() instead.
197: */
198: PetscErrorCode VecScatterLocalOptimizeCopy_Private_MPI1(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from,PetscInt bs)
199: {
200: PetscInt n = to->n,i,*to_slots = to->vslots,*from_slots = from->vslots;
201: PetscInt to_start,from_start;
205: to_start = to_slots[0];
206: from_start = from_slots[0];
208: for (i=1; i<n; i++) {
209: to_start += bs;
210: from_start += bs;
211: if (to_slots[i] != to_start) return(0);
212: if (from_slots[i] != from_start) return(0);
213: }
214: to->is_copy = PETSC_TRUE;
215: to->copy_start = to_slots[0];
216: to->copy_length = bs*sizeof(PetscScalar)*n;
217: from->is_copy = PETSC_TRUE;
218: from->copy_start = from_slots[0];
219: from->copy_length = bs*sizeof(PetscScalar)*n;
220: PetscInfo(scatter,"Local scatter is a copy, optimizing for it\n");
221: return(0);
222: }
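
/*
   Worked example of the test above: with bs = 2, to->vslots = {4,6,8} and
   from->vslots = {10,12,14} are strided runs, so the local scatter collapses
   into a single PetscMemcpy() of 3*bs scalars between offsets 4 and 10; a
   slot list such as {4,6,9} breaks the stride and leaves is_copy unset.
*/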
224: /* --------------------------------------------------------------------------------------*/
226: PetscErrorCode VecScatterCopy_PtoP_X_MPI1(VecScatter in,VecScatter out)
227: {
228: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
229: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
230: PetscErrorCode ierr;
231: PetscInt ny,bs = in_from->bs;
234: out->ops->begin = in->ops->begin;
235: out->ops->end = in->ops->end;
236: out->ops->copy = in->ops->copy;
237: out->ops->destroy = in->ops->destroy;
238: out->ops->view = in->ops->view;
240: /* allocate entire send scatter context */
241: PetscNewLog(out,&out_to);
242: PetscNewLog(out,&out_from);
244: ny = in_to->starts[in_to->n];
245: out_to->n = in_to->n;
246: out_to->format = in_to->format;
247: out_to->sendfirst = in_to->sendfirst;
249: PetscMalloc1(out_to->n,&out_to->requests);
250: PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
251: PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
252: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
253: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
254: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
256: out->todata = (void*)out_to;
257: out_to->local.n = in_to->local.n;
258: out_to->local.nonmatching_computed = PETSC_FALSE;
259: out_to->local.n_nonmatching = 0;
260: out_to->local.slots_nonmatching = 0;
261: if (in_to->local.n) {
262: PetscMalloc1(in_to->local.n,&out_to->local.vslots);
263: PetscMalloc1(in_from->local.n,&out_from->local.vslots);
264: PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
265: PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
266: } else {
267: out_to->local.vslots = 0;
268: out_from->local.vslots = 0;
269: }
271: /* allocate entire receive context */
272: out_from->format = in_from->format;
273: ny = in_from->starts[in_from->n];
274: out_from->n = in_from->n;
275: out_from->sendfirst = in_from->sendfirst;
277: PetscMalloc1(out_from->n,&out_from->requests);
278: PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
279: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
280: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
281: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
283: out->fromdata = (void*)out_from;
284: out_from->local.n = in_from->local.n;
285: out_from->local.nonmatching_computed = PETSC_FALSE;
286: out_from->local.n_nonmatching = 0;
287: out_from->local.slots_nonmatching = 0;
289: /*
290: set up the request arrays for use with MPI_Send_init() and MPI_Recv_init()
291: */
292: {
293: PetscMPIInt tag;
294: MPI_Comm comm;
295: PetscInt *sstarts = out_to->starts, *rstarts = out_from->starts;
296: PetscMPIInt *sprocs = out_to->procs, *rprocs = out_from->procs;
297: PetscInt i;
298: PetscBool flg;
299: MPI_Request *swaits = out_to->requests,*rwaits = out_from->requests;
300: MPI_Request *rev_swaits,*rev_rwaits;
301: PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;
303: PetscMalloc1(in_to->n,&out_to->rev_requests);
304: PetscMalloc1(in_from->n,&out_from->rev_requests);
306: rev_rwaits = out_to->rev_requests;
307: rev_swaits = out_from->rev_requests;
309: out_from->bs = out_to->bs = bs;
310: tag = ((PetscObject)out)->tag;
311: PetscObjectGetComm((PetscObject)out,&comm);
313: /* Register the receives that you will use later (sends for scatter reverse) */
314: for (i=0; i<out_from->n; i++) {
315: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
316: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
317: }
319: flg = PETSC_FALSE;
320: PetscOptionsGetBool(NULL,NULL,"-vecscatter_rsend",&flg,NULL);
321: if (flg) {
322: out_to->use_readyreceiver = PETSC_TRUE;
323: out_from->use_readyreceiver = PETSC_TRUE;
324: for (i=0; i<out_to->n; i++) {
325: MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
326: }
327: if (out_from->n) {MPI_Startall_irecv(out_from->starts[out_from->n]*out_from->bs,out_from->n,out_from->requests);}
328: MPI_Barrier(comm);
329: PetscInfo(in,"Using VecScatter ready receiver mode\n");
330: } else {
331: out_to->use_readyreceiver = PETSC_FALSE;
332: out_from->use_readyreceiver = PETSC_FALSE;
333: flg = PETSC_FALSE;
334: PetscOptionsGetBool(NULL,NULL,"-vecscatter_ssend",&flg,NULL);
335: if (flg) {
336: PetscInfo(in,"Using VecScatter Ssend mode\n");
337: }
338: for (i=0; i<out_to->n; i++) {
339: if (!flg) {
340: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
341: } else {
342: MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
343: }
344: }
345: }
346: /* Register receives for scatter reverse */
347: for (i=0; i<out_to->n; i++) {
348: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
349: }
350: }
351: return(0);
352: }
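
/*
   The request arrays rebuilt above follow the standard MPI persistent-
   communication pattern; a minimal generic sketch (plain MPI, names
   hypothetical, not PETSc code):

     MPI_Request req;
     int         iter;

     MPI_Send_init(buf,n,MPI_DOUBLE,peer,tag,comm,&req);  // register once
     for (iter=0; iter<niter; iter++) {
       MPI_Start(&req);                                   // start each scatter
       MPI_Wait(&req,MPI_STATUS_IGNORE);                  // complete before reusing the buffer
     }
     MPI_Request_free(&req);                              // mirrored in VecScatterDestroy_PtoP_MPI1()
*/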
354: PetscErrorCode VecScatterCopy_PtoP_AllToAll_MPI1(VecScatter in,VecScatter out)
355: {
356: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
357: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
358: PetscErrorCode ierr;
359: PetscInt ny,bs = in_from->bs;
360: PetscMPIInt size;
363: MPI_Comm_size(PetscObjectComm((PetscObject)in),&size);
365: out->ops->begin = in->ops->begin;
366: out->ops->end = in->ops->end;
367: out->ops->copy = in->ops->copy;
368: out->ops->destroy = in->ops->destroy;
369: out->ops->view = in->ops->view;
371: /* allocate entire send scatter context */
372: PetscNewLog(out,&out_to);
373: PetscNewLog(out,&out_from);
375: ny = in_to->starts[in_to->n];
376: out_to->n = in_to->n;
377: out_to->format = in_to->format;
378: out_to->sendfirst = in_to->sendfirst;
380: PetscMalloc1(out_to->n,&out_to->requests);
381: PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
382: PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
383: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
384: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
385: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
387: out->todata = (void*)out_to;
388: out_to->local.n = in_to->local.n;
389: out_to->local.nonmatching_computed = PETSC_FALSE;
390: out_to->local.n_nonmatching = 0;
391: out_to->local.slots_nonmatching = 0;
392: if (in_to->local.n) {
393: PetscMalloc1(in_to->local.n,&out_to->local.vslots);
394: PetscMalloc1(in_from->local.n,&out_from->local.vslots);
395: PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
396: PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
397: } else {
398: out_to->local.vslots = 0;
399: out_from->local.vslots = 0;
400: }
402: /* allocate entire receive context */
403: out_from->format = in_from->format;
404: ny = in_from->starts[in_from->n];
405: out_from->n = in_from->n;
406: out_from->sendfirst = in_from->sendfirst;
408: PetscMalloc1(out_from->n,&out_from->requests);
409: PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
410: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
411: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
412: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
414: out->fromdata = (void*)out_from;
415: out_from->local.n = in_from->local.n;
416: out_from->local.nonmatching_computed = PETSC_FALSE;
417: out_from->local.n_nonmatching = 0;
418: out_from->local.slots_nonmatching = 0;
420: out_to->use_alltoallv = out_from->use_alltoallv = PETSC_TRUE;
422: PetscMalloc2(size,&out_to->counts,size,&out_to->displs);
423: PetscMemcpy(out_to->counts,in_to->counts,size*sizeof(PetscMPIInt));
424: PetscMemcpy(out_to->displs,in_to->displs,size*sizeof(PetscMPIInt));
426: PetscMalloc2(size,&out_from->counts,size,&out_from->displs);
427: PetscMemcpy(out_from->counts,in_from->counts,size*sizeof(PetscMPIInt));
428: PetscMemcpy(out_from->displs,in_from->displs,size*sizeof(PetscMPIInt));
429: return(0);
430: }
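
/*
   The counts/displs arrays copied above are sized by the communicator; under
   the use_alltoallv path they are presumably consumed during the scatter
   roughly as follows (a sketch with hypothetical buffer names; the begin/end
   routines are not in this file):

     MPI_Alltoallv(sendbuf,to->counts,to->displs,MPIU_SCALAR,
                   recvbuf,from->counts,from->displs,MPIU_SCALAR,comm);
*/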
431: /* --------------------------------------------------------------------------------------------------
432: Packs the message data into send buffers and unpacks it from receive buffers.
434: These could be generated automatically.
436: Fortran kernels etc. could be used.
437: */
438: PETSC_STATIC_INLINE void Pack_MPI1_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
439: {
440: PetscInt i;
441: for (i=0; i<n; i++) y[i] = x[indicesx[i]];
442: }
444: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_1(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
445: {
446: PetscInt i;
449: switch (addv) {
450: case INSERT_VALUES:
451: case INSERT_ALL_VALUES:
452: for (i=0; i<n; i++) y[indicesy[i]] = x[i];
453: break;
454: case ADD_VALUES:
455: case ADD_ALL_VALUES:
456: for (i=0; i<n; i++) y[indicesy[i]] += x[i];
457: break;
458: #if !defined(PETSC_USE_COMPLEX)
459: case MAX_VALUES:
460: for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[i]);
461: #else
462: case MAX_VALUES:
463: #endif
464: case NOT_SET_VALUES:
465: break;
466: default:
467: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
468: }
469: return(0);
470: }
472: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
473: {
474: PetscInt i;
477: switch (addv) {
478: case INSERT_VALUES:
479: case INSERT_ALL_VALUES:
480: for (i=0; i<n; i++) y[indicesy[i]] = x[indicesx[i]];
481: break;
482: case ADD_VALUES:
483: case ADD_ALL_VALUES:
484: for (i=0; i<n; i++) y[indicesy[i]] += x[indicesx[i]];
485: break;
486: #if !defined(PETSC_USE_COMPLEX)
487: case MAX_VALUES:
488: for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[indicesx[i]]);
489: #else
490: case MAX_VALUES:
491: #endif
492: case NOT_SET_VALUES:
493: break;
494: default:
495: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
496: }
497: return(0);
498: }
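
/*
   Worked example of the InsertMode semantics implemented above (hypothetical
   values): with x = {5,7}, indicesy = {0,2} and y = {1,2,3} initially,
     INSERT_VALUES gives y = {5,2,7}
     ADD_VALUES    gives y = {6,2,10}
     MAX_VALUES    gives y = {5,2,7}   (real builds only; NOT_SET_VALUES leaves y untouched)
*/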
500: /* ----------------------------------------------------------------------------------------------- */
501: PETSC_STATIC_INLINE void Pack_MPI1_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
502: {
503: PetscInt i,idx;
505: for (i=0; i<n; i++) {
506: idx = *indicesx++;
507: y[0] = x[idx];
508: y[1] = x[idx+1];
509: y += 2;
510: }
511: }
513: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_2(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
514: {
515: PetscInt i,idy;
518: switch (addv) {
519: case INSERT_VALUES:
520: case INSERT_ALL_VALUES:
521: for (i=0; i<n; i++) {
522: idy = *indicesy++;
523: y[idy] = x[0];
524: y[idy+1] = x[1];
525: x += 2;
526: }
527: break;
528: case ADD_VALUES:
529: case ADD_ALL_VALUES:
530: for (i=0; i<n; i++) {
531: idy = *indicesy++;
532: y[idy] += x[0];
533: y[idy+1] += x[1];
534: x += 2;
535: }
536: break;
537: #if !defined(PETSC_USE_COMPLEX)
538: case MAX_VALUES:
539: for (i=0; i<n; i++) {
540: idy = *indicesy++;
541: y[idy] = PetscMax(y[idy],x[0]);
542: y[idy+1] = PetscMax(y[idy+1],x[1]);
543: x += 2;
544: }
545: #else
546: case MAX_VALUES:
547: #endif
548: case NOT_SET_VALUES:
549: break;
550: default:
551: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
552: }
553: return(0);
554: }
556: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
557: {
558: PetscInt i,idx,idy;
561: switch (addv) {
562: case INSERT_VALUES:
563: case INSERT_ALL_VALUES:
564: for (i=0; i<n; i++) {
565: idx = *indicesx++;
566: idy = *indicesy++;
567: y[idy] = x[idx];
568: y[idy+1] = x[idx+1];
569: }
570: break;
571: case ADD_VALUES:
572: case ADD_ALL_VALUES:
573: for (i=0; i<n; i++) {
574: idx = *indicesx++;
575: idy = *indicesy++;
576: y[idy] += x[idx];
577: y[idy+1] += x[idx+1];
578: }
579: break;
580: #if !defined(PETSC_USE_COMPLEX)
581: case MAX_VALUES:
582: for (i=0; i<n; i++) {
583: idx = *indicesx++;
584: idy = *indicesy++;
585: y[idy] = PetscMax(y[idy],x[idx]);
586: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
587: }
588: #else
589: case MAX_VALUES:
590: #endif
591: case NOT_SET_VALUES:
592: break;
593: default:
594: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
595: }
596: return(0);
597: }
598: /* ----------------------------------------------------------------------------------------------- */
599: PETSC_STATIC_INLINE void Pack_MPI1_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
600: {
601: PetscInt i,idx;
603: for (i=0; i<n; i++) {
604: idx = *indicesx++;
605: y[0] = x[idx];
606: y[1] = x[idx+1];
607: y[2] = x[idx+2];
608: y += 3;
609: }
610: }
611: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_3(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
612: {
613: PetscInt i,idy;
616: switch (addv) {
617: case INSERT_VALUES:
618: case INSERT_ALL_VALUES:
619: for (i=0; i<n; i++) {
620: idy = *indicesy++;
621: y[idy] = x[0];
622: y[idy+1] = x[1];
623: y[idy+2] = x[2];
624: x += 3;
625: }
626: break;
627: case ADD_VALUES:
628: case ADD_ALL_VALUES:
629: for (i=0; i<n; i++) {
630: idy = *indicesy++;
631: y[idy] += x[0];
632: y[idy+1] += x[1];
633: y[idy+2] += x[2];
634: x += 3;
635: }
636: break;
637: #if !defined(PETSC_USE_COMPLEX)
638: case MAX_VALUES:
639: for (i=0; i<n; i++) {
640: idy = *indicesy++;
641: y[idy] = PetscMax(y[idy],x[0]);
642: y[idy+1] = PetscMax(y[idy+1],x[1]);
643: y[idy+2] = PetscMax(y[idy+2],x[2]);
644: x += 3;
645: }
646: #else
647: case MAX_VALUES:
648: #endif
649: case NOT_SET_VALUES:
650: break;
651: default:
652: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
653: }
654: return(0);
655: }
657: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
658: {
659: PetscInt i,idx,idy;
662: switch (addv) {
663: case INSERT_VALUES:
664: case INSERT_ALL_VALUES:
665: for (i=0; i<n; i++) {
666: idx = *indicesx++;
667: idy = *indicesy++;
668: y[idy] = x[idx];
669: y[idy+1] = x[idx+1];
670: y[idy+2] = x[idx+2];
671: }
672: break;
673: case ADD_VALUES:
674: case ADD_ALL_VALUES:
675: for (i=0; i<n; i++) {
676: idx = *indicesx++;
677: idy = *indicesy++;
678: y[idy] += x[idx];
679: y[idy+1] += x[idx+1];
680: y[idy+2] += x[idx+2];
681: }
682: break;
683: #if !defined(PETSC_USE_COMPLEX)
684: case MAX_VALUES:
685: for (i=0; i<n; i++) {
686: idx = *indicesx++;
687: idy = *indicesy++;
688: y[idy] = PetscMax(y[idy],x[idx]);
689: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
690: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
691: }
692: #else
693: case MAX_VALUES:
694: #endif
695: case NOT_SET_VALUES:
696: break;
697: default:
698: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
699: }
700: return(0);
701: }
702: /* ----------------------------------------------------------------------------------------------- */
703: PETSC_STATIC_INLINE void Pack_MPI1_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
704: {
705: PetscInt i,idx;
707: for (i=0; i<n; i++) {
708: idx = *indicesx++;
709: y[0] = x[idx];
710: y[1] = x[idx+1];
711: y[2] = x[idx+2];
712: y[3] = x[idx+3];
713: y += 4;
714: }
715: }
716: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_4(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
717: {
718: PetscInt i,idy;
721: switch (addv) {
722: case INSERT_VALUES:
723: case INSERT_ALL_VALUES:
724: for (i=0; i<n; i++) {
725: idy = *indicesy++;
726: y[idy] = x[0];
727: y[idy+1] = x[1];
728: y[idy+2] = x[2];
729: y[idy+3] = x[3];
730: x += 4;
731: }
732: break;
733: case ADD_VALUES:
734: case ADD_ALL_VALUES:
735: for (i=0; i<n; i++) {
736: idy = *indicesy++;
737: y[idy] += x[0];
738: y[idy+1] += x[1];
739: y[idy+2] += x[2];
740: y[idy+3] += x[3];
741: x += 4;
742: }
743: break;
744: #if !defined(PETSC_USE_COMPLEX)
745: case MAX_VALUES:
746: for (i=0; i<n; i++) {
747: idy = *indicesy++;
748: y[idy] = PetscMax(y[idy],x[0]);
749: y[idy+1] = PetscMax(y[idy+1],x[1]);
750: y[idy+2] = PetscMax(y[idy+2],x[2]);
751: y[idy+3] = PetscMax(y[idy+3],x[3]);
752: x += 4;
753: }
754: #else
755: case MAX_VALUES:
756: #endif
757: case NOT_SET_VALUES:
758: break;
759: default:
760: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
761: }
762: return(0);
763: }
765: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
766: {
767: PetscInt i,idx,idy;
770: switch (addv) {
771: case INSERT_VALUES:
772: case INSERT_ALL_VALUES:
773: for (i=0; i<n; i++) {
774: idx = *indicesx++;
775: idy = *indicesy++;
776: y[idy] = x[idx];
777: y[idy+1] = x[idx+1];
778: y[idy+2] = x[idx+2];
779: y[idy+3] = x[idx+3];
780: }
781: break;
782: case ADD_VALUES:
783: case ADD_ALL_VALUES:
784: for (i=0; i<n; i++) {
785: idx = *indicesx++;
786: idy = *indicesy++;
787: y[idy] += x[idx];
788: y[idy+1] += x[idx+1];
789: y[idy+2] += x[idx+2];
790: y[idy+3] += x[idx+3];
791: }
792: break;
793: #if !defined(PETSC_USE_COMPLEX)
794: case MAX_VALUES:
795: for (i=0; i<n; i++) {
796: idx = *indicesx++;
797: idy = *indicesy++;
798: y[idy] = PetscMax(y[idy],x[idx]);
799: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
800: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
801: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
802: }
803: #else
804: case MAX_VALUES:
805: #endif
806: case NOT_SET_VALUES:
807: break;
808: default:
809: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
810: }
811: return(0);
812: }
813: /* ----------------------------------------------------------------------------------------------- */
814: PETSC_STATIC_INLINE void Pack_MPI1_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
815: {
816: PetscInt i,idx;
818: for (i=0; i<n; i++) {
819: idx = *indicesx++;
820: y[0] = x[idx];
821: y[1] = x[idx+1];
822: y[2] = x[idx+2];
823: y[3] = x[idx+3];
824: y[4] = x[idx+4];
825: y += 5;
826: }
827: }
829: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_5(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
830: {
831: PetscInt i,idy;
834: switch (addv) {
835: case INSERT_VALUES:
836: case INSERT_ALL_VALUES:
837: for (i=0; i<n; i++) {
838: idy = *indicesy++;
839: y[idy] = x[0];
840: y[idy+1] = x[1];
841: y[idy+2] = x[2];
842: y[idy+3] = x[3];
843: y[idy+4] = x[4];
844: x += 5;
845: }
846: break;
847: case ADD_VALUES:
848: case ADD_ALL_VALUES:
849: for (i=0; i<n; i++) {
850: idy = *indicesy++;
851: y[idy] += x[0];
852: y[idy+1] += x[1];
853: y[idy+2] += x[2];
854: y[idy+3] += x[3];
855: y[idy+4] += x[4];
856: x += 5;
857: }
858: break;
859: #if !defined(PETSC_USE_COMPLEX)
860: case MAX_VALUES:
861: for (i=0; i<n; i++) {
862: idy = *indicesy++;
863: y[idy] = PetscMax(y[idy],x[0]);
864: y[idy+1] = PetscMax(y[idy+1],x[1]);
865: y[idy+2] = PetscMax(y[idy+2],x[2]);
866: y[idy+3] = PetscMax(y[idy+3],x[3]);
867: y[idy+4] = PetscMax(y[idy+4],x[4]);
868: x += 5;
869: }
870: #else
871: case MAX_VALUES:
872: #endif
873: case NOT_SET_VALUES:
874: break;
875: default:
876: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
877: }
878: return(0);
879: }
881: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
882: {
883: PetscInt i,idx,idy;
886: switch (addv) {
887: case INSERT_VALUES:
888: case INSERT_ALL_VALUES:
889: for (i=0; i<n; i++) {
890: idx = *indicesx++;
891: idy = *indicesy++;
892: y[idy] = x[idx];
893: y[idy+1] = x[idx+1];
894: y[idy+2] = x[idx+2];
895: y[idy+3] = x[idx+3];
896: y[idy+4] = x[idx+4];
897: }
898: break;
899: case ADD_VALUES:
900: case ADD_ALL_VALUES:
901: for (i=0; i<n; i++) {
902: idx = *indicesx++;
903: idy = *indicesy++;
904: y[idy] += x[idx];
905: y[idy+1] += x[idx+1];
906: y[idy+2] += x[idx+2];
907: y[idy+3] += x[idx+3];
908: y[idy+4] += x[idx+4];
909: }
910: break;
911: #if !defined(PETSC_USE_COMPLEX)
912: case MAX_VALUES:
913: for (i=0; i<n; i++) {
914: idx = *indicesx++;
915: idy = *indicesy++;
916: y[idy] = PetscMax(y[idy],x[idx]);
917: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
918: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
919: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
920: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
921: }
922: #else
923: case MAX_VALUES:
924: #endif
925: case NOT_SET_VALUES:
926: break;
927: default:
928: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
929: }
930: return(0);
931: }
932: /* ----------------------------------------------------------------------------------------------- */
933: PETSC_STATIC_INLINE void Pack_MPI1_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
934: {
935: PetscInt i,idx;
937: for (i=0; i<n; i++) {
938: idx = *indicesx++;
939: y[0] = x[idx];
940: y[1] = x[idx+1];
941: y[2] = x[idx+2];
942: y[3] = x[idx+3];
943: y[4] = x[idx+4];
944: y[5] = x[idx+5];
945: y += 6;
946: }
947: }
949: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_6(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
950: {
951: PetscInt i,idy;
954: switch (addv) {
955: case INSERT_VALUES:
956: case INSERT_ALL_VALUES:
957: for (i=0; i<n; i++) {
958: idy = *indicesy++;
959: y[idy] = x[0];
960: y[idy+1] = x[1];
961: y[idy+2] = x[2];
962: y[idy+3] = x[3];
963: y[idy+4] = x[4];
964: y[idy+5] = x[5];
965: x += 6;
966: }
967: break;
968: case ADD_VALUES:
969: case ADD_ALL_VALUES:
970: for (i=0; i<n; i++) {
971: idy = *indicesy++;
972: y[idy] += x[0];
973: y[idy+1] += x[1];
974: y[idy+2] += x[2];
975: y[idy+3] += x[3];
976: y[idy+4] += x[4];
977: y[idy+5] += x[5];
978: x += 6;
979: }
980: break;
981: #if !defined(PETSC_USE_COMPLEX)
982: case MAX_VALUES:
983: for (i=0; i<n; i++) {
984: idy = *indicesy++;
985: y[idy] = PetscMax(y[idy],x[0]);
986: y[idy+1] = PetscMax(y[idy+1],x[1]);
987: y[idy+2] = PetscMax(y[idy+2],x[2]);
988: y[idy+3] = PetscMax(y[idy+3],x[3]);
989: y[idy+4] = PetscMax(y[idy+4],x[4]);
990: y[idy+5] = PetscMax(y[idy+5],x[5]);
991: x += 6;
992: }
993: #else
994: case MAX_VALUES:
995: #endif
996: case NOT_SET_VALUES:
997: break;
998: default:
999: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1000: }
1001: return(0);
1002: }
1004: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1005: {
1006: PetscInt i,idx,idy;
1009: switch (addv) {
1010: case INSERT_VALUES:
1011: case INSERT_ALL_VALUES:
1012: for (i=0; i<n; i++) {
1013: idx = *indicesx++;
1014: idy = *indicesy++;
1015: y[idy] = x[idx];
1016: y[idy+1] = x[idx+1];
1017: y[idy+2] = x[idx+2];
1018: y[idy+3] = x[idx+3];
1019: y[idy+4] = x[idx+4];
1020: y[idy+5] = x[idx+5];
1021: }
1022: break;
1023: case ADD_VALUES:
1024: case ADD_ALL_VALUES:
1025: for (i=0; i<n; i++) {
1026: idx = *indicesx++;
1027: idy = *indicesy++;
1028: y[idy] += x[idx];
1029: y[idy+1] += x[idx+1];
1030: y[idy+2] += x[idx+2];
1031: y[idy+3] += x[idx+3];
1032: y[idy+4] += x[idx+4];
1033: y[idy+5] += x[idx+5];
1034: }
1035: break;
1036: #if !defined(PETSC_USE_COMPLEX)
1037: case MAX_VALUES:
1038: for (i=0; i<n; i++) {
1039: idx = *indicesx++;
1040: idy = *indicesy++;
1041: y[idy] = PetscMax(y[idy],x[idx]);
1042: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1043: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1044: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1045: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1046: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1047: }
1048: #else
1049: case MAX_VALUES:
1050: #endif
1051: case NOT_SET_VALUES:
1052: break;
1053: default:
1054: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1055: }
1056: return(0);
1057: }
1058: /* ----------------------------------------------------------------------------------------------- */
1059: PETSC_STATIC_INLINE void Pack_MPI1_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1060: {
1061: PetscInt i,idx;
1063: for (i=0; i<n; i++) {
1064: idx = *indicesx++;
1065: y[0] = x[idx];
1066: y[1] = x[idx+1];
1067: y[2] = x[idx+2];
1068: y[3] = x[idx+3];
1069: y[4] = x[idx+4];
1070: y[5] = x[idx+5];
1071: y[6] = x[idx+6];
1072: y += 7;
1073: }
1074: }
1076: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_7(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1077: {
1078: PetscInt i,idy;
1081: switch (addv) {
1082: case INSERT_VALUES:
1083: case INSERT_ALL_VALUES:
1084: for (i=0; i<n; i++) {
1085: idy = *indicesy++;
1086: y[idy] = x[0];
1087: y[idy+1] = x[1];
1088: y[idy+2] = x[2];
1089: y[idy+3] = x[3];
1090: y[idy+4] = x[4];
1091: y[idy+5] = x[5];
1092: y[idy+6] = x[6];
1093: x += 7;
1094: }
1095: break;
1096: case ADD_VALUES:
1097: case ADD_ALL_VALUES:
1098: for (i=0; i<n; i++) {
1099: idy = *indicesy++;
1100: y[idy] += x[0];
1101: y[idy+1] += x[1];
1102: y[idy+2] += x[2];
1103: y[idy+3] += x[3];
1104: y[idy+4] += x[4];
1105: y[idy+5] += x[5];
1106: y[idy+6] += x[6];
1107: x += 7;
1108: }
1109: break;
1110: #if !defined(PETSC_USE_COMPLEX)
1111: case MAX_VALUES:
1112: for (i=0; i<n; i++) {
1113: idy = *indicesy++;
1114: y[idy] = PetscMax(y[idy],x[0]);
1115: y[idy+1] = PetscMax(y[idy+1],x[1]);
1116: y[idy+2] = PetscMax(y[idy+2],x[2]);
1117: y[idy+3] = PetscMax(y[idy+3],x[3]);
1118: y[idy+4] = PetscMax(y[idy+4],x[4]);
1119: y[idy+5] = PetscMax(y[idy+5],x[5]);
1120: y[idy+6] = PetscMax(y[idy+6],x[6]);
1121: x += 7;
1122: }
1123: #else
1124: case MAX_VALUES:
1125: #endif
1126: case NOT_SET_VALUES:
1127: break;
1128: default:
1129: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1130: }
1131: return(0);
1132: }
1134: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1135: {
1136: PetscInt i,idx,idy;
1139: switch (addv) {
1140: case INSERT_VALUES:
1141: case INSERT_ALL_VALUES:
1142: for (i=0; i<n; i++) {
1143: idx = *indicesx++;
1144: idy = *indicesy++;
1145: y[idy] = x[idx];
1146: y[idy+1] = x[idx+1];
1147: y[idy+2] = x[idx+2];
1148: y[idy+3] = x[idx+3];
1149: y[idy+4] = x[idx+4];
1150: y[idy+5] = x[idx+5];
1151: y[idy+6] = x[idx+6];
1152: }
1153: break;
1154: case ADD_VALUES:
1155: case ADD_ALL_VALUES:
1156: for (i=0; i<n; i++) {
1157: idx = *indicesx++;
1158: idy = *indicesy++;
1159: y[idy] += x[idx];
1160: y[idy+1] += x[idx+1];
1161: y[idy+2] += x[idx+2];
1162: y[idy+3] += x[idx+3];
1163: y[idy+4] += x[idx+4];
1164: y[idy+5] += x[idx+5];
1165: y[idy+6] += x[idx+6];
1166: }
1167: break;
1168: #if !defined(PETSC_USE_COMPLEX)
1169: case MAX_VALUES:
1170: for (i=0; i<n; i++) {
1171: idx = *indicesx++;
1172: idy = *indicesy++;
1173: y[idy] = PetscMax(y[idy],x[idx]);
1174: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1175: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1176: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1177: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1178: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1179: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1180: }
1181: #else
1182: case MAX_VALUES:
1183: #endif
1184: case NOT_SET_VALUES:
1185: break;
1186: default:
1187: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1188: }
1189: return(0);
1190: }
1191: /* ----------------------------------------------------------------------------------------------- */
1192: PETSC_STATIC_INLINE void Pack_MPI1_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1193: {
1194: PetscInt i,idx;
1196: for (i=0; i<n; i++) {
1197: idx = *indicesx++;
1198: y[0] = x[idx];
1199: y[1] = x[idx+1];
1200: y[2] = x[idx+2];
1201: y[3] = x[idx+3];
1202: y[4] = x[idx+4];
1203: y[5] = x[idx+5];
1204: y[6] = x[idx+6];
1205: y[7] = x[idx+7];
1206: y += 8;
1207: }
1208: }
1210: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_8(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1211: {
1212: PetscInt i,idy;
1215: switch (addv) {
1216: case INSERT_VALUES:
1217: case INSERT_ALL_VALUES:
1218: for (i=0; i<n; i++) {
1219: idy = *indicesy++;
1220: y[idy] = x[0];
1221: y[idy+1] = x[1];
1222: y[idy+2] = x[2];
1223: y[idy+3] = x[3];
1224: y[idy+4] = x[4];
1225: y[idy+5] = x[5];
1226: y[idy+6] = x[6];
1227: y[idy+7] = x[7];
1228: x += 8;
1229: }
1230: break;
1231: case ADD_VALUES:
1232: case ADD_ALL_VALUES:
1233: for (i=0; i<n; i++) {
1234: idy = *indicesy++;
1235: y[idy] += x[0];
1236: y[idy+1] += x[1];
1237: y[idy+2] += x[2];
1238: y[idy+3] += x[3];
1239: y[idy+4] += x[4];
1240: y[idy+5] += x[5];
1241: y[idy+6] += x[6];
1242: y[idy+7] += x[7];
1243: x += 8;
1244: }
1245: break;
1246: #if !defined(PETSC_USE_COMPLEX)
1247: case MAX_VALUES:
1248: for (i=0; i<n; i++) {
1249: idy = *indicesy++;
1250: y[idy] = PetscMax(y[idy],x[0]);
1251: y[idy+1] = PetscMax(y[idy+1],x[1]);
1252: y[idy+2] = PetscMax(y[idy+2],x[2]);
1253: y[idy+3] = PetscMax(y[idy+3],x[3]);
1254: y[idy+4] = PetscMax(y[idy+4],x[4]);
1255: y[idy+5] = PetscMax(y[idy+5],x[5]);
1256: y[idy+6] = PetscMax(y[idy+6],x[6]);
1257: y[idy+7] = PetscMax(y[idy+7],x[7]);
1258: x += 8;
1259: }
1260: #else
1261: case MAX_VALUES:
1262: #endif
1263: case NOT_SET_VALUES:
1264: break;
1265: default:
1266: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1267: }
1268: return(0);
1269: }
1271: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1272: {
1273: PetscInt i,idx,idy;
1276: switch (addv) {
1277: case INSERT_VALUES:
1278: case INSERT_ALL_VALUES:
1279: for (i=0; i<n; i++) {
1280: idx = *indicesx++;
1281: idy = *indicesy++;
1282: y[idy] = x[idx];
1283: y[idy+1] = x[idx+1];
1284: y[idy+2] = x[idx+2];
1285: y[idy+3] = x[idx+3];
1286: y[idy+4] = x[idx+4];
1287: y[idy+5] = x[idx+5];
1288: y[idy+6] = x[idx+6];
1289: y[idy+7] = x[idx+7];
1290: }
1291: break;
1292: case ADD_VALUES:
1293: case ADD_ALL_VALUES:
1294: for (i=0; i<n; i++) {
1295: idx = *indicesx++;
1296: idy = *indicesy++;
1297: y[idy] += x[idx];
1298: y[idy+1] += x[idx+1];
1299: y[idy+2] += x[idx+2];
1300: y[idy+3] += x[idx+3];
1301: y[idy+4] += x[idx+4];
1302: y[idy+5] += x[idx+5];
1303: y[idy+6] += x[idx+6];
1304: y[idy+7] += x[idx+7];
1305: }
1306: break;
1307: #if !defined(PETSC_USE_COMPLEX)
1308: case MAX_VALUES:
1309: for (i=0; i<n; i++) {
1310: idx = *indicesx++;
1311: idy = *indicesy++;
1312: y[idy] = PetscMax(y[idy],x[idx]);
1313: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1314: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1315: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1316: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1317: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1318: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1319: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1320: }
1321: #else
1322: case MAX_VALUES:
1323: #endif
1324: case NOT_SET_VALUES:
1325: break;
1326: default:
1327: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1328: }
1329: return(0);
1330: }
1332: PETSC_STATIC_INLINE void Pack_MPI1_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1333: {
1334: PetscInt i,idx;
1336: for (i=0; i<n; i++) {
1337: idx = *indicesx++;
1338: y[0] = x[idx];
1339: y[1] = x[idx+1];
1340: y[2] = x[idx+2];
1341: y[3] = x[idx+3];
1342: y[4] = x[idx+4];
1343: y[5] = x[idx+5];
1344: y[6] = x[idx+6];
1345: y[7] = x[idx+7];
1346: y[8] = x[idx+8];
1347: y += 9;
1348: }
1349: }
1351: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_9(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1352: {
1353: PetscInt i,idy;
1356: switch (addv) {
1357: case INSERT_VALUES:
1358: case INSERT_ALL_VALUES:
1359: for (i=0; i<n; i++) {
1360: idy = *indicesy++;
1361: y[idy] = x[0];
1362: y[idy+1] = x[1];
1363: y[idy+2] = x[2];
1364: y[idy+3] = x[3];
1365: y[idy+4] = x[4];
1366: y[idy+5] = x[5];
1367: y[idy+6] = x[6];
1368: y[idy+7] = x[7];
1369: y[idy+8] = x[8];
1370: x += 9;
1371: }
1372: break;
1373: case ADD_VALUES:
1374: case ADD_ALL_VALUES:
1375: for (i=0; i<n; i++) {
1376: idy = *indicesy++;
1377: y[idy] += x[0];
1378: y[idy+1] += x[1];
1379: y[idy+2] += x[2];
1380: y[idy+3] += x[3];
1381: y[idy+4] += x[4];
1382: y[idy+5] += x[5];
1383: y[idy+6] += x[6];
1384: y[idy+7] += x[7];
1385: y[idy+8] += x[8];
1386: x += 9;
1387: }
1388: break;
1389: #if !defined(PETSC_USE_COMPLEX)
1390: case MAX_VALUES:
1391: for (i=0; i<n; i++) {
1392: idy = *indicesy++;
1393: y[idy] = PetscMax(y[idy],x[0]);
1394: y[idy+1] = PetscMax(y[idy+1],x[1]);
1395: y[idy+2] = PetscMax(y[idy+2],x[2]);
1396: y[idy+3] = PetscMax(y[idy+3],x[3]);
1397: y[idy+4] = PetscMax(y[idy+4],x[4]);
1398: y[idy+5] = PetscMax(y[idy+5],x[5]);
1399: y[idy+6] = PetscMax(y[idy+6],x[6]);
1400: y[idy+7] = PetscMax(y[idy+7],x[7]);
1401: y[idy+8] = PetscMax(y[idy+8],x[8]);
1402: x += 9;
1403: }
1404: #else
1405: case MAX_VALUES:
1406: #endif
1407: case NOT_SET_VALUES:
1408: break;
1409: default:
1410: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1411: }
1412: return(0);
1413: }
1415: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1416: {
1417: PetscInt i,idx,idy;
1420: switch (addv) {
1421: case INSERT_VALUES:
1422: case INSERT_ALL_VALUES:
1423: for (i=0; i<n; i++) {
1424: idx = *indicesx++;
1425: idy = *indicesy++;
1426: y[idy] = x[idx];
1427: y[idy+1] = x[idx+1];
1428: y[idy+2] = x[idx+2];
1429: y[idy+3] = x[idx+3];
1430: y[idy+4] = x[idx+4];
1431: y[idy+5] = x[idx+5];
1432: y[idy+6] = x[idx+6];
1433: y[idy+7] = x[idx+7];
1434: y[idy+8] = x[idx+8];
1435: }
1436: break;
1437: case ADD_VALUES:
1438: case ADD_ALL_VALUES:
1439: for (i=0; i<n; i++) {
1440: idx = *indicesx++;
1441: idy = *indicesy++;
1442: y[idy] += x[idx];
1443: y[idy+1] += x[idx+1];
1444: y[idy+2] += x[idx+2];
1445: y[idy+3] += x[idx+3];
1446: y[idy+4] += x[idx+4];
1447: y[idy+5] += x[idx+5];
1448: y[idy+6] += x[idx+6];
1449: y[idy+7] += x[idx+7];
1450: y[idy+8] += x[idx+8];
1451: }
1452: break;
1453: #if !defined(PETSC_USE_COMPLEX)
1454: case MAX_VALUES:
1455: for (i=0; i<n; i++) {
1456: idx = *indicesx++;
1457: idy = *indicesy++;
1458: y[idy] = PetscMax(y[idy],x[idx]);
1459: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1460: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1461: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1462: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1463: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1464: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1465: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1466: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1467: }
1468: #else
1469: case MAX_VALUES:
1470: #endif
1471: case NOT_SET_VALUES:
1472: break;
1473: default:
1474: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1475: }
1476: return(0);
1477: }
1479: PETSC_STATIC_INLINE void Pack_MPI1_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1480: {
1481: PetscInt i,idx;
1483: for (i=0; i<n; i++) {
1484: idx = *indicesx++;
1485: y[0] = x[idx];
1486: y[1] = x[idx+1];
1487: y[2] = x[idx+2];
1488: y[3] = x[idx+3];
1489: y[4] = x[idx+4];
1490: y[5] = x[idx+5];
1491: y[6] = x[idx+6];
1492: y[7] = x[idx+7];
1493: y[8] = x[idx+8];
1494: y[9] = x[idx+9];
1495: y += 10;
1496: }
1497: }
1499: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_10(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1500: {
1501: PetscInt i,idy;
1504: switch (addv) {
1505: case INSERT_VALUES:
1506: case INSERT_ALL_VALUES:
1507: for (i=0; i<n; i++) {
1508: idy = *indicesy++;
1509: y[idy] = x[0];
1510: y[idy+1] = x[1];
1511: y[idy+2] = x[2];
1512: y[idy+3] = x[3];
1513: y[idy+4] = x[4];
1514: y[idy+5] = x[5];
1515: y[idy+6] = x[6];
1516: y[idy+7] = x[7];
1517: y[idy+8] = x[8];
1518: y[idy+9] = x[9];
1519: x += 10;
1520: }
1521: break;
1522: case ADD_VALUES:
1523: case ADD_ALL_VALUES:
1524: for (i=0; i<n; i++) {
1525: idy = *indicesy++;
1526: y[idy] += x[0];
1527: y[idy+1] += x[1];
1528: y[idy+2] += x[2];
1529: y[idy+3] += x[3];
1530: y[idy+4] += x[4];
1531: y[idy+5] += x[5];
1532: y[idy+6] += x[6];
1533: y[idy+7] += x[7];
1534: y[idy+8] += x[8];
1535: y[idy+9] += x[9];
1536: x += 10;
1537: }
1538: break;
1539: #if !defined(PETSC_USE_COMPLEX)
1540: case MAX_VALUES:
1541: for (i=0; i<n; i++) {
1542: idy = *indicesy++;
1543: y[idy] = PetscMax(y[idy],x[0]);
1544: y[idy+1] = PetscMax(y[idy+1],x[1]);
1545: y[idy+2] = PetscMax(y[idy+2],x[2]);
1546: y[idy+3] = PetscMax(y[idy+3],x[3]);
1547: y[idy+4] = PetscMax(y[idy+4],x[4]);
1548: y[idy+5] = PetscMax(y[idy+5],x[5]);
1549: y[idy+6] = PetscMax(y[idy+6],x[6]);
1550: y[idy+7] = PetscMax(y[idy+7],x[7]);
1551: y[idy+8] = PetscMax(y[idy+8],x[8]);
1552: y[idy+9] = PetscMax(y[idy+9],x[9]);
1553: x += 10;
1554: }
1555: #else
1556: case MAX_VALUES:
1557: #endif
1558: case NOT_SET_VALUES:
1559: break;
1560: default:
1561: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1562: }
1563: return(0);
1564: }
1566: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1567: {
1568: PetscInt i,idx,idy;
1571: switch (addv) {
1572: case INSERT_VALUES:
1573: case INSERT_ALL_VALUES:
1574: for (i=0; i<n; i++) {
1575: idx = *indicesx++;
1576: idy = *indicesy++;
1577: y[idy] = x[idx];
1578: y[idy+1] = x[idx+1];
1579: y[idy+2] = x[idx+2];
1580: y[idy+3] = x[idx+3];
1581: y[idy+4] = x[idx+4];
1582: y[idy+5] = x[idx+5];
1583: y[idy+6] = x[idx+6];
1584: y[idy+7] = x[idx+7];
1585: y[idy+8] = x[idx+8];
1586: y[idy+9] = x[idx+9];
1587: }
1588: break;
1589: case ADD_VALUES:
1590: case ADD_ALL_VALUES:
1591: for (i=0; i<n; i++) {
1592: idx = *indicesx++;
1593: idy = *indicesy++;
1594: y[idy] += x[idx];
1595: y[idy+1] += x[idx+1];
1596: y[idy+2] += x[idx+2];
1597: y[idy+3] += x[idx+3];
1598: y[idy+4] += x[idx+4];
1599: y[idy+5] += x[idx+5];
1600: y[idy+6] += x[idx+6];
1601: y[idy+7] += x[idx+7];
1602: y[idy+8] += x[idx+8];
1603: y[idy+9] += x[idx+9];
1604: }
1605: break;
1606: #if !defined(PETSC_USE_COMPLEX)
1607: case MAX_VALUES:
1608: for (i=0; i<n; i++) {
1609: idx = *indicesx++;
1610: idy = *indicesy++;
1611: y[idy] = PetscMax(y[idy],x[idx]);
1612: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1613: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1614: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1615: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1616: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1617: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1618: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1619: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1620: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1621: }
1622: #else
1623: case MAX_VALUES:
1624: #endif
1625: case NOT_SET_VALUES:
1626: break;
1627: default:
1628: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1629: }
1630: return(0);
1631: }
1633: PETSC_STATIC_INLINE void Pack_MPI1_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1634: {
1635: PetscInt i,idx;
1637: for (i=0; i<n; i++) {
1638: idx = *indicesx++;
1639: y[0] = x[idx];
1640: y[1] = x[idx+1];
1641: y[2] = x[idx+2];
1642: y[3] = x[idx+3];
1643: y[4] = x[idx+4];
1644: y[5] = x[idx+5];
1645: y[6] = x[idx+6];
1646: y[7] = x[idx+7];
1647: y[8] = x[idx+8];
1648: y[9] = x[idx+9];
1649: y[10] = x[idx+10];
1650: y += 11;
1651: }
1652: }
1654: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_11(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1655: {
1656: PetscInt i,idy;
1659: switch (addv) {
1660: case INSERT_VALUES:
1661: case INSERT_ALL_VALUES:
1662: for (i=0; i<n; i++) {
1663: idy = *indicesy++;
1664: y[idy] = x[0];
1665: y[idy+1] = x[1];
1666: y[idy+2] = x[2];
1667: y[idy+3] = x[3];
1668: y[idy+4] = x[4];
1669: y[idy+5] = x[5];
1670: y[idy+6] = x[6];
1671: y[idy+7] = x[7];
1672: y[idy+8] = x[8];
1673: y[idy+9] = x[9];
1674: y[idy+10] = x[10];
1675: x += 11;
1676: }
1677: break;
1678: case ADD_VALUES:
1679: case ADD_ALL_VALUES:
1680: for (i=0; i<n; i++) {
1681: idy = *indicesy++;
1682: y[idy] += x[0];
1683: y[idy+1] += x[1];
1684: y[idy+2] += x[2];
1685: y[idy+3] += x[3];
1686: y[idy+4] += x[4];
1687: y[idy+5] += x[5];
1688: y[idy+6] += x[6];
1689: y[idy+7] += x[7];
1690: y[idy+8] += x[8];
1691: y[idy+9] += x[9];
1692: y[idy+10] += x[10];
1693: x += 11;
1694: }
1695: break;
1696: #if !defined(PETSC_USE_COMPLEX)
1697: case MAX_VALUES:
1698: for (i=0; i<n; i++) {
1699: idy = *indicesy++;
1700: y[idy] = PetscMax(y[idy],x[0]);
1701: y[idy+1] = PetscMax(y[idy+1],x[1]);
1702: y[idy+2] = PetscMax(y[idy+2],x[2]);
1703: y[idy+3] = PetscMax(y[idy+3],x[3]);
1704: y[idy+4] = PetscMax(y[idy+4],x[4]);
1705: y[idy+5] = PetscMax(y[idy+5],x[5]);
1706: y[idy+6] = PetscMax(y[idy+6],x[6]);
1707: y[idy+7] = PetscMax(y[idy+7],x[7]);
1708: y[idy+8] = PetscMax(y[idy+8],x[8]);
1709: y[idy+9] = PetscMax(y[idy+9],x[9]);
1710: y[idy+10] = PetscMax(y[idy+10],x[10]);
1711: x += 11;
1712: }
1713: #else
1714: case MAX_VALUES:
1715: #endif
1716: case NOT_SET_VALUES:
1717: break;
1718: default:
1719: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1720: }
1721: return(0);
1722: }
1724: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1725: {
1726: PetscInt i,idx,idy;
1729: switch (addv) {
1730: case INSERT_VALUES:
1731: case INSERT_ALL_VALUES:
1732: for (i=0; i<n; i++) {
1733: idx = *indicesx++;
1734: idy = *indicesy++;
1735: y[idy] = x[idx];
1736: y[idy+1] = x[idx+1];
1737: y[idy+2] = x[idx+2];
1738: y[idy+3] = x[idx+3];
1739: y[idy+4] = x[idx+4];
1740: y[idy+5] = x[idx+5];
1741: y[idy+6] = x[idx+6];
1742: y[idy+7] = x[idx+7];
1743: y[idy+8] = x[idx+8];
1744: y[idy+9] = x[idx+9];
1745: y[idy+10] = x[idx+10];
1746: }
1747: break;
1748: case ADD_VALUES:
1749: case ADD_ALL_VALUES:
1750: for (i=0; i<n; i++) {
1751: idx = *indicesx++;
1752: idy = *indicesy++;
1753: y[idy] += x[idx];
1754: y[idy+1] += x[idx+1];
1755: y[idy+2] += x[idx+2];
1756: y[idy+3] += x[idx+3];
1757: y[idy+4] += x[idx+4];
1758: y[idy+5] += x[idx+5];
1759: y[idy+6] += x[idx+6];
1760: y[idy+7] += x[idx+7];
1761: y[idy+8] += x[idx+8];
1762: y[idy+9] += x[idx+9];
1763: y[idy+10] += x[idx+10];
1764: }
1765: break;
1766: #if !defined(PETSC_USE_COMPLEX)
1767: case MAX_VALUES:
1768: for (i=0; i<n; i++) {
1769: idx = *indicesx++;
1770: idy = *indicesy++;
1771: y[idy] = PetscMax(y[idy],x[idx]);
1772: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1773: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1774: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1775: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1776: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1777: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1778: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1779: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1780: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1781: y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1782: }
1783: #else
1784: case MAX_VALUES:
1785: #endif
1786: case NOT_SET_VALUES:
1787: break;
1788: default:
1789: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1790: }
1791: return(0);
1792: }
1794: /* ----------------------------------------------------------------------------------------------- */
1795: PETSC_STATIC_INLINE void Pack_MPI1_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1796: {
1797: PetscInt i,idx;
1799: for (i=0; i<n; i++) {
1800: idx = *indicesx++;
1801: y[0] = x[idx];
1802: y[1] = x[idx+1];
1803: y[2] = x[idx+2];
1804: y[3] = x[idx+3];
1805: y[4] = x[idx+4];
1806: y[5] = x[idx+5];
1807: y[6] = x[idx+6];
1808: y[7] = x[idx+7];
1809: y[8] = x[idx+8];
1810: y[9] = x[idx+9];
1811: y[10] = x[idx+10];
1812: y[11] = x[idx+11];
1813: y += 12;
1814: }
1815: }
1817: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_12(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1818: {
1819: PetscInt i,idy;
1822: switch (addv) {
1823: case INSERT_VALUES:
1824: case INSERT_ALL_VALUES:
1825: for (i=0; i<n; i++) {
1826: idy = *indicesy++;
1827: y[idy] = x[0];
1828: y[idy+1] = x[1];
1829: y[idy+2] = x[2];
1830: y[idy+3] = x[3];
1831: y[idy+4] = x[4];
1832: y[idy+5] = x[5];
1833: y[idy+6] = x[6];
1834: y[idy+7] = x[7];
1835: y[idy+8] = x[8];
1836: y[idy+9] = x[9];
1837: y[idy+10] = x[10];
1838: y[idy+11] = x[11];
1839: x += 12;
1840: }
1841: break;
1842: case ADD_VALUES:
1843: case ADD_ALL_VALUES:
1844: for (i=0; i<n; i++) {
1845: idy = *indicesy++;
1846: y[idy] += x[0];
1847: y[idy+1] += x[1];
1848: y[idy+2] += x[2];
1849: y[idy+3] += x[3];
1850: y[idy+4] += x[4];
1851: y[idy+5] += x[5];
1852: y[idy+6] += x[6];
1853: y[idy+7] += x[7];
1854: y[idy+8] += x[8];
1855: y[idy+9] += x[9];
1856: y[idy+10] += x[10];
1857: y[idy+11] += x[11];
1858: x += 12;
1859: }
1860: break;
1861: #if !defined(PETSC_USE_COMPLEX)
1862: case MAX_VALUES:
1863: for (i=0; i<n; i++) {
1864: idy = *indicesy++;
1865: y[idy] = PetscMax(y[idy],x[0]);
1866: y[idy+1] = PetscMax(y[idy+1],x[1]);
1867: y[idy+2] = PetscMax(y[idy+2],x[2]);
1868: y[idy+3] = PetscMax(y[idy+3],x[3]);
1869: y[idy+4] = PetscMax(y[idy+4],x[4]);
1870: y[idy+5] = PetscMax(y[idy+5],x[5]);
1871: y[idy+6] = PetscMax(y[idy+6],x[6]);
1872: y[idy+7] = PetscMax(y[idy+7],x[7]);
1873: y[idy+8] = PetscMax(y[idy+8],x[8]);
1874: y[idy+9] = PetscMax(y[idy+9],x[9]);
1875: y[idy+10] = PetscMax(y[idy+10],x[10]);
1876: y[idy+11] = PetscMax(y[idy+11],x[11]);
1877: x += 12;
1878: }
1879: #else
1880: case MAX_VALUES:
1881: #endif
1882: case NOT_SET_VALUES:
1883: break;
1884: default:
1885: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1886: }
1887: return(0);
1888: }
1890: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1891: {
1892: PetscInt i,idx,idy;
1895: switch (addv) {
1896: case INSERT_VALUES:
1897: case INSERT_ALL_VALUES:
1898: for (i=0; i<n; i++) {
1899: idx = *indicesx++;
1900: idy = *indicesy++;
1901: y[idy] = x[idx];
1902: y[idy+1] = x[idx+1];
1903: y[idy+2] = x[idx+2];
1904: y[idy+3] = x[idx+3];
1905: y[idy+4] = x[idx+4];
1906: y[idy+5] = x[idx+5];
1907: y[idy+6] = x[idx+6];
1908: y[idy+7] = x[idx+7];
1909: y[idy+8] = x[idx+8];
1910: y[idy+9] = x[idx+9];
1911: y[idy+10] = x[idx+10];
1912: y[idy+11] = x[idx+11];
1913: }
1914: break;
1915: case ADD_VALUES:
1916: case ADD_ALL_VALUES:
1917: for (i=0; i<n; i++) {
1918: idx = *indicesx++;
1919: idy = *indicesy++;
1920: y[idy] += x[idx];
1921: y[idy+1] += x[idx+1];
1922: y[idy+2] += x[idx+2];
1923: y[idy+3] += x[idx+3];
1924: y[idy+4] += x[idx+4];
1925: y[idy+5] += x[idx+5];
1926: y[idy+6] += x[idx+6];
1927: y[idy+7] += x[idx+7];
1928: y[idy+8] += x[idx+8];
1929: y[idy+9] += x[idx+9];
1930: y[idy+10] += x[idx+10];
1931: y[idy+11] += x[idx+11];
1932: }
1933: break;
1934: #if !defined(PETSC_USE_COMPLEX)
1935: case MAX_VALUES:
1936: for (i=0; i<n; i++) {
1937: idx = *indicesx++;
1938: idy = *indicesy++;
1939: y[idy] = PetscMax(y[idy],x[idx]);
1940: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1941: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1942: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1943: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1944: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1945: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1946: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1947: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1948: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1949: y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1950: y[idy+11] = PetscMax(y[idy+11],x[idx+11]);
1951: }
1952: #else
1953: case MAX_VALUES:
1954: #endif
1955: case NOT_SET_VALUES:
1956: break;
1957: default:
1958: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1959: }
1960: return(0);
1961: }
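/*
   The fixed-size kernels above (Pack_MPI1_12, UnPack_MPI1_12, Scatter_MPI1_12,
   and their bs = 1..11 siblings earlier in the file) exist so the compiler can
   fully unroll the per-block copy loop.  A minimal sketch of how a pack/unpack
   pair cooperates on a message buffer; the names nsend, sendidx, xarray, etc.
   are hypothetical and error checking is elided:

       PetscScalar *buf;
       PetscMalloc1(12*nsend,&buf);
       Pack_MPI1_12(nsend,sendidx,xarray,buf,12);              // sender gathers blocks
       // ... exchange buf between ranks with MPI ...
       UnPack_MPI1_12(nrecv,buf,recvidx,yarray,ADD_VALUES,12); // receiver accumulates
       PetscFree(buf);
*/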
1963: /* ----------------------------------------------------------------------------------------------- */
1964: PETSC_STATIC_INLINE void Pack_MPI1_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1965: {
1966: PetscInt i,idx;
1968: for (i=0; i<n; i++) {
1969: idx = *indicesx++;
1970: PetscMemcpy(y,x + idx,bs*sizeof(PetscScalar));
1971: y += bs;
1972: }
1973: }
1975: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_bs(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1976: {
1977: PetscInt i,idy,j;
1980: switch (addv) {
1981: case INSERT_VALUES:
1982: case INSERT_ALL_VALUES:
1983: for (i=0; i<n; i++) {
1984: idy = *indicesy++;
1985: PetscMemcpy(y + idy,x,bs*sizeof(PetscScalar));
1986: x += bs;
1987: }
1988: break;
1989: case ADD_VALUES:
1990: case ADD_ALL_VALUES:
1991: for (i=0; i<n; i++) {
1992: idy = *indicesy++;
1993: for (j=0; j<bs; j++) y[idy+j] += x[j];
1994: x += bs;
1995: }
1996: break;
1997: #if !defined(PETSC_USE_COMPLEX)
1998: case MAX_VALUES:
1999: for (i=0; i<n; i++) {
2000: idy = *indicesy++;
2001: for (j=0; j<bs; j++) y[idy+j] = PetscMax(y[idy+j],x[j]);
2002: x += bs;
2003: }
2004: #else
2005: case MAX_VALUES:
2006: #endif
2007: case NOT_SET_VALUES:
2008: break;
2009: default:
2010: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2011: }
2012: return(0);
2013: }
2015: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
2016: {
2017: PetscInt i,idx,idy,j;
2020: switch (addv) {
2021: case INSERT_VALUES:
2022: case INSERT_ALL_VALUES:
2023: for (i=0; i<n; i++) {
2024: idx = *indicesx++;
2025: idy = *indicesy++;
2026: PetscMemcpy(y + idy, x + idx,bs*sizeof(PetscScalar));
2027: }
2028: break;
2029: case ADD_VALUES:
2030: case ADD_ALL_VALUES:
2031: for (i=0; i<n; i++) {
2032: idx = *indicesx++;
2033: idy = *indicesy++;
2034: for (j=0; j<bs; j++ ) y[idy+j] += x[idx+j];
2035: }
2036: break;
2037: #if !defined(PETSC_USE_COMPLEX)
2038: case MAX_VALUES:
2039: for (i=0; i<n; i++) {
2040: idx = *indicesx++;
2041: idy = *indicesy++;
2042: for (j=0; j<bs; j++ ) y[idy+j] = PetscMax(y[idy+j],x[idx+j]);
2043: }
2044: #else
2045: case MAX_VALUES:
2046: #endif
2047: case NOT_SET_VALUES:
2048: break;
2049: default:
2050: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
2051: }
2052: return(0);
2053: }
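/*
   Pack_MPI1_bs / UnPack_MPI1_bs / Scatter_MPI1_bs are the general fallbacks
   used when no unrolled kernel exists for the block size: INSERT modes lean on
   a PetscMemcpy per block, while ADD/MAX modes use a short j-loop.  For any bs
   that does have a specialized kernel the two paths are interchangeable, e.g.
   (a sketch with hypothetical arguments):

       Pack_MPI1_bs(n,idx,x,buf,12);   // fills buf identically to
       Pack_MPI1_12(n,idx,x,buf,12);   // the unrolled variant
*/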
2055: /* Instantiate the VecScatterBeginMPI1_<bs> and VecScatterEndMPI1_<bs> pairs for our chosen block sizes */
2056: #define BS 1
2057: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
2058: #define BS 2
2059: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
2060: #define BS 3
2061: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
2062: #define BS 4
2063: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
2064: #define BS 5
2065: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
2066: #define BS 6
2067: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
2068: #define BS 7
2069: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
2070: #define BS 8
2071: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
2072: #define BS 9
2073: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
2074: #define BS 10
2075: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
2076: #define BS 11
2077: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
2078: #define BS 12
2079: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
2080: #define BS bs
2081: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
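/*
   The define/include pairs above are a poor man's C template: each pass
   through vpscat_mpi1.h instantiates VecScatterBeginMPI1_<BS> and
   VecScatterEndMPI1_<BS> for that block size, and the final pass with BS
   defined to the runtime variable bs produces the generic _bs versions used in
   the switch below.  The header is assumed to #undef BS at its end so the
   successive redefinitions do not warn.  Schematically, the mechanism looks
   like this -- a sketch, not the actual header:

       #define PETSCMAP1_a(a,b) a ## _ ## b
       #define PETSCMAP1_b(a,b) PETSCMAP1_a(a,b)
       #define PETSCMAP1(a)     PETSCMAP1_b(a,BS)

       PetscErrorCode PETSCMAP1(VecScatterBeginMPI1)(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
       {
         // ...pack with PETSCMAP1(Pack_MPI1), start the MPI requests...
       }
       #undef BS
*/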
2083: /* ==========================================================================================*/
2085: /* create parallel to sequential scatter context */
2087: PetscErrorCode VecScatterCreateCommon_PtoS_MPI1(VecScatter_MPI_General*,VecScatter_MPI_General*,VecScatter);
2089: /*
2090: bs indicates how many elements there are in each block; normally this is 1.
2092: This routine contains a check that PetscMPIInt can represent the sizes involved.
2093: */
2094: PetscErrorCode VecScatterCreateLocal_PtoS_MPI1(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2095: {
2096: VecScatter_MPI_General *from,*to;
2097: PetscMPIInt size,rank,imdex,tag,n;
2098: PetscInt *source = NULL,*owners = NULL,nxr;
2099: PetscInt *lowner = NULL,*start = NULL,lengthy,lengthx;
2100: PetscMPIInt *nprocs = NULL,nrecvs;
2101: PetscInt i,j,idx,nsends;
2102: PetscMPIInt *owner = NULL;
2103: PetscInt *starts = NULL,count,slen;
2104: PetscInt *rvalues,*svalues,base,*values,nprocslocal,recvtotal,*rsvalues;
2105: PetscMPIInt *onodes1,*olengths1;
2106: MPI_Comm comm;
2107: MPI_Request *send_waits = NULL,*recv_waits = NULL;
2108: MPI_Status recv_status,*send_status;
2109: PetscErrorCode ierr;
2112: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2113: PetscObjectGetComm((PetscObject)xin,&comm);
2114: MPI_Comm_rank(comm,&rank);
2115: MPI_Comm_size(comm,&size);
2116: owners = xin->map->range;
2117: VecGetSize(yin,&lengthy);
2118: VecGetSize(xin,&lengthx);
2120: /* first count number of contributors to each processor */
2121: /* owner[i]: owner of the ith inidx; nprocs[j]: number of inidx entries to be sent to the jth proc */
2122: PetscMalloc2(size,&nprocs,nx,&owner);
2123: PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
2125: j = 0;
2126: nsends = 0;
2127: for (i=0; i<nx; i++) {
2128: idx = bs*inidx[i];
2129: if (idx < owners[j]) j = 0;
2130: for (; j<size; j++) {
2131: if (idx < owners[j+1]) {
2132: if (!nprocs[j]++) nsends++;
2133: owner[i] = j;
2134: break;
2135: }
2136: }
2137: if (j == size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Entry %D with block index %D not owned by any process; upper bound %D",i,idx,owners[size]);
2138: }
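/* The lookup above resets j only when the current index falls below the
   previously found owner, so for (locally) sorted inidx the ownership search
   is a single forward sweep over the ranges rather than O(nx*size). */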
2140: nprocslocal = nprocs[rank];
2141: nprocs[rank] = 0;
2142: if (nprocslocal) nsends--;
2143: /* inform other processors of the number of messages and the maximum length */
2144: PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2145: PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2146: PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2147: recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
2149: /* post receives: */
2150: PetscMalloc3(recvtotal,&rvalues,nrecvs,&source,nrecvs,&recv_waits);
2151: count = 0;
2152: for (i=0; i<nrecvs; i++) {
2153: MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2154: count += olengths1[i];
2155: }
2157: /* do sends:
2158: starts[i] gives the starting index in svalues for the entries going to
2159: the ith processor
2160: */
2161: nxr = 0;
2162: for (i=0; i<nx; i++) {
2163: if (owner[i] != rank) nxr++;
2164: }
2165: PetscMalloc3(nxr,&svalues,nsends,&send_waits,size+1,&starts);
2167: starts[0] = 0;
2168: for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2169: for (i=0; i<nx; i++) {
2170: if (owner[i] != rank) svalues[starts[owner[i]]++] = bs*inidx[i];
2171: }
2172: starts[0] = 0;
2173: for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2174: count = 0;
2175: for (i=0; i<size; i++) {
2176: if (nprocs[i]) {
2177: MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);
2178: }
2179: }
2181: /* wait on receives */
2182: count = nrecvs;
2183: slen = 0;
2184: while (count) {
2185: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2186: /* tally the length of each received message; the indices are unpacked below */
2187: MPI_Get_count(&recv_status,MPIU_INT,&n);
2188: slen += n;
2189: count--;
2190: }
2192: if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message length %D not the expected %D",slen,recvtotal);
2194: /* allocate entire send scatter context */
2195: PetscNewLog(ctx,&to);
2196: to->n = nrecvs;
2198: PetscMalloc1(nrecvs,&to->requests);
2199: PetscMalloc4(bs*slen,&to->values,slen,&to->indices,nrecvs+1,&to->starts,nrecvs,&to->procs);
2200: PetscMalloc2(PetscMax(to->n,nsends),&to->sstatus,PetscMax(to->n,nsends),&to->rstatus);
2202: ctx->todata = (void*)to;
2203: to->starts[0] = 0;
2205: if (nrecvs) {
2206: /* move the data into the send scatter */
2207: base = owners[rank];
2208: rsvalues = rvalues;
2209: for (i=0; i<nrecvs; i++) {
2210: to->starts[i+1] = to->starts[i] + olengths1[i];
2211: to->procs[i] = onodes1[i];
2212: values = rsvalues;
2213: rsvalues += olengths1[i];
2214: for (j=0; j<olengths1[i]; j++) to->indices[to->starts[i] + j] = values[j] - base;
2215: }
2216: }
2217: PetscFree(olengths1);
2218: PetscFree(onodes1);
2219: PetscFree3(rvalues,source,recv_waits);
2221: /* allocate entire receive scatter context */
2222: PetscNewLog(ctx,&from);
2223: from->n = nsends;
2225: PetscMalloc1(nsends,&from->requests);
2226: PetscMalloc4((ny-nprocslocal)*bs,&from->values,ny-nprocslocal,&from->indices,nsends+1,&from->starts,from->n,&from->procs);
2227: ctx->fromdata = (void*)from;
2229: /* move data into receive scatter */
2230: PetscMalloc2(size,&lowner,nsends+1,&start);
2231: count = 0; from->starts[0] = start[0] = 0;
2232: for (i=0; i<size; i++) {
2233: if (nprocs[i]) {
2234: lowner[i] = count;
2235: from->procs[count++] = i;
2236: from->starts[count] = start[count] = start[count-1] + nprocs[i];
2237: }
2238: }
2240: for (i=0; i<nx; i++) {
2241: if (owner[i] != rank) {
2242: from->indices[start[lowner[owner[i]]]++] = bs*inidy[i];
2243: if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2244: }
2245: }
2246: PetscFree2(lowner,start);
2247: PetscFree2(nprocs,owner);
2249: /* wait on sends */
2250: if (nsends) {
2251: PetscMalloc1(nsends,&send_status);
2252: MPI_Waitall(nsends,send_waits,send_status);
2253: PetscFree(send_status);
2254: }
2255: PetscFree3(svalues,send_waits,starts);
2257: if (nprocslocal) {
2258: PetscInt nt = from->local.n = to->local.n = nprocslocal;
2259: /* we have a scatter to ourselves */
2260: PetscMalloc1(nt,&to->local.vslots);
2261: PetscMalloc1(nt,&from->local.vslots);
2262: nt = 0;
2263: for (i=0; i<nx; i++) {
2264: idx = bs*inidx[i];
2265: if (idx >= owners[rank] && idx < owners[rank+1]) {
2266: to->local.vslots[nt] = idx - owners[rank];
2267: from->local.vslots[nt++] = bs*inidy[i];
2268: if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2269: }
2270: }
2271: PetscLogObjectMemory((PetscObject)ctx,2*nt*sizeof(PetscInt));
2272: } else {
2273: from->local.n = 0;
2274: from->local.vslots = 0;
2275: to->local.n = 0;
2276: to->local.vslots = 0;
2277: }
2279: from->local.nonmatching_computed = PETSC_FALSE;
2280: from->local.n_nonmatching = 0;
2281: from->local.slots_nonmatching = 0;
2282: to->local.nonmatching_computed = PETSC_FALSE;
2283: to->local.n_nonmatching = 0;
2284: to->local.slots_nonmatching = 0;
2286: from->format = VEC_SCATTER_MPI_GENERAL;
2287: to->format = VEC_SCATTER_MPI_GENERAL;
2288: from->bs = bs;
2289: to->bs = bs;
2291: VecScatterCreateCommon_PtoS_MPI1(from,to,ctx);
2292: return(0);
2293: }
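/*
   User code does not call VecScatterCreateLocal_PtoS_MPI1() directly; it is
   reached through VecScatterCreate() when scattering from an MPI vector to a
   sequential one.  A minimal sketch of that path (index-set creation and error
   checking elided):

       Vec        x, y;     // parallel source, sequential destination
       IS         ix, iy;   // global indices of x, local indices of y
       VecScatter sctx;
       VecScatterCreate(x,ix,y,iy,&sctx);
       VecScatterBegin(sctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
       VecScatterEnd(sctx,x,y,INSERT_VALUES,SCATTER_FORWARD);
       VecScatterDestroy(&sctx);
*/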
2295: /*
2296: bs indicates how many elements there are in each block; normally this is 1.
2297: */
2298: PetscErrorCode VecScatterCreateCommon_PtoS_MPI1(VecScatter_MPI_General *from,VecScatter_MPI_General *to,VecScatter ctx)
2299: {
2300: MPI_Comm comm;
2301: PetscMPIInt tag = ((PetscObject)ctx)->tag, tagr;
2302: PetscInt bs = to->bs;
2303: PetscMPIInt size;
2304: PetscInt i, n;
2308: PetscObjectGetComm((PetscObject)ctx,&comm);
2309: PetscObjectGetNewTag((PetscObject)ctx,&tagr);
2310: ctx->ops->destroy = VecScatterDestroy_PtoP_MPI1;
2312: ctx->reproduce = PETSC_FALSE;
2313: to->sendfirst = PETSC_FALSE;
2314: PetscOptionsGetBool(NULL,NULL,"-vecscatter_reproduce",&ctx->reproduce,NULL);
2315: PetscOptionsGetBool(NULL,NULL,"-vecscatter_sendfirst",&to->sendfirst,NULL);
2316: from->sendfirst = to->sendfirst;
2318: MPI_Comm_size(comm,&size);
2319: /* check if the receives are ALL going into contiguous locations; if so can skip indexing */
2320: to->contiq = PETSC_FALSE;
2321: n = from->starts[from->n];
2322: from->contiq = PETSC_TRUE;
2323: for (i=1; i<n; i++) {
2324: if (from->indices[i] != from->indices[i-1] + bs) {
2325: from->contiq = PETSC_FALSE;
2326: break;
2327: }
2328: }
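/* A contiguous receive pattern (each index exactly bs past the previous one)
   lets the alltoallw path below address the receive side with plain byte
   displacements instead of an indexed MPI datatype. */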
2330: to->use_alltoallv = PETSC_FALSE;
2331: PetscOptionsGetBool(NULL,NULL,"-vecscatter_alltoall",&to->use_alltoallv,NULL);
2332: from->use_alltoallv = to->use_alltoallv;
2333: if (from->use_alltoallv) PetscInfo(ctx,"Using MPI_Alltoallv() for scatter\n");
2334: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
2335: if (to->use_alltoallv) {
2336: to->use_alltoallw = PETSC_FALSE;
2337: PetscOptionsGetBool(NULL,NULL,"-vecscatter_nopack",&to->use_alltoallw,NULL);
2338: }
2339: from->use_alltoallw = to->use_alltoallw;
2340: if (from->use_alltoallw) PetscInfo(ctx,"Using MPI_Alltoallw() for scatter\n");
2341: #endif
2343: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
2344: to->use_window = PETSC_FALSE;
2345: PetscOptionsGetBool(NULL,NULL,"-vecscatter_window",&to->use_window,NULL);
2346: from->use_window = to->use_window;
2347: #endif
2349: if (to->use_alltoallv) {
2351: PetscMalloc2(size,&to->counts,size,&to->displs);
2352: PetscMemzero(to->counts,size*sizeof(PetscMPIInt));
2353: for (i=0; i<to->n; i++) to->counts[to->procs[i]] = bs*(to->starts[i+1] - to->starts[i]);
2355: to->displs[0] = 0;
2356: for (i=1; i<size; i++) to->displs[i] = to->displs[i-1] + to->counts[i-1];
2358: PetscMalloc2(size,&from->counts,size,&from->displs);
2359: PetscMemzero(from->counts,size*sizeof(PetscMPIInt));
2360: for (i=0; i<from->n; i++) from->counts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
2361: from->displs[0] = 0;
2362: for (i=1; i<size; i++) from->displs[i] = from->displs[i-1] + from->counts[i-1];
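/* These counts/displs arrays are exactly the sendcounts/sdispls and
   recvcounts/rdispls arguments MPI_Alltoallv() expects at scatter time,
   i.e. (a sketch only):
       MPI_Alltoallv(sendbuf,to->counts,to->displs,MPIU_SCALAR,
                     recvbuf,from->counts,from->displs,MPIU_SCALAR,comm);
*/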
2364: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
2365: if (to->use_alltoallw) {
2366: PetscMPIInt mpibs, mpilen;
2368: ctx->packtogether = PETSC_FALSE;
2369: PetscMPIIntCast(bs,&mpibs);
2370: PetscMalloc3(size,&to->wcounts,size,&to->wdispls,size,&to->types);
2371: PetscMemzero(to->wcounts,size*sizeof(PetscMPIInt));
2372: PetscMemzero(to->wdispls,size*sizeof(PetscMPIInt));
2373: for (i=0; i<size; i++) to->types[i] = MPIU_SCALAR;
2375: for (i=0; i<to->n; i++) {
2376: to->wcounts[to->procs[i]] = 1;
2377: PetscMPIIntCast(to->starts[i+1]-to->starts[i],&mpilen);
2378: MPI_Type_create_indexed_block(mpilen,mpibs,to->indices+to->starts[i],MPIU_SCALAR,to->types+to->procs[i]);
2379: MPI_Type_commit(to->types+to->procs[i]);
2380: }
2381: PetscMalloc3(size,&from->wcounts,size,&from->wdispls,size,&from->types);
2382: PetscMemzero(from->wcounts,size*sizeof(PetscMPIInt));
2383: PetscMemzero(from->wdispls,size*sizeof(PetscMPIInt));
2384: for (i=0; i<size; i++) from->types[i] = MPIU_SCALAR;
2386: if (from->contiq) {
2387: PetscInfo(ctx,"Scattered vector entries are stored contiguously, taking advantage of this with -vecscatter_alltoall\n");
2388: for (i=0; i<from->n; i++) from->wcounts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
2390: if (from->n) from->wdispls[from->procs[0]] = sizeof(PetscScalar)*from->indices[0];
2391: for (i=1; i<from->n; i++) from->wdispls[from->procs[i]] = from->wdispls[from->procs[i-1]] + sizeof(PetscScalar)*from->wcounts[from->procs[i-1]];
2393: } else {
2394: for (i=0; i<from->n; i++) {
2395: from->wcounts[from->procs[i]] = 1;
2396: PetscMPIIntCast(from->starts[i+1]-from->starts[i],&mpilen);
2397: MPI_Type_create_indexed_block(mpilen,mpibs,from->indices+from->starts[i],MPIU_SCALAR,from->types+from->procs[i]);
2398: MPI_Type_commit(from->types+from->procs[i]);
2399: }
2400: }
2401: } else ctx->ops->copy = VecScatterCopy_PtoP_AllToAll_MPI1;
2403: #else
2404: to->use_alltoallw = PETSC_FALSE;
2405: from->use_alltoallw = PETSC_FALSE;
2406: ctx->ops->copy = VecScatterCopy_PtoP_AllToAll_MPI1;
2407: #endif
2408: #if defined(PETSC_HAVE_MPI_WIN_CREATE_FEATURE)
2409: } else if (to->use_window) {
2410: PetscMPIInt temptag,winsize;
2411: MPI_Request *request;
2412: MPI_Status *status;
2414: PetscObjectGetNewTag((PetscObject)ctx,&temptag);
2415: winsize = (to->n ? to->starts[to->n] : 0)*bs*sizeof(PetscScalar);
2416: MPI_Win_create(to->values ? to->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&to->window);
2417: PetscMalloc1(to->n,&to->winstarts);
2418: PetscMalloc2(to->n,&request,to->n,&status);
2419: for (i=0; i<to->n; i++) {
2420: MPI_Irecv(to->winstarts+i,1,MPIU_INT,to->procs[i],temptag,comm,request+i);
2421: }
2422: for (i=0; i<from->n; i++) {
2423: MPI_Send(from->starts+i,1,MPIU_INT,from->procs[i],temptag,comm);
2424: }
2425: MPI_Waitall(to->n,request,status);
2426: PetscFree2(request,status);
2428: winsize = (from->n ? from->starts[from->n] : 0)*bs*sizeof(PetscScalar);
2429: MPI_Win_create(from->values ? from->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&from->window);
2430: PetscMalloc1(from->n,&from->winstarts);
2431: PetscMalloc2(from->n,&request,from->n,&status);
2432: for (i=0; i<from->n; i++) {
2433: MPI_Irecv(from->winstarts+i,1,MPIU_INT,from->procs[i],temptag,comm,request+i);
2434: }
2435: for (i=0; i<to->n; i++) {
2436: MPI_Send(to->starts+i,1,MPIU_INT,to->procs[i],temptag,comm);
2437: }
2438: MPI_Waitall(from->n,request,status);
2439: PetscFree2(request,status);
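/* After this exchange each rank knows, via winstarts, the offset within its
   partner's window at which its contribution belongs, so the scatter itself
   can later be carried out with one-sided MPI_Put() calls into the windows. */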
2440: #endif
2441: } else {
2442: PetscBool use_rsend = PETSC_FALSE, use_ssend = PETSC_FALSE;
2443: PetscInt *sstarts = to->starts, *rstarts = from->starts;
2444: PetscMPIInt *sprocs = to->procs, *rprocs = from->procs;
2445: MPI_Request *swaits = to->requests,*rwaits = from->requests;
2446: MPI_Request *rev_swaits,*rev_rwaits;
2447: PetscScalar *Ssvalues = to->values, *Srvalues = from->values;
2449: /* allocate additional wait variables for the "reverse" scatter */
2450: PetscMalloc1(to->n,&rev_rwaits);
2451: PetscMalloc1(from->n,&rev_swaits);
2452: to->rev_requests = rev_rwaits;
2453: from->rev_requests = rev_swaits;
2455: /* Register the persistent sends used by the reverse scatter (the receive buffers double as the reverse-send buffers) */
2456: PetscOptionsGetBool(NULL,NULL,"-vecscatter_rsend",&use_rsend,NULL);
2457: PetscOptionsGetBool(NULL,NULL,"-vecscatter_ssend",&use_ssend,NULL);
2458: if (use_rsend) {
2459: PetscInfo(ctx,"Using VecScatter ready receiver mode\n");
2460: to->use_readyreceiver = PETSC_TRUE;
2461: from->use_readyreceiver = PETSC_TRUE;
2462: } else {
2463: to->use_readyreceiver = PETSC_FALSE;
2464: from->use_readyreceiver = PETSC_FALSE;
2465: }
2466: if (use_ssend) {
2467: PetscInfo(ctx,"Using VecScatter Ssend mode\n");
2468: }
2470: for (i=0; i<from->n; i++) {
2471: if (use_rsend) {
2472: MPI_Rsend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2473: } else if (use_ssend) {
2474: MPI_Ssend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2475: } else {
2476: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2477: }
2478: }
2480: for (i=0; i<to->n; i++) {
2481: if (use_rsend) {
2482: MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2483: } else if (use_ssend) {
2484: MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2485: } else {
2486: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2487: }
2488: }
2489: /* Register receives for scatter and reverse */
2490: for (i=0; i<from->n; i++) {
2491: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
2492: }
2493: for (i=0; i<to->n; i++) {
2494: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tagr,comm,rev_rwaits+i);
2495: }
2496: if (use_rsend) {
2497: if (to->n) {MPI_Startall_irecv(to->starts[to->n]*to->bs,to->n,to->rev_requests);}
2498: if (from->n) {MPI_Startall_irecv(from->starts[from->n]*from->bs,from->n,from->requests);}
2499: MPI_Barrier(comm);
2500: }
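/* Ready-send mode is only legal if the matching receive is already posted
   when an Rsend is started; the MPI_Barrier() above guarantees every rank has
   pre-posted its receives before anyone returns and begins scattering. */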
2502: ctx->ops->copy = VecScatterCopy_PtoP_X_MPI1;
2503: }
2504: PetscInfo1(ctx,"Using blocksize %D scatter\n",bs);
2506: #if defined(PETSC_USE_DEBUG)
2507: MPIU_Allreduce(&bs,&i,1,MPIU_INT,MPI_MIN,PetscObjectComm((PetscObject)ctx));
2508: MPIU_Allreduce(&bs,&n,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)ctx));
2509: if (bs!=i || bs!=n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Block size %D != min %D or max %D",bs,i,n);
2510: #endif
2512: switch (bs) {
2513: case 12:
2514: ctx->ops->begin = VecScatterBeginMPI1_12;
2515: ctx->ops->end = VecScatterEndMPI1_12;
2516: break;
2517: case 11:
2518: ctx->ops->begin = VecScatterBeginMPI1_11;
2519: ctx->ops->end = VecScatterEndMPI1_11;
2520: break;
2521: case 10:
2522: ctx->ops->begin = VecScatterBeginMPI1_10;
2523: ctx->ops->end = VecScatterEndMPI1_10;
2524: break;
2525: case 9:
2526: ctx->ops->begin = VecScatterBeginMPI1_9;
2527: ctx->ops->end = VecScatterEndMPI1_9;
2528: break;
2529: case 8:
2530: ctx->ops->begin = VecScatterBeginMPI1_8;
2531: ctx->ops->end = VecScatterEndMPI1_8;
2532: break;
2533: case 7:
2534: ctx->ops->begin = VecScatterBeginMPI1_7;
2535: ctx->ops->end = VecScatterEndMPI1_7;
2536: break;
2537: case 6:
2538: ctx->ops->begin = VecScatterBeginMPI1_6;
2539: ctx->ops->end = VecScatterEndMPI1_6;
2540: break;
2541: case 5:
2542: ctx->ops->begin = VecScatterBeginMPI1_5;
2543: ctx->ops->end = VecScatterEndMPI1_5;
2544: break;
2545: case 4:
2546: ctx->ops->begin = VecScatterBeginMPI1_4;
2547: ctx->ops->end = VecScatterEndMPI1_4;
2548: break;
2549: case 3:
2550: ctx->ops->begin = VecScatterBeginMPI1_3;
2551: ctx->ops->end = VecScatterEndMPI1_3;
2552: break;
2553: case 2:
2554: ctx->ops->begin = VecScatterBeginMPI1_2;
2555: ctx->ops->end = VecScatterEndMPI1_2;
2556: break;
2557: case 1:
2558: ctx->ops->begin = VecScatterBeginMPI1_1;
2559: ctx->ops->end = VecScatterEndMPI1_1;
2560: break;
2561: default:
2562: ctx->ops->begin = VecScatterBeginMPI1_bs;
2563: ctx->ops->end = VecScatterEndMPI1_bs;
2565: }
2566: ctx->ops->view = VecScatterView_MPI_MPI1;
2567: /* Check if the local scatter is actually a copy; important special case */
2568: if (to->local.n) {
2569: VecScatterLocalOptimizeCopy_Private_MPI1(ctx,&to->local,&from->local,bs);
2570: }
2571: return(0);
2572: }
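/*
   The default (non-alltoall, non-window) branch above relies on MPI persistent
   requests: the communication setup is paid once via MPI_Send_init() and
   MPI_Recv_init(), and each scatter merely starts and completes the prebuilt
   requests.  A self-contained sketch of the pattern, independent of PETSc:

       MPI_Request req;
       int         iter;
       MPI_Send_init(buf,count,MPI_DOUBLE,dest,tag,comm,&req);   // once
       for (iter=0; iter<niter; iter++) {
         MPI_Start(&req);                    // reuse the prebuilt envelope
         MPI_Wait(&req,MPI_STATUS_IGNORE);
       }
       MPI_Request_free(&req);               // release the persistent request
*/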
2576: /* ------------------------------------------------------------------------------------*/
2577: /*
2578: Scatter from local Seq vectors to a parallel vector.
2579: Reverses the order of the arguments, calls VecScatterCreateLocal_PtoS_MPI1(),
2580: then swaps the resulting to and from data.
2581: */
2582: PetscErrorCode VecScatterCreateLocal_StoP_MPI1(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2583: {
2584: PetscErrorCode ierr;
2585: MPI_Request *waits;
2586: VecScatter_MPI_General *to,*from;
2589: VecScatterCreateLocal_PtoS_MPI1(ny,inidy,nx,inidx,yin,xin,bs,ctx);
2590: to = (VecScatter_MPI_General*)ctx->fromdata;
2591: from = (VecScatter_MPI_General*)ctx->todata;
2592: ctx->todata = (void*)to;
2593: ctx->fromdata = (void*)from;
2594: /* these two are special: they are ALWAYS stored in the to struct */
2595: to->sstatus = from->sstatus;
2596: to->rstatus = from->rstatus;
2598: from->sstatus = 0;
2599: from->rstatus = 0;
2601: waits = from->rev_requests;
2602: from->rev_requests = from->requests;
2603: from->requests = waits;
2604: waits = to->rev_requests;
2605: to->rev_requests = to->requests;
2606: to->requests = waits;
2607: return(0);
2608: }
2610: /* ---------------------------------------------------------------------------------*/
2611: PetscErrorCode VecScatterCreateLocal_PtoP_MPI1(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2612: {
2614: PetscMPIInt size,rank,tag,imdex,n;
2615: PetscInt *owners = xin->map->range;
2616: PetscMPIInt *nprocs = NULL;
2617: PetscInt i,j,idx,nsends,*local_inidx = NULL,*local_inidy = NULL;
2618: PetscMPIInt *owner = NULL;
2619: PetscInt *starts = NULL,count,slen;
2620: PetscInt *rvalues = NULL,*svalues = NULL,base,*values = NULL,*rsvalues,recvtotal,lastidx;
2621: PetscMPIInt *onodes1,*olengths1,nrecvs;
2622: MPI_Comm comm;
2623: MPI_Request *send_waits = NULL,*recv_waits = NULL;
2624: MPI_Status recv_status,*send_status = NULL;
2625: PetscBool duplicate = PETSC_FALSE;
2626: #if defined(PETSC_USE_DEBUG)
2627: PetscBool found = PETSC_FALSE;
2628: #endif
2631: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2632: PetscObjectGetComm((PetscObject)xin,&comm);
2633: MPI_Comm_size(comm,&size);
2634: MPI_Comm_rank(comm,&rank);
2635: if (size == 1) {
2636: VecScatterCreateLocal_StoP_MPI1(nx,inidx,ny,inidy,xin,yin,bs,ctx);
2637: return(0);
2638: }
2640: /*
2641: Each processor ships its inidx[j] and inidy[j] to the owning processor;
2642: the processors then call VecScatterCreateLocal_StoP_MPI1()
2643: */
2644: /* first count number of contributors to each processor */
2645: PetscMalloc3(size,&nprocs,nx,&owner,(size+1),&starts);
2646: PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
2648: lastidx = -1;
2649: j = 0;
2650: for (i=0; i<nx; i++) {
2651: /* if indices are NOT locally sorted, need to start search at the beginning */
2652: if (lastidx > (idx = bs*inidx[i])) j = 0;
2653: lastidx = idx;
2654: for (; j<size; j++) {
2655: if (idx >= owners[j] && idx < owners[j+1]) {
2656: nprocs[j]++;
2657: owner[i] = j;
2658: #if defined(PETSC_USE_DEBUG)
2659: found = PETSC_TRUE;
2660: #endif
2661: break;
2662: }
2663: }
2664: #if defined(PETSC_USE_DEBUG)
2665: if (!found) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2666: found = PETSC_FALSE;
2667: #endif
2668: }
2669: nsends = 0;
2670: for (i=0; i<size; i++) nsends += (nprocs[i] > 0);
2672: /* inform other processors of number of messages and max length*/
2673: PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2674: PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2675: PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2676: recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
2678: /* post receives: */
2679: PetscMalloc5(2*recvtotal,&rvalues,2*nx,&svalues,nrecvs,&recv_waits,nsends,&send_waits,nsends,&send_status);
2681: count = 0;
2682: for (i=0; i<nrecvs; i++) {
2683: MPI_Irecv((rvalues+2*count),2*olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2684: count += olengths1[i];
2685: }
2686: PetscFree(onodes1);
2688: /* do sends:
2689: starts[i] gives the starting index in svalues for the entries going to
2690: the ith processor
2691: */
2692: starts[0]= 0;
2693: for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2694: for (i=0; i<nx; i++) {
2695: svalues[2*starts[owner[i]]] = bs*inidx[i];
2696: svalues[1 + 2*starts[owner[i]]++] = bs*inidy[i];
2697: }
2699: starts[0] = 0;
2700: for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2701: count = 0;
2702: for (i=0; i<size; i++) {
2703: if (nprocs[i]) {
2704: MPI_Isend(svalues+2*starts[i],2*nprocs[i],MPIU_INT,i,tag,comm,send_waits+count);
2705: count++;
2706: }
2707: }
2708: PetscFree3(nprocs,owner,starts);
2710: /* wait on receives */
2711: count = nrecvs;
2712: slen = 0;
2713: while (count) {
2714: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2715: /* tally the received message lengths; entries arrive as (inidx,inidy) pairs, hence n/2 */
2716: MPI_Get_count(&recv_status,MPIU_INT,&n);
2717: slen += n/2;
2718: count--;
2719: }
2720: if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message length %D not the expected %D",slen,recvtotal);
2722: PetscMalloc2(slen,&local_inidx,slen,&local_inidy);
2723: base = owners[rank];
2724: count = 0;
2725: rsvalues = rvalues;
2726: for (i=0; i<nrecvs; i++) {
2727: values = rsvalues;
2728: rsvalues += 2*olengths1[i];
2729: for (j=0; j<olengths1[i]; j++) {
2730: local_inidx[count] = values[2*j] - base;
2731: local_inidy[count++] = values[2*j+1];
2732: }
2733: }
2734: PetscFree(olengths1);
2736: /* wait on sends */
2737: if (nsends) {MPI_Waitall(nsends,send_waits,send_status);}
2738: PetscFree5(rvalues,svalues,recv_waits,send_waits,send_status);
2740: /*
2741: should sort and remove duplicates from local_inidx and local_inidy;
2742: the block below does so, but only when do_it_slow is defined */
2744: #if defined(do_it_slow)
2745: /* sort on the from index */
2746: PetscSortIntWithArray(slen,local_inidx,local_inidy);
2747: start = 0;
2748: while (start < slen) {
2749: count = start+1;
2750: last = local_inidx[start];
2751: while (count < slen && last == local_inidx[count]) count++;
2752: if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
2753: /* sort on to index */
2754: PetscSortInt(count-start,local_inidy+start);
2755: }
2756: /* remove duplicates; not most efficient way, but probably good enough */
2757: i = start;
2758: while (i < count-1) {
2759: if (local_inidy[i] != local_inidy[i+1]) i++;
2760: else { /* found a duplicate */
2761: duplicate = PETSC_TRUE;
2762: for (j=i; j<slen-1; j++) {
2763: local_inidx[j] = local_inidx[j+1];
2764: local_inidy[j] = local_inidy[j+1];
2765: }
2766: slen--;
2767: count--;
2768: }
2769: }
2770: start = count;
2771: }
2772: #endif
2773: if (duplicate) {
2774: PetscInfo(ctx,"Duplicate from/to indices passed to VecScatterCreate(); they are ignored\n");
2775: }
2776: VecScatterCreateLocal_StoP_MPI1(slen,local_inidx,slen,local_inidy,xin,yin,bs,ctx);
2777: PetscFree2(local_inidx,local_inidy);
2778: return(0);
2779: }
2781: /*@
2782: PetscSFCreateFromZero - Create a PetscSF that maps a Vec from sequential to distributed
2784: Input Parameters:
2785: + comm - The communicator for the new SF
2786: - gv   - A distributed Vec
2787: Output Parameter:
2788: . sf - The SF created mapping a sequential Vec to gv
2790: Level: developer
2792: .seealso: DMPlexDistributedToSequential()
2793: @*/
2794: PetscErrorCode PetscSFCreateFromZero(MPI_Comm comm, Vec gv, PetscSF *sf)
2795: {
2796: PetscSFNode *remotenodes;
2797: PetscInt *localnodes;
2798: PetscInt N, n, start, numroots, l;
2799: PetscMPIInt rank;
2803: PetscSFCreate(comm, sf);
2804: VecGetSize(gv, &N);
2805: VecGetLocalSize(gv, &n);
2806: VecGetOwnershipRange(gv, &start, NULL);
2807: MPI_Comm_rank(comm, &rank);
2808: PetscMalloc1(n, &localnodes);
2809: PetscMalloc1(n, &remotenodes);
2810: if (!rank) numroots = N;
2811: else numroots = 0;
2812: for (l = 0; l < n; ++l) {
2813: localnodes[l] = l;
2814: remotenodes[l].rank = 0;
2815: remotenodes[l].index = l+start;
2816: }
2817: PetscSFSetGraph(*sf, numroots, n, localnodes, PETSC_OWN_POINTER, remotenodes, PETSC_OWN_POINTER);
2818: return(0);
2819: }
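/*
   A minimal sketch of using PetscSFCreateFromZero() to push a Vec held
   entirely on rank 0 out to a distributed Vec.  The sequential Vec sv and the
   surrounding setup are hypothetical; error checking is elided:

       PetscSF            sf;
       const PetscScalar *root = NULL;
       PetscScalar       *leaf;
       PetscSFCreateFromZero(comm,gv,&sf);
       if (!rank) {VecGetArrayRead(sv,&root);}  // sv lives only on rank 0
       VecGetArray(gv,&leaf);
       PetscSFBcastBegin(sf,MPIU_SCALAR,root,leaf);
       PetscSFBcastEnd(sf,MPIU_SCALAR,root,leaf);
       VecRestoreArray(gv,&leaf);
       if (!rank) {VecRestoreArrayRead(sv,&root);}
       PetscSFDestroy(&sf);
*/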