Actual source code: vpscat_mpi1.c
petsc-3.11.4 2019-09-28
2: /*
3: Defines parallel vector scatters using MPI1.
4: */
6: #include <../src/vec/vec/impls/dvecimpl.h> /*I "petscvec.h" I*/
7: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
8: #include <petsc/private/vecscatterimpl.h>
11: PetscErrorCode VecScatterView_MPI_MPI1(VecScatter ctx,PetscViewer viewer)
12: {
13: VecScatter_MPI_General *to =(VecScatter_MPI_General*)ctx->todata;
14: VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
15: PetscErrorCode ierr;
16: PetscInt i;
17: PetscMPIInt rank;
18: PetscViewerFormat format;
19: PetscBool iascii;
22: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
23: if (iascii) {
24: MPI_Comm_rank(PetscObjectComm((PetscObject)ctx),&rank);
25: PetscViewerGetFormat(viewer,&format);
26: if (format == PETSC_VIEWER_ASCII_INFO) {
27: PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;
29: MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
30: MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
31: itmp = to->starts[to->n+1];
32: MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
33: itmp = from->starts[from->n+1];
34: MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
35: MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)ctx));
37: PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
38: PetscViewerASCIIPrintf(viewer," Maximum number sends %D\n",nsend_max);
39: PetscViewerASCIIPrintf(viewer," Maximum number receives %D\n",nrecv_max);
40: PetscViewerASCIIPrintf(viewer," Maximum data sent %D\n",(int)(lensend_max*to->bs*sizeof(PetscScalar)));
41: PetscViewerASCIIPrintf(viewer," Maximum data received %D\n",(int)(lenrecv_max*to->bs*sizeof(PetscScalar)));
42: PetscViewerASCIIPrintf(viewer," Total data sent %D\n",(int)(alldata*to->bs*sizeof(PetscScalar)));
44: } else {
45: PetscViewerASCIIPushSynchronized(viewer);
46: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);
47: if (to->n) {
48: for (i=0; i<to->n; i++) {
49: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length = %D to whom %d\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
50: if (to->memcpy_plan.optimized[i]) { PetscViewerASCIISynchronizedPrintf(viewer," is optimized with %D memcpy's in Pack\n",to->memcpy_plan.copy_offsets[i+1]-to->memcpy_plan.copy_offsets[i]); }
51: }
52: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote sends (in order by process sent to)\n");
53: for (i=0; i<to->starts[to->n]; i++) {
54: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,to->indices[i]);
55: }
56: }
58: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);
59: if (from->n) {
60: for (i=0; i<from->n; i++) {
61: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %d\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
62: if (from->memcpy_plan.optimized[i]) { PetscViewerASCIISynchronizedPrintf(viewer," is optimized with %D memcpy's in Unpack\n",to->memcpy_plan.copy_offsets[i+1]-to->memcpy_plan.copy_offsets[i]); }
63: }
65: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote receives (in order by process received from)\n");
66: for (i=0; i<from->starts[from->n]; i++) {
67: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,from->indices[i]);
68: }
69: }
70: if (to->local.n) {
71: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Indices for local part of scatter\n",rank);
72: if (to->local.memcpy_plan.optimized[0]) {
73: PetscViewerASCIIPrintf(viewer,"Local part of the scatter is made of %D copies\n",to->local.memcpy_plan.copy_offsets[1]);
74: }
75: for (i=0; i<to->local.n; i++) { /* the to and from have the opposite meaning from what you would expect */
76: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] From %D to %D \n",rank,to->local.vslots[i],from->local.vslots[i]);
77: }
78: }
80: PetscViewerFlush(viewer);
81: PetscViewerASCIIPopSynchronized(viewer);
82: }
83: }
84: return(0);
85: }
87: /* -------------------------------------------------------------------------------------*/
88: PetscErrorCode VecScatterDestroy_PtoP_MPI1(VecScatter ctx)
89: {
90: VecScatter_MPI_General *to = (VecScatter_MPI_General*)ctx->todata;
91: VecScatter_MPI_General *from = (VecScatter_MPI_General*)ctx->fromdata;
92: PetscErrorCode ierr;
93: PetscInt i;
96: /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
97: if (to->requests) {
98: for (i=0; i<to->n; i++) {
99: MPI_Request_free(to->requests + i);
100: }
101: }
102: if (to->rev_requests) {
103: for (i=0; i<to->n; i++) {
104: MPI_Request_free(to->rev_requests + i);
105: }
106: }
107: if (from->requests) {
108: for (i=0; i<from->n; i++) {
109: MPI_Request_free(from->requests + i);
110: }
111: }
113: if (from->rev_requests) {
114: for (i=0; i<from->n; i++) {
115: MPI_Request_free(from->rev_requests + i);
116: }
117: }
119: PetscFree(to->local.vslots);
120: PetscFree(from->local.vslots);
121: PetscFree(to->local.slots_nonmatching);
122: PetscFree(from->local.slots_nonmatching);
123: PetscFree(to->rev_requests);
124: PetscFree(from->rev_requests);
125: PetscFree(to->requests);
126: PetscFree(from->requests);
127: PetscFree4(to->values,to->indices,to->starts,to->procs);
128: PetscFree2(to->sstatus,to->rstatus);
129: PetscFree4(from->values,from->indices,from->starts,from->procs);
130: VecScatterMemcpyPlanDestroy_PtoP(to,from);
131: PetscFree(from);
132: PetscFree(to);
133: return(0);
134: }
136: /* --------------------------------------------------------------------------------------*/
138: PetscErrorCode VecScatterCopy_PtoP_X_MPI1(VecScatter in,VecScatter out)
139: {
140: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
141: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
142: PetscErrorCode ierr;
143: PetscInt ny,bs = in_from->bs;
146: out->ops->begin = in->ops->begin;
147: out->ops->end = in->ops->end;
148: out->ops->copy = in->ops->copy;
149: out->ops->destroy = in->ops->destroy;
150: out->ops->view = in->ops->view;
152: /* allocate entire send scatter context */
153: PetscNewLog(out,&out_to);
154: PetscNewLog(out,&out_from);
156: ny = in_to->starts[in_to->n];
157: out_to->n = in_to->n;
158: out_to->format = in_to->format;
160: PetscMalloc1(out_to->n,&out_to->requests);
161: PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
162: PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
163: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
164: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
165: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
167: out->todata = (void*)out_to;
168: out_to->local.n = in_to->local.n;
169: out_to->local.nonmatching_computed = PETSC_FALSE;
170: out_to->local.n_nonmatching = 0;
171: out_to->local.slots_nonmatching = 0;
172: if (in_to->local.n) {
173: PetscMalloc1(in_to->local.n,&out_to->local.vslots);
174: PetscMalloc1(in_from->local.n,&out_from->local.vslots);
175: PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
176: PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
177: } else {
178: out_to->local.vslots = 0;
179: out_from->local.vslots = 0;
180: }
182: /* allocate entire receive context */
183: out_from->format = in_from->format;
184: ny = in_from->starts[in_from->n];
185: out_from->n = in_from->n;
187: PetscMalloc1(out_from->n,&out_from->requests);
188: PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
189: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
190: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
191: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
193: out->fromdata = (void*)out_from;
194: out_from->local.n = in_from->local.n;
195: out_from->local.nonmatching_computed = PETSC_FALSE;
196: out_from->local.n_nonmatching = 0;
197: out_from->local.slots_nonmatching = 0;
199: /*
200: set up the request arrays for use with isend_init() and irecv_init()
201: */
202: {
203: PetscMPIInt tag;
204: MPI_Comm comm;
205: PetscInt *sstarts = out_to->starts, *rstarts = out_from->starts;
206: PetscMPIInt *sprocs = out_to->procs, *rprocs = out_from->procs;
207: PetscInt i;
208: MPI_Request *swaits = out_to->requests,*rwaits = out_from->requests;
209: MPI_Request *rev_swaits,*rev_rwaits;
210: PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;
212: PetscMalloc1(in_to->n,&out_to->rev_requests);
213: PetscMalloc1(in_from->n,&out_from->rev_requests);
215: rev_rwaits = out_to->rev_requests;
216: rev_swaits = out_from->rev_requests;
218: out_from->bs = out_to->bs = bs;
219: tag = ((PetscObject)out)->tag;
220: PetscObjectGetComm((PetscObject)out,&comm);
222: /* Register the receives that you will use later (sends for scatter reverse) */
223: for (i=0; i<out_from->n; i++) {
224: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
225: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
226: }
227: for (i=0; i<out_to->n; i++) {
228: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
229: /* Register receives for scatter reverse */
230: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
231: }
232: }
234: VecScatterMemcpyPlanCopy_PtoP(in_to,in_from,out_to,out_from);
235: return(0);
236: }
238: PetscErrorCode VecScatterCopy_PtoP_AllToAll_MPI1(VecScatter in,VecScatter out)
239: {
240: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
241: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
242: PetscErrorCode ierr;
243: PetscInt ny,bs = in_from->bs;
244: PetscMPIInt size;
247: MPI_Comm_size(PetscObjectComm((PetscObject)in),&size);
249: out->ops->begin = in->ops->begin;
250: out->ops->end = in->ops->end;
251: out->ops->copy = in->ops->copy;
252: out->ops->destroy = in->ops->destroy;
253: out->ops->view = in->ops->view;
255: /* allocate entire send scatter context */
256: PetscNewLog(out,&out_to);
257: PetscNewLog(out,&out_from);
259: ny = in_to->starts[in_to->n];
260: out_to->n = in_to->n;
261: out_to->format = in_to->format;
263: PetscMalloc1(out_to->n,&out_to->requests);
264: PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
265: PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
266: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
267: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
268: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
270: out->todata = (void*)out_to;
271: out_to->local.n = in_to->local.n;
272: out_to->local.nonmatching_computed = PETSC_FALSE;
273: out_to->local.n_nonmatching = 0;
274: out_to->local.slots_nonmatching = 0;
275: if (in_to->local.n) {
276: PetscMalloc1(in_to->local.n,&out_to->local.vslots);
277: PetscMalloc1(in_from->local.n,&out_from->local.vslots);
278: PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
279: PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
280: } else {
281: out_to->local.vslots = 0;
282: out_from->local.vslots = 0;
283: }
285: /* allocate entire receive context */
286: out_from->format = in_from->format;
287: ny = in_from->starts[in_from->n];
288: out_from->n = in_from->n;
290: PetscMalloc1(out_from->n,&out_from->requests);
291: PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
292: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
293: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
294: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
296: out->fromdata = (void*)out_from;
297: out_from->local.n = in_from->local.n;
298: out_from->local.nonmatching_computed = PETSC_FALSE;
299: out_from->local.n_nonmatching = 0;
300: out_from->local.slots_nonmatching = 0;
302: VecScatterMemcpyPlanCopy_PtoP(in_to,in_from,out_to,out_from);
303: return(0);
304: }
306: /* Optimize a parallel vector to parallel vector vecscatter with memory copies */
307: PetscErrorCode VecScatterMemcpyPlanCreate_PtoP(VecScatter_MPI_General *to,VecScatter_MPI_General *from)
308: {
312: VecScatterMemcpyPlanCreate_Index(to->n,to->starts,to->indices,to->bs,&to->memcpy_plan);
313: VecScatterMemcpyPlanCreate_Index(from->n,from->starts,from->indices,to->bs,&from->memcpy_plan);
314: VecScatterMemcpyPlanCreate_SGToSG(to->bs,&to->local,&from->local);
315: return(0);
316: }
318: PetscErrorCode VecScatterMemcpyPlanCopy_PtoP(const VecScatter_MPI_General *in_to,const VecScatter_MPI_General *in_from,VecScatter_MPI_General *out_to,VecScatter_MPI_General *out_from)
319: {
323: VecScatterMemcpyPlanCopy(&in_to->memcpy_plan,&out_to->memcpy_plan);
324: VecScatterMemcpyPlanCopy(&in_from->memcpy_plan,&out_from->memcpy_plan);
325: VecScatterMemcpyPlanCopy(&in_to->local.memcpy_plan,&out_to->local.memcpy_plan);
326: VecScatterMemcpyPlanCopy(&in_from->local.memcpy_plan,&out_from->local.memcpy_plan);
327: return(0);
328: }
330: PetscErrorCode VecScatterMemcpyPlanDestroy_PtoP(VecScatter_MPI_General *to,VecScatter_MPI_General *from)
331: {
335: VecScatterMemcpyPlanDestroy(&to->memcpy_plan);
336: VecScatterMemcpyPlanDestroy(&from->memcpy_plan);
337: VecScatterMemcpyPlanDestroy(&to->local.memcpy_plan);
338: VecScatterMemcpyPlanDestroy(&from->local.memcpy_plan);
339: return(0);
340: }
342: /* --------------------------------------------------------------------------------------------------
343: Packs and unpacks the message data into send or from receive buffers.
345: These could be generated automatically.
347: Fortran kernels etc. could be used.
348: */
349: PETSC_STATIC_INLINE void Pack_MPI1_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
350: {
351: PetscInt i;
352: for (i=0; i<n; i++) y[i] = x[indicesx[i]];
353: }
355: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_1(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
356: {
357: PetscInt i;
360: switch (addv) {
361: case INSERT_VALUES:
362: case INSERT_ALL_VALUES:
363: for (i=0; i<n; i++) y[indicesy[i]] = x[i];
364: break;
365: case ADD_VALUES:
366: case ADD_ALL_VALUES:
367: for (i=0; i<n; i++) y[indicesy[i]] += x[i];
368: break;
369: #if !defined(PETSC_USE_COMPLEX)
370: case MAX_VALUES:
371: for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[i]);
372: #else
373: case MAX_VALUES:
374: #endif
375: case NOT_SET_VALUES:
376: break;
377: default:
378: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
379: }
380: return(0);
381: }
383: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
384: {
385: PetscInt i;
388: switch (addv) {
389: case INSERT_VALUES:
390: case INSERT_ALL_VALUES:
391: for (i=0; i<n; i++) y[indicesy[i]] = x[indicesx[i]];
392: break;
393: case ADD_VALUES:
394: case ADD_ALL_VALUES:
395: for (i=0; i<n; i++) y[indicesy[i]] += x[indicesx[i]];
396: break;
397: #if !defined(PETSC_USE_COMPLEX)
398: case MAX_VALUES:
399: for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[indicesx[i]]);
400: #else
401: case MAX_VALUES:
402: #endif
403: case NOT_SET_VALUES:
404: break;
405: default:
406: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
407: }
408: return(0);
409: }
411: /* ----------------------------------------------------------------------------------------------- */
412: PETSC_STATIC_INLINE void Pack_MPI1_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
413: {
414: PetscInt i,idx;
416: for (i=0; i<n; i++) {
417: idx = *indicesx++;
418: y[0] = x[idx];
419: y[1] = x[idx+1];
420: y += 2;
421: }
422: }
424: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_2(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
425: {
426: PetscInt i,idy;
429: switch (addv) {
430: case INSERT_VALUES:
431: case INSERT_ALL_VALUES:
432: for (i=0; i<n; i++) {
433: idy = *indicesy++;
434: y[idy] = x[0];
435: y[idy+1] = x[1];
436: x += 2;
437: }
438: break;
439: case ADD_VALUES:
440: case ADD_ALL_VALUES:
441: for (i=0; i<n; i++) {
442: idy = *indicesy++;
443: y[idy] += x[0];
444: y[idy+1] += x[1];
445: x += 2;
446: }
447: break;
448: #if !defined(PETSC_USE_COMPLEX)
449: case MAX_VALUES:
450: for (i=0; i<n; i++) {
451: idy = *indicesy++;
452: y[idy] = PetscMax(y[idy],x[0]);
453: y[idy+1] = PetscMax(y[idy+1],x[1]);
454: x += 2;
455: }
456: #else
457: case MAX_VALUES:
458: #endif
459: case NOT_SET_VALUES:
460: break;
461: default:
462: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
463: }
464: return(0);
465: }
467: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
468: {
469: PetscInt i,idx,idy;
472: switch (addv) {
473: case INSERT_VALUES:
474: case INSERT_ALL_VALUES:
475: for (i=0; i<n; i++) {
476: idx = *indicesx++;
477: idy = *indicesy++;
478: y[idy] = x[idx];
479: y[idy+1] = x[idx+1];
480: }
481: break;
482: case ADD_VALUES:
483: case ADD_ALL_VALUES:
484: for (i=0; i<n; i++) {
485: idx = *indicesx++;
486: idy = *indicesy++;
487: y[idy] += x[idx];
488: y[idy+1] += x[idx+1];
489: }
490: break;
491: #if !defined(PETSC_USE_COMPLEX)
492: case MAX_VALUES:
493: for (i=0; i<n; i++) {
494: idx = *indicesx++;
495: idy = *indicesy++;
496: y[idy] = PetscMax(y[idy],x[idx]);
497: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
498: }
499: #else
500: case MAX_VALUES:
501: #endif
502: case NOT_SET_VALUES:
503: break;
504: default:
505: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
506: }
507: return(0);
508: }
509: /* ----------------------------------------------------------------------------------------------- */
510: PETSC_STATIC_INLINE void Pack_MPI1_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
511: {
512: PetscInt i,idx;
514: for (i=0; i<n; i++) {
515: idx = *indicesx++;
516: y[0] = x[idx];
517: y[1] = x[idx+1];
518: y[2] = x[idx+2];
519: y += 3;
520: }
521: }
522: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_3(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
523: {
524: PetscInt i,idy;
527: switch (addv) {
528: case INSERT_VALUES:
529: case INSERT_ALL_VALUES:
530: for (i=0; i<n; i++) {
531: idy = *indicesy++;
532: y[idy] = x[0];
533: y[idy+1] = x[1];
534: y[idy+2] = x[2];
535: x += 3;
536: }
537: break;
538: case ADD_VALUES:
539: case ADD_ALL_VALUES:
540: for (i=0; i<n; i++) {
541: idy = *indicesy++;
542: y[idy] += x[0];
543: y[idy+1] += x[1];
544: y[idy+2] += x[2];
545: x += 3;
546: }
547: break;
548: #if !defined(PETSC_USE_COMPLEX)
549: case MAX_VALUES:
550: for (i=0; i<n; i++) {
551: idy = *indicesy++;
552: y[idy] = PetscMax(y[idy],x[0]);
553: y[idy+1] = PetscMax(y[idy+1],x[1]);
554: y[idy+2] = PetscMax(y[idy+2],x[2]);
555: x += 3;
556: }
557: #else
558: case MAX_VALUES:
559: #endif
560: case NOT_SET_VALUES:
561: break;
562: default:
563: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
564: }
565: return(0);
566: }
568: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
569: {
570: PetscInt i,idx,idy;
573: switch (addv) {
574: case INSERT_VALUES:
575: case INSERT_ALL_VALUES:
576: for (i=0; i<n; i++) {
577: idx = *indicesx++;
578: idy = *indicesy++;
579: y[idy] = x[idx];
580: y[idy+1] = x[idx+1];
581: y[idy+2] = x[idx+2];
582: }
583: break;
584: case ADD_VALUES:
585: case ADD_ALL_VALUES:
586: for (i=0; i<n; i++) {
587: idx = *indicesx++;
588: idy = *indicesy++;
589: y[idy] += x[idx];
590: y[idy+1] += x[idx+1];
591: y[idy+2] += x[idx+2];
592: }
593: break;
594: #if !defined(PETSC_USE_COMPLEX)
595: case MAX_VALUES:
596: for (i=0; i<n; i++) {
597: idx = *indicesx++;
598: idy = *indicesy++;
599: y[idy] = PetscMax(y[idy],x[idx]);
600: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
601: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
602: }
603: #else
604: case MAX_VALUES:
605: #endif
606: case NOT_SET_VALUES:
607: break;
608: default:
609: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
610: }
611: return(0);
612: }
613: /* ----------------------------------------------------------------------------------------------- */
614: PETSC_STATIC_INLINE void Pack_MPI1_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
615: {
616: PetscInt i,idx;
618: for (i=0; i<n; i++) {
619: idx = *indicesx++;
620: y[0] = x[idx];
621: y[1] = x[idx+1];
622: y[2] = x[idx+2];
623: y[3] = x[idx+3];
624: y += 4;
625: }
626: }
627: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_4(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
628: {
629: PetscInt i,idy;
632: switch (addv) {
633: case INSERT_VALUES:
634: case INSERT_ALL_VALUES:
635: for (i=0; i<n; i++) {
636: idy = *indicesy++;
637: y[idy] = x[0];
638: y[idy+1] = x[1];
639: y[idy+2] = x[2];
640: y[idy+3] = x[3];
641: x += 4;
642: }
643: break;
644: case ADD_VALUES:
645: case ADD_ALL_VALUES:
646: for (i=0; i<n; i++) {
647: idy = *indicesy++;
648: y[idy] += x[0];
649: y[idy+1] += x[1];
650: y[idy+2] += x[2];
651: y[idy+3] += x[3];
652: x += 4;
653: }
654: break;
655: #if !defined(PETSC_USE_COMPLEX)
656: case MAX_VALUES:
657: for (i=0; i<n; i++) {
658: idy = *indicesy++;
659: y[idy] = PetscMax(y[idy],x[0]);
660: y[idy+1] = PetscMax(y[idy+1],x[1]);
661: y[idy+2] = PetscMax(y[idy+2],x[2]);
662: y[idy+3] = PetscMax(y[idy+3],x[3]);
663: x += 4;
664: }
665: #else
666: case MAX_VALUES:
667: #endif
668: case NOT_SET_VALUES:
669: break;
670: default:
671: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
672: }
673: return(0);
674: }
676: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
677: {
678: PetscInt i,idx,idy;
681: switch (addv) {
682: case INSERT_VALUES:
683: case INSERT_ALL_VALUES:
684: for (i=0; i<n; i++) {
685: idx = *indicesx++;
686: idy = *indicesy++;
687: y[idy] = x[idx];
688: y[idy+1] = x[idx+1];
689: y[idy+2] = x[idx+2];
690: y[idy+3] = x[idx+3];
691: }
692: break;
693: case ADD_VALUES:
694: case ADD_ALL_VALUES:
695: for (i=0; i<n; i++) {
696: idx = *indicesx++;
697: idy = *indicesy++;
698: y[idy] += x[idx];
699: y[idy+1] += x[idx+1];
700: y[idy+2] += x[idx+2];
701: y[idy+3] += x[idx+3];
702: }
703: break;
704: #if !defined(PETSC_USE_COMPLEX)
705: case MAX_VALUES:
706: for (i=0; i<n; i++) {
707: idx = *indicesx++;
708: idy = *indicesy++;
709: y[idy] = PetscMax(y[idy],x[idx]);
710: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
711: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
712: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
713: }
714: #else
715: case MAX_VALUES:
716: #endif
717: case NOT_SET_VALUES:
718: break;
719: default:
720: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
721: }
722: return(0);
723: }
724: /* ----------------------------------------------------------------------------------------------- */
725: PETSC_STATIC_INLINE void Pack_MPI1_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
726: {
727: PetscInt i,idx;
729: for (i=0; i<n; i++) {
730: idx = *indicesx++;
731: y[0] = x[idx];
732: y[1] = x[idx+1];
733: y[2] = x[idx+2];
734: y[3] = x[idx+3];
735: y[4] = x[idx+4];
736: y += 5;
737: }
738: }
740: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_5(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
741: {
742: PetscInt i,idy;
745: switch (addv) {
746: case INSERT_VALUES:
747: case INSERT_ALL_VALUES:
748: for (i=0; i<n; i++) {
749: idy = *indicesy++;
750: y[idy] = x[0];
751: y[idy+1] = x[1];
752: y[idy+2] = x[2];
753: y[idy+3] = x[3];
754: y[idy+4] = x[4];
755: x += 5;
756: }
757: break;
758: case ADD_VALUES:
759: case ADD_ALL_VALUES:
760: for (i=0; i<n; i++) {
761: idy = *indicesy++;
762: y[idy] += x[0];
763: y[idy+1] += x[1];
764: y[idy+2] += x[2];
765: y[idy+3] += x[3];
766: y[idy+4] += x[4];
767: x += 5;
768: }
769: break;
770: #if !defined(PETSC_USE_COMPLEX)
771: case MAX_VALUES:
772: for (i=0; i<n; i++) {
773: idy = *indicesy++;
774: y[idy] = PetscMax(y[idy],x[0]);
775: y[idy+1] = PetscMax(y[idy+1],x[1]);
776: y[idy+2] = PetscMax(y[idy+2],x[2]);
777: y[idy+3] = PetscMax(y[idy+3],x[3]);
778: y[idy+4] = PetscMax(y[idy+4],x[4]);
779: x += 5;
780: }
781: #else
782: case MAX_VALUES:
783: #endif
784: case NOT_SET_VALUES:
785: break;
786: default:
787: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
788: }
789: return(0);
790: }
792: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
793: {
794: PetscInt i,idx,idy;
797: switch (addv) {
798: case INSERT_VALUES:
799: case INSERT_ALL_VALUES:
800: for (i=0; i<n; i++) {
801: idx = *indicesx++;
802: idy = *indicesy++;
803: y[idy] = x[idx];
804: y[idy+1] = x[idx+1];
805: y[idy+2] = x[idx+2];
806: y[idy+3] = x[idx+3];
807: y[idy+4] = x[idx+4];
808: }
809: break;
810: case ADD_VALUES:
811: case ADD_ALL_VALUES:
812: for (i=0; i<n; i++) {
813: idx = *indicesx++;
814: idy = *indicesy++;
815: y[idy] += x[idx];
816: y[idy+1] += x[idx+1];
817: y[idy+2] += x[idx+2];
818: y[idy+3] += x[idx+3];
819: y[idy+4] += x[idx+4];
820: }
821: break;
822: #if !defined(PETSC_USE_COMPLEX)
823: case MAX_VALUES:
824: for (i=0; i<n; i++) {
825: idx = *indicesx++;
826: idy = *indicesy++;
827: y[idy] = PetscMax(y[idy],x[idx]);
828: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
829: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
830: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
831: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
832: }
833: #else
834: case MAX_VALUES:
835: #endif
836: case NOT_SET_VALUES:
837: break;
838: default:
839: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
840: }
841: return(0);
842: }
843: /* ----------------------------------------------------------------------------------------------- */
844: PETSC_STATIC_INLINE void Pack_MPI1_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
845: {
846: PetscInt i,idx;
848: for (i=0; i<n; i++) {
849: idx = *indicesx++;
850: y[0] = x[idx];
851: y[1] = x[idx+1];
852: y[2] = x[idx+2];
853: y[3] = x[idx+3];
854: y[4] = x[idx+4];
855: y[5] = x[idx+5];
856: y += 6;
857: }
858: }
860: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_6(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
861: {
862: PetscInt i,idy;
865: switch (addv) {
866: case INSERT_VALUES:
867: case INSERT_ALL_VALUES:
868: for (i=0; i<n; i++) {
869: idy = *indicesy++;
870: y[idy] = x[0];
871: y[idy+1] = x[1];
872: y[idy+2] = x[2];
873: y[idy+3] = x[3];
874: y[idy+4] = x[4];
875: y[idy+5] = x[5];
876: x += 6;
877: }
878: break;
879: case ADD_VALUES:
880: case ADD_ALL_VALUES:
881: for (i=0; i<n; i++) {
882: idy = *indicesy++;
883: y[idy] += x[0];
884: y[idy+1] += x[1];
885: y[idy+2] += x[2];
886: y[idy+3] += x[3];
887: y[idy+4] += x[4];
888: y[idy+5] += x[5];
889: x += 6;
890: }
891: break;
892: #if !defined(PETSC_USE_COMPLEX)
893: case MAX_VALUES:
894: for (i=0; i<n; i++) {
895: idy = *indicesy++;
896: y[idy] = PetscMax(y[idy],x[0]);
897: y[idy+1] = PetscMax(y[idy+1],x[1]);
898: y[idy+2] = PetscMax(y[idy+2],x[2]);
899: y[idy+3] = PetscMax(y[idy+3],x[3]);
900: y[idy+4] = PetscMax(y[idy+4],x[4]);
901: y[idy+5] = PetscMax(y[idy+5],x[5]);
902: x += 6;
903: }
904: #else
905: case MAX_VALUES:
906: #endif
907: case NOT_SET_VALUES:
908: break;
909: default:
910: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
911: }
912: return(0);
913: }
915: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
916: {
917: PetscInt i,idx,idy;
920: switch (addv) {
921: case INSERT_VALUES:
922: case INSERT_ALL_VALUES:
923: for (i=0; i<n; i++) {
924: idx = *indicesx++;
925: idy = *indicesy++;
926: y[idy] = x[idx];
927: y[idy+1] = x[idx+1];
928: y[idy+2] = x[idx+2];
929: y[idy+3] = x[idx+3];
930: y[idy+4] = x[idx+4];
931: y[idy+5] = x[idx+5];
932: }
933: break;
934: case ADD_VALUES:
935: case ADD_ALL_VALUES:
936: for (i=0; i<n; i++) {
937: idx = *indicesx++;
938: idy = *indicesy++;
939: y[idy] += x[idx];
940: y[idy+1] += x[idx+1];
941: y[idy+2] += x[idx+2];
942: y[idy+3] += x[idx+3];
943: y[idy+4] += x[idx+4];
944: y[idy+5] += x[idx+5];
945: }
946: break;
947: #if !defined(PETSC_USE_COMPLEX)
948: case MAX_VALUES:
949: for (i=0; i<n; i++) {
950: idx = *indicesx++;
951: idy = *indicesy++;
952: y[idy] = PetscMax(y[idy],x[idx]);
953: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
954: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
955: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
956: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
957: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
958: }
959: #else
960: case MAX_VALUES:
961: #endif
962: case NOT_SET_VALUES:
963: break;
964: default:
965: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
966: }
967: return(0);
968: }
969: /* ----------------------------------------------------------------------------------------------- */
970: PETSC_STATIC_INLINE void Pack_MPI1_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
971: {
972: PetscInt i,idx;
974: for (i=0; i<n; i++) {
975: idx = *indicesx++;
976: y[0] = x[idx];
977: y[1] = x[idx+1];
978: y[2] = x[idx+2];
979: y[3] = x[idx+3];
980: y[4] = x[idx+4];
981: y[5] = x[idx+5];
982: y[6] = x[idx+6];
983: y += 7;
984: }
985: }
987: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_7(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
988: {
989: PetscInt i,idy;
992: switch (addv) {
993: case INSERT_VALUES:
994: case INSERT_ALL_VALUES:
995: for (i=0; i<n; i++) {
996: idy = *indicesy++;
997: y[idy] = x[0];
998: y[idy+1] = x[1];
999: y[idy+2] = x[2];
1000: y[idy+3] = x[3];
1001: y[idy+4] = x[4];
1002: y[idy+5] = x[5];
1003: y[idy+6] = x[6];
1004: x += 7;
1005: }
1006: break;
1007: case ADD_VALUES:
1008: case ADD_ALL_VALUES:
1009: for (i=0; i<n; i++) {
1010: idy = *indicesy++;
1011: y[idy] += x[0];
1012: y[idy+1] += x[1];
1013: y[idy+2] += x[2];
1014: y[idy+3] += x[3];
1015: y[idy+4] += x[4];
1016: y[idy+5] += x[5];
1017: y[idy+6] += x[6];
1018: x += 7;
1019: }
1020: break;
1021: #if !defined(PETSC_USE_COMPLEX)
1022: case MAX_VALUES:
1023: for (i=0; i<n; i++) {
1024: idy = *indicesy++;
1025: y[idy] = PetscMax(y[idy],x[0]);
1026: y[idy+1] = PetscMax(y[idy+1],x[1]);
1027: y[idy+2] = PetscMax(y[idy+2],x[2]);
1028: y[idy+3] = PetscMax(y[idy+3],x[3]);
1029: y[idy+4] = PetscMax(y[idy+4],x[4]);
1030: y[idy+5] = PetscMax(y[idy+5],x[5]);
1031: y[idy+6] = PetscMax(y[idy+6],x[6]);
1032: x += 7;
1033: }
1034: #else
1035: case MAX_VALUES:
1036: #endif
1037: case NOT_SET_VALUES:
1038: break;
1039: default:
1040: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1041: }
1042: return(0);
1043: }
1045: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1046: {
1047: PetscInt i,idx,idy;
1050: switch (addv) {
1051: case INSERT_VALUES:
1052: case INSERT_ALL_VALUES:
1053: for (i=0; i<n; i++) {
1054: idx = *indicesx++;
1055: idy = *indicesy++;
1056: y[idy] = x[idx];
1057: y[idy+1] = x[idx+1];
1058: y[idy+2] = x[idx+2];
1059: y[idy+3] = x[idx+3];
1060: y[idy+4] = x[idx+4];
1061: y[idy+5] = x[idx+5];
1062: y[idy+6] = x[idx+6];
1063: }
1064: break;
1065: case ADD_VALUES:
1066: case ADD_ALL_VALUES:
1067: for (i=0; i<n; i++) {
1068: idx = *indicesx++;
1069: idy = *indicesy++;
1070: y[idy] += x[idx];
1071: y[idy+1] += x[idx+1];
1072: y[idy+2] += x[idx+2];
1073: y[idy+3] += x[idx+3];
1074: y[idy+4] += x[idx+4];
1075: y[idy+5] += x[idx+5];
1076: y[idy+6] += x[idx+6];
1077: }
1078: break;
1079: #if !defined(PETSC_USE_COMPLEX)
1080: case MAX_VALUES:
1081: for (i=0; i<n; i++) {
1082: idx = *indicesx++;
1083: idy = *indicesy++;
1084: y[idy] = PetscMax(y[idy],x[idx]);
1085: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1086: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1087: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1088: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1089: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1090: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1091: }
1092: #else
1093: case MAX_VALUES:
1094: #endif
1095: case NOT_SET_VALUES:
1096: break;
1097: default:
1098: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1099: }
1100: return(0);
1101: }
1102: /* ----------------------------------------------------------------------------------------------- */
1103: PETSC_STATIC_INLINE void Pack_MPI1_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1104: {
1105: PetscInt i,idx;
1107: for (i=0; i<n; i++) {
1108: idx = *indicesx++;
1109: y[0] = x[idx];
1110: y[1] = x[idx+1];
1111: y[2] = x[idx+2];
1112: y[3] = x[idx+3];
1113: y[4] = x[idx+4];
1114: y[5] = x[idx+5];
1115: y[6] = x[idx+6];
1116: y[7] = x[idx+7];
1117: y += 8;
1118: }
1119: }
1121: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_8(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1122: {
1123: PetscInt i,idy;
1126: switch (addv) {
1127: case INSERT_VALUES:
1128: case INSERT_ALL_VALUES:
1129: for (i=0; i<n; i++) {
1130: idy = *indicesy++;
1131: y[idy] = x[0];
1132: y[idy+1] = x[1];
1133: y[idy+2] = x[2];
1134: y[idy+3] = x[3];
1135: y[idy+4] = x[4];
1136: y[idy+5] = x[5];
1137: y[idy+6] = x[6];
1138: y[idy+7] = x[7];
1139: x += 8;
1140: }
1141: break;
1142: case ADD_VALUES:
1143: case ADD_ALL_VALUES:
1144: for (i=0; i<n; i++) {
1145: idy = *indicesy++;
1146: y[idy] += x[0];
1147: y[idy+1] += x[1];
1148: y[idy+2] += x[2];
1149: y[idy+3] += x[3];
1150: y[idy+4] += x[4];
1151: y[idy+5] += x[5];
1152: y[idy+6] += x[6];
1153: y[idy+7] += x[7];
1154: x += 8;
1155: }
1156: break;
1157: #if !defined(PETSC_USE_COMPLEX)
1158: case MAX_VALUES:
1159: for (i=0; i<n; i++) {
1160: idy = *indicesy++;
1161: y[idy] = PetscMax(y[idy],x[0]);
1162: y[idy+1] = PetscMax(y[idy+1],x[1]);
1163: y[idy+2] = PetscMax(y[idy+2],x[2]);
1164: y[idy+3] = PetscMax(y[idy+3],x[3]);
1165: y[idy+4] = PetscMax(y[idy+4],x[4]);
1166: y[idy+5] = PetscMax(y[idy+5],x[5]);
1167: y[idy+6] = PetscMax(y[idy+6],x[6]);
1168: y[idy+7] = PetscMax(y[idy+7],x[7]);
1169: x += 8;
1170: }
1171: #else
1172: case MAX_VALUES:
1173: #endif
1174: case NOT_SET_VALUES:
1175: break;
1176: default:
1177: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1178: }
1179: return(0);
1180: }
1182: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1183: {
1184: PetscInt i,idx,idy;
1187: switch (addv) {
1188: case INSERT_VALUES:
1189: case INSERT_ALL_VALUES:
1190: for (i=0; i<n; i++) {
1191: idx = *indicesx++;
1192: idy = *indicesy++;
1193: y[idy] = x[idx];
1194: y[idy+1] = x[idx+1];
1195: y[idy+2] = x[idx+2];
1196: y[idy+3] = x[idx+3];
1197: y[idy+4] = x[idx+4];
1198: y[idy+5] = x[idx+5];
1199: y[idy+6] = x[idx+6];
1200: y[idy+7] = x[idx+7];
1201: }
1202: break;
1203: case ADD_VALUES:
1204: case ADD_ALL_VALUES:
1205: for (i=0; i<n; i++) {
1206: idx = *indicesx++;
1207: idy = *indicesy++;
1208: y[idy] += x[idx];
1209: y[idy+1] += x[idx+1];
1210: y[idy+2] += x[idx+2];
1211: y[idy+3] += x[idx+3];
1212: y[idy+4] += x[idx+4];
1213: y[idy+5] += x[idx+5];
1214: y[idy+6] += x[idx+6];
1215: y[idy+7] += x[idx+7];
1216: }
1217: break;
1218: #if !defined(PETSC_USE_COMPLEX)
1219: case MAX_VALUES:
1220: for (i=0; i<n; i++) {
1221: idx = *indicesx++;
1222: idy = *indicesy++;
1223: y[idy] = PetscMax(y[idy],x[idx]);
1224: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1225: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1226: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1227: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1228: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1229: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1230: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1231: }
1232: #else
1233: case MAX_VALUES:
1234: #endif
1235: case NOT_SET_VALUES:
1236: break;
1237: default:
1238: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1239: }
1240: return(0);
1241: }
1243: PETSC_STATIC_INLINE void Pack_MPI1_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1244: {
1245: PetscInt i,idx;
1247: for (i=0; i<n; i++) {
1248: idx = *indicesx++;
1249: y[0] = x[idx];
1250: y[1] = x[idx+1];
1251: y[2] = x[idx+2];
1252: y[3] = x[idx+3];
1253: y[4] = x[idx+4];
1254: y[5] = x[idx+5];
1255: y[6] = x[idx+6];
1256: y[7] = x[idx+7];
1257: y[8] = x[idx+8];
1258: y += 9;
1259: }
1260: }
1262: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_9(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1263: {
1264: PetscInt i,idy;
1267: switch (addv) {
1268: case INSERT_VALUES:
1269: case INSERT_ALL_VALUES:
1270: for (i=0; i<n; i++) {
1271: idy = *indicesy++;
1272: y[idy] = x[0];
1273: y[idy+1] = x[1];
1274: y[idy+2] = x[2];
1275: y[idy+3] = x[3];
1276: y[idy+4] = x[4];
1277: y[idy+5] = x[5];
1278: y[idy+6] = x[6];
1279: y[idy+7] = x[7];
1280: y[idy+8] = x[8];
1281: x += 9;
1282: }
1283: break;
1284: case ADD_VALUES:
1285: case ADD_ALL_VALUES:
1286: for (i=0; i<n; i++) {
1287: idy = *indicesy++;
1288: y[idy] += x[0];
1289: y[idy+1] += x[1];
1290: y[idy+2] += x[2];
1291: y[idy+3] += x[3];
1292: y[idy+4] += x[4];
1293: y[idy+5] += x[5];
1294: y[idy+6] += x[6];
1295: y[idy+7] += x[7];
1296: y[idy+8] += x[8];
1297: x += 9;
1298: }
1299: break;
1300: #if !defined(PETSC_USE_COMPLEX)
1301: case MAX_VALUES:
1302: for (i=0; i<n; i++) {
1303: idy = *indicesy++;
1304: y[idy] = PetscMax(y[idy],x[0]);
1305: y[idy+1] = PetscMax(y[idy+1],x[1]);
1306: y[idy+2] = PetscMax(y[idy+2],x[2]);
1307: y[idy+3] = PetscMax(y[idy+3],x[3]);
1308: y[idy+4] = PetscMax(y[idy+4],x[4]);
1309: y[idy+5] = PetscMax(y[idy+5],x[5]);
1310: y[idy+6] = PetscMax(y[idy+6],x[6]);
1311: y[idy+7] = PetscMax(y[idy+7],x[7]);
1312: y[idy+8] = PetscMax(y[idy+8],x[8]);
1313: x += 9;
1314: }
1315: #else
1316: case MAX_VALUES:
1317: #endif
1318: case NOT_SET_VALUES:
1319: break;
1320: default:
1321: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1322: }
1323: return(0);
1324: }
1326: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1327: {
1328: PetscInt i,idx,idy;
1331: switch (addv) {
1332: case INSERT_VALUES:
1333: case INSERT_ALL_VALUES:
1334: for (i=0; i<n; i++) {
1335: idx = *indicesx++;
1336: idy = *indicesy++;
1337: y[idy] = x[idx];
1338: y[idy+1] = x[idx+1];
1339: y[idy+2] = x[idx+2];
1340: y[idy+3] = x[idx+3];
1341: y[idy+4] = x[idx+4];
1342: y[idy+5] = x[idx+5];
1343: y[idy+6] = x[idx+6];
1344: y[idy+7] = x[idx+7];
1345: y[idy+8] = x[idx+8];
1346: }
1347: break;
1348: case ADD_VALUES:
1349: case ADD_ALL_VALUES:
1350: for (i=0; i<n; i++) {
1351: idx = *indicesx++;
1352: idy = *indicesy++;
1353: y[idy] += x[idx];
1354: y[idy+1] += x[idx+1];
1355: y[idy+2] += x[idx+2];
1356: y[idy+3] += x[idx+3];
1357: y[idy+4] += x[idx+4];
1358: y[idy+5] += x[idx+5];
1359: y[idy+6] += x[idx+6];
1360: y[idy+7] += x[idx+7];
1361: y[idy+8] += x[idx+8];
1362: }
1363: break;
1364: #if !defined(PETSC_USE_COMPLEX)
1365: case MAX_VALUES:
1366: for (i=0; i<n; i++) {
1367: idx = *indicesx++;
1368: idy = *indicesy++;
1369: y[idy] = PetscMax(y[idy],x[idx]);
1370: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1371: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1372: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1373: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1374: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1375: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1376: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1377: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1378: }
1379: #else
1380: case MAX_VALUES:
1381: #endif
1382: case NOT_SET_VALUES:
1383: break;
1384: default:
1385: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1386: }
1387: return(0);
1388: }
1390: PETSC_STATIC_INLINE void Pack_MPI1_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1391: {
1392: PetscInt i,idx;
1394: for (i=0; i<n; i++) {
1395: idx = *indicesx++;
1396: y[0] = x[idx];
1397: y[1] = x[idx+1];
1398: y[2] = x[idx+2];
1399: y[3] = x[idx+3];
1400: y[4] = x[idx+4];
1401: y[5] = x[idx+5];
1402: y[6] = x[idx+6];
1403: y[7] = x[idx+7];
1404: y[8] = x[idx+8];
1405: y[9] = x[idx+9];
1406: y += 10;
1407: }
1408: }
1410: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_10(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1411: {
1412: PetscInt i,idy;
1415: switch (addv) {
1416: case INSERT_VALUES:
1417: case INSERT_ALL_VALUES:
1418: for (i=0; i<n; i++) {
1419: idy = *indicesy++;
1420: y[idy] = x[0];
1421: y[idy+1] = x[1];
1422: y[idy+2] = x[2];
1423: y[idy+3] = x[3];
1424: y[idy+4] = x[4];
1425: y[idy+5] = x[5];
1426: y[idy+6] = x[6];
1427: y[idy+7] = x[7];
1428: y[idy+8] = x[8];
1429: y[idy+9] = x[9];
1430: x += 10;
1431: }
1432: break;
1433: case ADD_VALUES:
1434: case ADD_ALL_VALUES:
1435: for (i=0; i<n; i++) {
1436: idy = *indicesy++;
1437: y[idy] += x[0];
1438: y[idy+1] += x[1];
1439: y[idy+2] += x[2];
1440: y[idy+3] += x[3];
1441: y[idy+4] += x[4];
1442: y[idy+5] += x[5];
1443: y[idy+6] += x[6];
1444: y[idy+7] += x[7];
1445: y[idy+8] += x[8];
1446: y[idy+9] += x[9];
1447: x += 10;
1448: }
1449: break;
1450: #if !defined(PETSC_USE_COMPLEX)
1451: case MAX_VALUES:
1452: for (i=0; i<n; i++) {
1453: idy = *indicesy++;
1454: y[idy] = PetscMax(y[idy],x[0]);
1455: y[idy+1] = PetscMax(y[idy+1],x[1]);
1456: y[idy+2] = PetscMax(y[idy+2],x[2]);
1457: y[idy+3] = PetscMax(y[idy+3],x[3]);
1458: y[idy+4] = PetscMax(y[idy+4],x[4]);
1459: y[idy+5] = PetscMax(y[idy+5],x[5]);
1460: y[idy+6] = PetscMax(y[idy+6],x[6]);
1461: y[idy+7] = PetscMax(y[idy+7],x[7]);
1462: y[idy+8] = PetscMax(y[idy+8],x[8]);
1463: y[idy+9] = PetscMax(y[idy+9],x[9]);
1464: x += 10;
1465: }
1466: #else
1467: case MAX_VALUES:
1468: #endif
1469: case NOT_SET_VALUES:
1470: break;
1471: default:
1472: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1473: }
1474: return(0);
1475: }
1477: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1478: {
1479: PetscInt i,idx,idy;
1482: switch (addv) {
1483: case INSERT_VALUES:
1484: case INSERT_ALL_VALUES:
1485: for (i=0; i<n; i++) {
1486: idx = *indicesx++;
1487: idy = *indicesy++;
1488: y[idy] = x[idx];
1489: y[idy+1] = x[idx+1];
1490: y[idy+2] = x[idx+2];
1491: y[idy+3] = x[idx+3];
1492: y[idy+4] = x[idx+4];
1493: y[idy+5] = x[idx+5];
1494: y[idy+6] = x[idx+6];
1495: y[idy+7] = x[idx+7];
1496: y[idy+8] = x[idx+8];
1497: y[idy+9] = x[idx+9];
1498: }
1499: break;
1500: case ADD_VALUES:
1501: case ADD_ALL_VALUES:
1502: for (i=0; i<n; i++) {
1503: idx = *indicesx++;
1504: idy = *indicesy++;
1505: y[idy] += x[idx];
1506: y[idy+1] += x[idx+1];
1507: y[idy+2] += x[idx+2];
1508: y[idy+3] += x[idx+3];
1509: y[idy+4] += x[idx+4];
1510: y[idy+5] += x[idx+5];
1511: y[idy+6] += x[idx+6];
1512: y[idy+7] += x[idx+7];
1513: y[idy+8] += x[idx+8];
1514: y[idy+9] += x[idx+9];
1515: }
1516: break;
1517: #if !defined(PETSC_USE_COMPLEX)
1518: case MAX_VALUES:
1519: for (i=0; i<n; i++) {
1520: idx = *indicesx++;
1521: idy = *indicesy++;
1522: y[idy] = PetscMax(y[idy],x[idx]);
1523: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1524: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1525: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1526: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1527: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1528: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1529: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1530: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1531: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1532: }
1533: #else
1534: case MAX_VALUES:
1535: #endif
1536: case NOT_SET_VALUES:
1537: break;
1538: default:
1539: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1540: }
1541: return(0);
1542: }
1544: PETSC_STATIC_INLINE void Pack_MPI1_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1545: {
1546: PetscInt i,idx;
1548: for (i=0; i<n; i++) {
1549: idx = *indicesx++;
1550: y[0] = x[idx];
1551: y[1] = x[idx+1];
1552: y[2] = x[idx+2];
1553: y[3] = x[idx+3];
1554: y[4] = x[idx+4];
1555: y[5] = x[idx+5];
1556: y[6] = x[idx+6];
1557: y[7] = x[idx+7];
1558: y[8] = x[idx+8];
1559: y[9] = x[idx+9];
1560: y[10] = x[idx+10];
1561: y += 11;
1562: }
1563: }
1565: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_11(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1566: {
1567: PetscInt i,idy;
1570: switch (addv) {
1571: case INSERT_VALUES:
1572: case INSERT_ALL_VALUES:
1573: for (i=0; i<n; i++) {
1574: idy = *indicesy++;
1575: y[idy] = x[0];
1576: y[idy+1] = x[1];
1577: y[idy+2] = x[2];
1578: y[idy+3] = x[3];
1579: y[idy+4] = x[4];
1580: y[idy+5] = x[5];
1581: y[idy+6] = x[6];
1582: y[idy+7] = x[7];
1583: y[idy+8] = x[8];
1584: y[idy+9] = x[9];
1585: y[idy+10] = x[10];
1586: x += 11;
1587: }
1588: break;
1589: case ADD_VALUES:
1590: case ADD_ALL_VALUES:
1591: for (i=0; i<n; i++) {
1592: idy = *indicesy++;
1593: y[idy] += x[0];
1594: y[idy+1] += x[1];
1595: y[idy+2] += x[2];
1596: y[idy+3] += x[3];
1597: y[idy+4] += x[4];
1598: y[idy+5] += x[5];
1599: y[idy+6] += x[6];
1600: y[idy+7] += x[7];
1601: y[idy+8] += x[8];
1602: y[idy+9] += x[9];
1603: y[idy+10] += x[10];
1604: x += 11;
1605: }
1606: break;
1607: #if !defined(PETSC_USE_COMPLEX)
1608: case MAX_VALUES:
1609: for (i=0; i<n; i++) {
1610: idy = *indicesy++;
1611: y[idy] = PetscMax(y[idy],x[0]);
1612: y[idy+1] = PetscMax(y[idy+1],x[1]);
1613: y[idy+2] = PetscMax(y[idy+2],x[2]);
1614: y[idy+3] = PetscMax(y[idy+3],x[3]);
1615: y[idy+4] = PetscMax(y[idy+4],x[4]);
1616: y[idy+5] = PetscMax(y[idy+5],x[5]);
1617: y[idy+6] = PetscMax(y[idy+6],x[6]);
1618: y[idy+7] = PetscMax(y[idy+7],x[7]);
1619: y[idy+8] = PetscMax(y[idy+8],x[8]);
1620: y[idy+9] = PetscMax(y[idy+9],x[9]);
1621: y[idy+10] = PetscMax(y[idy+10],x[10]);
1622: x += 11;
1623: }
1624: #else
1625: case MAX_VALUES:
1626: #endif
1627: case NOT_SET_VALUES:
1628: break;
1629: default:
1630: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1631: }
1632: return(0);
1633: }
1635: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1636: {
1637: PetscInt i,idx,idy;
1640: switch (addv) {
1641: case INSERT_VALUES:
1642: case INSERT_ALL_VALUES:
1643: for (i=0; i<n; i++) {
1644: idx = *indicesx++;
1645: idy = *indicesy++;
1646: y[idy] = x[idx];
1647: y[idy+1] = x[idx+1];
1648: y[idy+2] = x[idx+2];
1649: y[idy+3] = x[idx+3];
1650: y[idy+4] = x[idx+4];
1651: y[idy+5] = x[idx+5];
1652: y[idy+6] = x[idx+6];
1653: y[idy+7] = x[idx+7];
1654: y[idy+8] = x[idx+8];
1655: y[idy+9] = x[idx+9];
1656: y[idy+10] = x[idx+10];
1657: }
1658: break;
1659: case ADD_VALUES:
1660: case ADD_ALL_VALUES:
1661: for (i=0; i<n; i++) {
1662: idx = *indicesx++;
1663: idy = *indicesy++;
1664: y[idy] += x[idx];
1665: y[idy+1] += x[idx+1];
1666: y[idy+2] += x[idx+2];
1667: y[idy+3] += x[idx+3];
1668: y[idy+4] += x[idx+4];
1669: y[idy+5] += x[idx+5];
1670: y[idy+6] += x[idx+6];
1671: y[idy+7] += x[idx+7];
1672: y[idy+8] += x[idx+8];
1673: y[idy+9] += x[idx+9];
1674: y[idy+10] += x[idx+10];
1675: }
1676: break;
1677: #if !defined(PETSC_USE_COMPLEX)
1678: case MAX_VALUES:
1679: for (i=0; i<n; i++) {
1680: idx = *indicesx++;
1681: idy = *indicesy++;
1682: y[idy] = PetscMax(y[idy],x[idx]);
1683: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1684: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1685: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1686: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1687: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1688: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1689: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1690: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1691: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1692: y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1693: }
1694: #else
1695: case MAX_VALUES:
1696: #endif
1697: case NOT_SET_VALUES:
1698: break;
1699: default:
1700: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1701: }
1702: return(0);
1703: }
1705: /* ----------------------------------------------------------------------------------------------- */
1706: PETSC_STATIC_INLINE void Pack_MPI1_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1707: {
1708: PetscInt i,idx;
1710: for (i=0; i<n; i++) {
1711: idx = *indicesx++;
1712: y[0] = x[idx];
1713: y[1] = x[idx+1];
1714: y[2] = x[idx+2];
1715: y[3] = x[idx+3];
1716: y[4] = x[idx+4];
1717: y[5] = x[idx+5];
1718: y[6] = x[idx+6];
1719: y[7] = x[idx+7];
1720: y[8] = x[idx+8];
1721: y[9] = x[idx+9];
1722: y[10] = x[idx+10];
1723: y[11] = x[idx+11];
1724: y += 12;
1725: }
1726: }
1728: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_12(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1729: {
1730: PetscInt i,idy;
1733: switch (addv) {
1734: case INSERT_VALUES:
1735: case INSERT_ALL_VALUES:
1736: for (i=0; i<n; i++) {
1737: idy = *indicesy++;
1738: y[idy] = x[0];
1739: y[idy+1] = x[1];
1740: y[idy+2] = x[2];
1741: y[idy+3] = x[3];
1742: y[idy+4] = x[4];
1743: y[idy+5] = x[5];
1744: y[idy+6] = x[6];
1745: y[idy+7] = x[7];
1746: y[idy+8] = x[8];
1747: y[idy+9] = x[9];
1748: y[idy+10] = x[10];
1749: y[idy+11] = x[11];
1750: x += 12;
1751: }
1752: break;
1753: case ADD_VALUES:
1754: case ADD_ALL_VALUES:
1755: for (i=0; i<n; i++) {
1756: idy = *indicesy++;
1757: y[idy] += x[0];
1758: y[idy+1] += x[1];
1759: y[idy+2] += x[2];
1760: y[idy+3] += x[3];
1761: y[idy+4] += x[4];
1762: y[idy+5] += x[5];
1763: y[idy+6] += x[6];
1764: y[idy+7] += x[7];
1765: y[idy+8] += x[8];
1766: y[idy+9] += x[9];
1767: y[idy+10] += x[10];
1768: y[idy+11] += x[11];
1769: x += 12;
1770: }
1771: break;
1772: #if !defined(PETSC_USE_COMPLEX)
1773: case MAX_VALUES:
1774: for (i=0; i<n; i++) {
1775: idy = *indicesy++;
1776: y[idy] = PetscMax(y[idy],x[0]);
1777: y[idy+1] = PetscMax(y[idy+1],x[1]);
1778: y[idy+2] = PetscMax(y[idy+2],x[2]);
1779: y[idy+3] = PetscMax(y[idy+3],x[3]);
1780: y[idy+4] = PetscMax(y[idy+4],x[4]);
1781: y[idy+5] = PetscMax(y[idy+5],x[5]);
1782: y[idy+6] = PetscMax(y[idy+6],x[6]);
1783: y[idy+7] = PetscMax(y[idy+7],x[7]);
1784: y[idy+8] = PetscMax(y[idy+8],x[8]);
1785: y[idy+9] = PetscMax(y[idy+9],x[9]);
1786: y[idy+10] = PetscMax(y[idy+10],x[10]);
1787: y[idy+11] = PetscMax(y[idy+11],x[11]);
1788: x += 12;
1789: }
1790: #else
1791: case MAX_VALUES:
1792: #endif
1793: case NOT_SET_VALUES:
1794: break;
1795: default:
1796: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1797: }
1798: return(0);
1799: }
1801: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1802: {
1803: PetscInt i,idx,idy;
1806: switch (addv) {
1807: case INSERT_VALUES:
1808: case INSERT_ALL_VALUES:
1809: for (i=0; i<n; i++) {
1810: idx = *indicesx++;
1811: idy = *indicesy++;
1812: y[idy] = x[idx];
1813: y[idy+1] = x[idx+1];
1814: y[idy+2] = x[idx+2];
1815: y[idy+3] = x[idx+3];
1816: y[idy+4] = x[idx+4];
1817: y[idy+5] = x[idx+5];
1818: y[idy+6] = x[idx+6];
1819: y[idy+7] = x[idx+7];
1820: y[idy+8] = x[idx+8];
1821: y[idy+9] = x[idx+9];
1822: y[idy+10] = x[idx+10];
1823: y[idy+11] = x[idx+11];
1824: }
1825: break;
1826: case ADD_VALUES:
1827: case ADD_ALL_VALUES:
1828: for (i=0; i<n; i++) {
1829: idx = *indicesx++;
1830: idy = *indicesy++;
1831: y[idy] += x[idx];
1832: y[idy+1] += x[idx+1];
1833: y[idy+2] += x[idx+2];
1834: y[idy+3] += x[idx+3];
1835: y[idy+4] += x[idx+4];
1836: y[idy+5] += x[idx+5];
1837: y[idy+6] += x[idx+6];
1838: y[idy+7] += x[idx+7];
1839: y[idy+8] += x[idx+8];
1840: y[idy+9] += x[idx+9];
1841: y[idy+10] += x[idx+10];
1842: y[idy+11] += x[idx+11];
1843: }
1844: break;
1845: #if !defined(PETSC_USE_COMPLEX)
1846: case MAX_VALUES:
1847: for (i=0; i<n; i++) {
1848: idx = *indicesx++;
1849: idy = *indicesy++;
1850: y[idy] = PetscMax(y[idy],x[idx]);
1851: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1852: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1853: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1854: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1855: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1856: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1857: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1858: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1859: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1860: y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1861: y[idy+11] = PetscMax(y[idy+11],x[idx+11]);
1862: }
1863: #else
1864: case MAX_VALUES:
1865: #endif
1866: case NOT_SET_VALUES:
1867: break;
1868: default:
1869: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1870: }
1871: return(0);
1872: }
1874: /* ----------------------------------------------------------------------------------------------- */
1875: PETSC_STATIC_INLINE void Pack_MPI1_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1876: {
1877: PetscInt i,idx;
1879: for (i=0; i<n; i++) {
1880: idx = *indicesx++;
1881: PetscMemcpy(y,x + idx,bs*sizeof(PetscScalar));
1882: y += bs;
1883: }
1884: }
1886: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_bs(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1887: {
1888: PetscInt i,idy,j;
1891: switch (addv) {
1892: case INSERT_VALUES:
1893: case INSERT_ALL_VALUES:
1894: for (i=0; i<n; i++) {
1895: idy = *indicesy++;
1896: PetscMemcpy(y + idy,x,bs*sizeof(PetscScalar));
1897: x += bs;
1898: }
1899: break;
1900: case ADD_VALUES:
1901: case ADD_ALL_VALUES:
1902: for (i=0; i<n; i++) {
1903: idy = *indicesy++;
1904: for (j=0; j<bs; j++) y[idy+j] += x[j];
1905: x += bs;
1906: }
1907: break;
1908: #if !defined(PETSC_USE_COMPLEX)
1909: case MAX_VALUES:
1910: for (i=0; i<n; i++) {
1911: idy = *indicesy++;
1912: for (j=0; j<bs; j++) y[idy+j] = PetscMax(y[idy+j],x[j]);
1913: x += bs;
1914: }
1915: #else
1916: case MAX_VALUES:
1917: #endif
1918: case NOT_SET_VALUES:
1919: break;
1920: default:
1921: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1922: }
1923: return(0);
1924: }
1926: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1927: {
1928: PetscInt i,idx,idy,j;
1931: switch (addv) {
1932: case INSERT_VALUES:
1933: case INSERT_ALL_VALUES:
1934: for (i=0; i<n; i++) {
1935: idx = *indicesx++;
1936: idy = *indicesy++;
1937: PetscMemcpy(y + idy, x + idx,bs*sizeof(PetscScalar));
1938: }
1939: break;
1940: case ADD_VALUES:
1941: case ADD_ALL_VALUES:
1942: for (i=0; i<n; i++) {
1943: idx = *indicesx++;
1944: idy = *indicesy++;
1945: for (j=0; j<bs; j++ ) y[idy+j] += x[idx+j];
1946: }
1947: break;
1948: #if !defined(PETSC_USE_COMPLEX)
1949: case MAX_VALUES:
1950: for (i=0; i<n; i++) {
1951: idx = *indicesx++;
1952: idy = *indicesy++;
1953: for (j=0; j<bs; j++ ) y[idy+j] = PetscMax(y[idy+j],x[idx+j]);
1954: }
1955: #else
1956: case MAX_VALUES:
1957: #endif
1958: case NOT_SET_VALUES:
1959: break;
1960: default:
1961: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1962: }
1963: return(0);
1964: }
1966: /* Create the VecScatterBegin/End_P for our chosen block sizes */
1967: #define BS 1
1968: #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1969: #define BS 2
1970: #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1971: #define BS 3
1972: #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1973: #define BS 4
1974: #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1975: #define BS 5
1976: #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1977: #define BS 6
1978: #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1979: #define BS 7
1980: #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1981: #define BS 8
1982: #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1983: #define BS 9
1984: #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1985: #define BS 10
1986: #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1987: #define BS 11
1988: #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1989: #define BS 12
1990: #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1991: #define BS bs
1992: #include <../src/vec/vscat/impls/mpi1/vpscat_mpi1.h>
1994: /* ==========================================================================================*/
1996: /* create parallel to sequential scatter context */
1998: PetscErrorCode VecScatterCreateCommon_PtoS_MPI1(VecScatter_MPI_General*,VecScatter_MPI_General*,VecScatter);
2000: /*
2001: bs indicates how many elements there are in each block. Normally this would be 1.
2003: contains check that PetscMPIInt can handle the sizes needed
2004: */
2005: PetscErrorCode VecScatterCreateLocal_PtoS_MPI1(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2006: {
2007: VecScatter_MPI_General *from,*to;
2008: PetscMPIInt size,rank,imdex,tag,n;
2009: PetscInt *source = NULL,*owners = NULL,nxr;
2010: PetscInt *lowner = NULL,*start = NULL,lengthy,lengthx;
2011: PetscMPIInt *nprocs = NULL,nrecvs;
2012: PetscInt i,j,idx,nsends;
2013: PetscMPIInt *owner = NULL;
2014: PetscInt *starts = NULL,count,slen;
2015: PetscInt *rvalues,*svalues,base,*values,nprocslocal,recvtotal,*rsvalues;
2016: PetscMPIInt *onodes1,*olengths1;
2017: MPI_Comm comm;
2018: MPI_Request *send_waits = NULL,*recv_waits = NULL;
2019: MPI_Status recv_status,*send_status;
2020: PetscErrorCode ierr;
2023: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2024: PetscObjectGetComm((PetscObject)xin,&comm);
2025: MPI_Comm_rank(comm,&rank);
2026: MPI_Comm_size(comm,&size);
2027: owners = xin->map->range;
2028: VecGetSize(yin,&lengthy);
2029: VecGetSize(xin,&lengthx);
2031: /* first count number of contributors to each processor */
2032: /* owner[i]: owner of ith inidx; nproc[j]: num of inidx to be sent to jth proc */
2033: PetscMalloc2(size,&nprocs,nx,&owner);
2034: PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
2036: j = 0;
2037: nsends = 0;
2038: for (i=0; i<nx; i++) {
2039: idx = bs*inidx[i];
2040: if (idx < owners[j]) j = 0;
2041: for (; j<size; j++) {
2042: if (idx < owners[j+1]) {
2043: if (!nprocs[j]++) nsends++;
2044: owner[i] = j;
2045: break;
2046: }
2047: }
2048: if (j == size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ith %D block entry %D not owned by any process, upper bound %D",i,idx,owners[size]);
2049: }
2051: nprocslocal = nprocs[rank];
2052: nprocs[rank] = 0;
2053: if (nprocslocal) nsends--;
2054: /* inform other processors of number of messages and max length*/
2055: PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2056: PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2057: PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2058: recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
2060: /* post receives: */
2061: PetscMalloc3(recvtotal,&rvalues,nrecvs,&source,nrecvs,&recv_waits);
2062: count = 0;
2063: for (i=0; i<nrecvs; i++) {
2064: MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2065: count += olengths1[i];
2066: }
2068: /* do sends:
2069: 1) starts[i] gives the starting index in svalues for stuff going to
2070: the ith processor
2071: */
2072: nxr = 0;
2073: for (i=0; i<nx; i++) {
2074: if (owner[i] != rank) nxr++;
2075: }
2076: PetscMalloc3(nxr,&svalues,nsends,&send_waits,size+1,&starts);
2078: starts[0] = 0;
2079: for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2080: for (i=0; i<nx; i++) {
2081: if (owner[i] != rank) svalues[starts[owner[i]]++] = bs*inidx[i];
2082: }
2083: starts[0] = 0;
2084: for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2085: count = 0;
2086: for (i=0; i<size; i++) {
2087: if (nprocs[i]) {
2088: MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);
2089: }
2090: }
2092: /* wait on receives */
2093: count = nrecvs;
2094: slen = 0;
2095: while (count) {
2096: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2097: /* unpack receives into our local space */
2098: MPI_Get_count(&recv_status,MPIU_INT,&n);
2099: slen += n;
2100: count--;
2101: }
2103: if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not expected %D",slen,recvtotal);
2105: /* allocate entire send scatter context */
2106: PetscNewLog(ctx,&to);
2107: to->n = nrecvs;
2109: PetscMalloc1(nrecvs,&to->requests);
2110: PetscMalloc4(bs*slen,&to->values,slen,&to->indices,nrecvs+1,&to->starts,nrecvs,&to->procs);
2111: PetscMalloc2(PetscMax(to->n,nsends),&to->sstatus,PetscMax(to->n,nsends),&to->rstatus);
2113: ctx->todata = (void*)to;
2114: to->starts[0] = 0;
2116: if (nrecvs) {
2117: /* move the data into the send scatter */
2118: base = owners[rank];
2119: rsvalues = rvalues;
2120: for (i=0; i<nrecvs; i++) {
2121: to->starts[i+1] = to->starts[i] + olengths1[i];
2122: to->procs[i] = onodes1[i];
2123: values = rsvalues;
2124: rsvalues += olengths1[i];
2125: for (j=0; j<olengths1[i]; j++) to->indices[to->starts[i] + j] = values[j] - base;
2126: }
2127: }
2128: PetscFree(olengths1);
2129: PetscFree(onodes1);
2130: PetscFree3(rvalues,source,recv_waits);
2132: /* allocate entire receive scatter context */
2133: PetscNewLog(ctx,&from);
2134: from->n = nsends;
2136: PetscMalloc1(nsends,&from->requests);
2137: PetscMalloc4((ny-nprocslocal)*bs,&from->values,ny-nprocslocal,&from->indices,nsends+1,&from->starts,from->n,&from->procs);
2138: ctx->fromdata = (void*)from;
2140: /* move data into receive scatter */
2141: PetscMalloc2(size,&lowner,nsends+1,&start);
2142: count = 0; from->starts[0] = start[0] = 0;
2143: for (i=0; i<size; i++) {
2144: if (nprocs[i]) {
2145: lowner[i] = count;
2146: from->procs[count++] = i;
2147: from->starts[count] = start[count] = start[count-1] + nprocs[i];
2148: }
2149: }
2151: for (i=0; i<nx; i++) {
2152: if (owner[i] != rank) {
2153: from->indices[start[lowner[owner[i]]]++] = bs*inidy[i];
2154: if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2155: }
2156: }
2157: PetscFree2(lowner,start);
2158: PetscFree2(nprocs,owner);
2160: /* wait on sends */
2161: if (nsends) {
2162: PetscMalloc1(nsends,&send_status);
2163: MPI_Waitall(nsends,send_waits,send_status);
2164: PetscFree(send_status);
2165: }
2166: PetscFree3(svalues,send_waits,starts);
2168: if (nprocslocal) {
2169: PetscInt nt = from->local.n = to->local.n = nprocslocal;
2170: /* we have a scatter to ourselves */
2171: PetscMalloc1(nt,&to->local.vslots);
2172: PetscMalloc1(nt,&from->local.vslots);
2173: nt = 0;
2174: for (i=0; i<nx; i++) {
2175: idx = bs*inidx[i];
2176: if (idx >= owners[rank] && idx < owners[rank+1]) {
2177: to->local.vslots[nt] = idx - owners[rank];
2178: from->local.vslots[nt++] = bs*inidy[i];
2179: if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2180: }
2181: }
2182: PetscLogObjectMemory((PetscObject)ctx,2*nt*sizeof(PetscInt));
2183: } else {
2184: from->local.n = 0;
2185: from->local.vslots = 0;
2186: to->local.n = 0;
2187: to->local.vslots = 0;
2188: }
2190: from->local.nonmatching_computed = PETSC_FALSE;
2191: from->local.n_nonmatching = 0;
2192: from->local.slots_nonmatching = 0;
2193: to->local.nonmatching_computed = PETSC_FALSE;
2194: to->local.n_nonmatching = 0;
2195: to->local.slots_nonmatching = 0;
2197: from->format = VEC_SCATTER_MPI_GENERAL;
2198: to->format = VEC_SCATTER_MPI_GENERAL;
2199: from->bs = bs;
2200: to->bs = bs;
2202: VecScatterCreateCommon_PtoS_MPI1(from,to,ctx);
2203: return(0);
2204: }
2206: /*
2207: bs indicates how many elements there are in each block. Normally this would be 1.
2208: */
2209: PetscErrorCode VecScatterCreateCommon_PtoS_MPI1(VecScatter_MPI_General *from,VecScatter_MPI_General *to,VecScatter ctx)
2210: {
2211: MPI_Comm comm;
2212: PetscMPIInt tag = ((PetscObject)ctx)->tag, tagr;
2213: PetscInt bs = to->bs;
2214: PetscMPIInt size;
2215: PetscInt i, n;
2219: PetscObjectGetComm((PetscObject)ctx,&comm);
2220: PetscObjectGetNewTag((PetscObject)ctx,&tagr);
2221: ctx->ops->destroy = VecScatterDestroy_PtoP_MPI1;
2223: MPI_Comm_size(comm,&size);
2224: /* check if the receives are ALL going into contiguous locations; if so can skip indexing */
2225: to->contiq = PETSC_FALSE;
2226: n = from->starts[from->n];
2227: from->contiq = PETSC_TRUE;
2228: for (i=1; i<n; i++) {
2229: if (from->indices[i] != from->indices[i-1] + bs) {
2230: from->contiq = PETSC_FALSE;
2231: break;
2232: }
2233: }
2235: {
2236: PetscInt *sstarts = to->starts, *rstarts = from->starts;
2237: PetscMPIInt *sprocs = to->procs, *rprocs = from->procs;
2238: MPI_Request *swaits = to->requests,*rwaits = from->requests;
2239: MPI_Request *rev_swaits,*rev_rwaits;
2240: PetscScalar *Ssvalues = to->values, *Srvalues = from->values;
2242: /* allocate additional wait variables for the "reverse" scatter */
2243: PetscMalloc1(to->n,&rev_rwaits);
2244: PetscMalloc1(from->n,&rev_swaits);
2245: to->rev_requests = rev_rwaits;
2246: from->rev_requests = rev_swaits;
2248: for (i=0; i<from->n; i++) {
2249: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2250: }
2252: for (i=0; i<to->n; i++) {
2253: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2254: }
2255: /* Register receives for scatter and reverse */
2256: for (i=0; i<from->n; i++) {
2257: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
2258: }
2259: for (i=0; i<to->n; i++) {
2260: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tagr,comm,rev_rwaits+i);
2261: }
2262: ctx->ops->copy = VecScatterCopy_PtoP_X_MPI1;
2263: }
2264: PetscInfo1(ctx,"Using blocksize %D scatter\n",bs);
2266: #if defined(PETSC_USE_DEBUG)
2267: MPIU_Allreduce(&bs,&i,1,MPIU_INT,MPI_MIN,PetscObjectComm((PetscObject)ctx));
2268: MPIU_Allreduce(&bs,&n,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)ctx));
2269: if (bs!=i || bs!=n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Blocks size %D != %D or %D",bs,i,n);
2270: #endif
2272: switch (bs) {
2273: case 12:
2274: ctx->ops->begin = VecScatterBeginMPI1_12;
2275: ctx->ops->end = VecScatterEndMPI1_12;
2276: break;
2277: case 11:
2278: ctx->ops->begin = VecScatterBeginMPI1_11;
2279: ctx->ops->end = VecScatterEndMPI1_11;
2280: break;
2281: case 10:
2282: ctx->ops->begin = VecScatterBeginMPI1_10;
2283: ctx->ops->end = VecScatterEndMPI1_10;
2284: break;
2285: case 9:
2286: ctx->ops->begin = VecScatterBeginMPI1_9;
2287: ctx->ops->end = VecScatterEndMPI1_9;
2288: break;
2289: case 8:
2290: ctx->ops->begin = VecScatterBeginMPI1_8;
2291: ctx->ops->end = VecScatterEndMPI1_8;
2292: break;
2293: case 7:
2294: ctx->ops->begin = VecScatterBeginMPI1_7;
2295: ctx->ops->end = VecScatterEndMPI1_7;
2296: break;
2297: case 6:
2298: ctx->ops->begin = VecScatterBeginMPI1_6;
2299: ctx->ops->end = VecScatterEndMPI1_6;
2300: break;
2301: case 5:
2302: ctx->ops->begin = VecScatterBeginMPI1_5;
2303: ctx->ops->end = VecScatterEndMPI1_5;
2304: break;
2305: case 4:
2306: ctx->ops->begin = VecScatterBeginMPI1_4;
2307: ctx->ops->end = VecScatterEndMPI1_4;
2308: break;
2309: case 3:
2310: ctx->ops->begin = VecScatterBeginMPI1_3;
2311: ctx->ops->end = VecScatterEndMPI1_3;
2312: break;
2313: case 2:
2314: ctx->ops->begin = VecScatterBeginMPI1_2;
2315: ctx->ops->end = VecScatterEndMPI1_2;
2316: break;
2317: case 1:
2318: ctx->ops->begin = VecScatterBeginMPI1_1;
2319: ctx->ops->end = VecScatterEndMPI1_1;
2320: break;
2321: default:
2322: ctx->ops->begin = VecScatterBeginMPI1_bs;
2323: ctx->ops->end = VecScatterEndMPI1_bs;
2325: }
2326: ctx->ops->view = VecScatterView_MPI_MPI1;
2327: /* try to optimize PtoP vecscatter with memcpy's */
2328: VecScatterMemcpyPlanCreate_PtoP(to,from);
2329: return(0);
2330: }
2333: /* ------------------------------------------------------------------------------------*/
2334: /*
2335: Scatter from local Seq vectors to a parallel vector.
2336: Reverses the order of the arguments, calls VecScatterCreateLocal_PtoS() then
2337: reverses the result.
2338: */
2339: PetscErrorCode VecScatterCreateLocal_StoP_MPI1(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2340: {
2341: PetscErrorCode ierr;
2342: MPI_Request *waits;
2343: VecScatter_MPI_General *to,*from;
2346: VecScatterCreateLocal_PtoS_MPI1(ny,inidy,nx,inidx,yin,xin,bs,ctx);
2347: to = (VecScatter_MPI_General*)ctx->fromdata;
2348: from = (VecScatter_MPI_General*)ctx->todata;
2349: ctx->todata = (void*)to;
2350: ctx->fromdata = (void*)from;
2351: /* these two are special, they are ALWAYS stored in to struct */
2352: to->sstatus = from->sstatus;
2353: to->rstatus = from->rstatus;
2355: from->sstatus = 0;
2356: from->rstatus = 0;
2358: waits = from->rev_requests;
2359: from->rev_requests = from->requests;
2360: from->requests = waits;
2361: waits = to->rev_requests;
2362: to->rev_requests = to->requests;
2363: to->requests = waits;
2364: return(0);
2365: }
2367: /* ---------------------------------------------------------------------------------*/
2368: PetscErrorCode VecScatterCreateLocal_PtoP_MPI1(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2369: {
2371: PetscMPIInt size,rank,tag,imdex,n;
2372: PetscInt *owners = xin->map->range;
2373: PetscMPIInt *nprocs = NULL;
2374: PetscInt i,j,idx,nsends,*local_inidx = NULL,*local_inidy = NULL;
2375: PetscMPIInt *owner = NULL;
2376: PetscInt *starts = NULL,count,slen;
2377: PetscInt *rvalues = NULL,*svalues = NULL,base,*values = NULL,*rsvalues,recvtotal,lastidx;
2378: PetscMPIInt *onodes1,*olengths1,nrecvs;
2379: MPI_Comm comm;
2380: MPI_Request *send_waits = NULL,*recv_waits = NULL;
2381: MPI_Status recv_status,*send_status = NULL;
2382: PetscBool duplicate = PETSC_FALSE;
2383: #if defined(PETSC_USE_DEBUG)
2384: PetscBool found = PETSC_FALSE;
2385: #endif
2388: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2389: PetscObjectGetComm((PetscObject)xin,&comm);
2390: MPI_Comm_size(comm,&size);
2391: MPI_Comm_rank(comm,&rank);
2392: if (size == 1) {
2393: VecScatterCreateLocal_StoP_MPI1(nx,inidx,ny,inidy,xin,yin,bs,ctx);
2394: return(0);
2395: }
2397: /*
2398: Each processor ships off its inidx[j] and inidy[j] to the appropriate processor
2399: They then call the StoPScatterCreate()
2400: */
2401: /* first count number of contributors to each processor */
2402: PetscMalloc3(size,&nprocs,nx,&owner,(size+1),&starts);
2403: PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
2405: lastidx = -1;
2406: j = 0;
2407: for (i=0; i<nx; i++) {
2408: /* if indices are NOT locally sorted, need to start search at the beginning */
2409: if (lastidx > (idx = bs*inidx[i])) j = 0;
2410: lastidx = idx;
2411: for (; j<size; j++) {
2412: if (idx >= owners[j] && idx < owners[j+1]) {
2413: nprocs[j]++;
2414: owner[i] = j;
2415: #if defined(PETSC_USE_DEBUG)
2416: found = PETSC_TRUE;
2417: #endif
2418: break;
2419: }
2420: }
2421: #if defined(PETSC_USE_DEBUG)
2422: if (!found) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2423: found = PETSC_FALSE;
2424: #endif
2425: }
2426: nsends = 0;
2427: for (i=0; i<size; i++) nsends += (nprocs[i] > 0);
2429: /* inform other processors of number of messages and max length*/
2430: PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2431: PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2432: PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2433: recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
2435: /* post receives: */
2436: PetscMalloc5(2*recvtotal,&rvalues,2*nx,&svalues,nrecvs,&recv_waits,nsends,&send_waits,nsends,&send_status);
2438: count = 0;
2439: for (i=0; i<nrecvs; i++) {
2440: MPI_Irecv((rvalues+2*count),2*olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2441: count += olengths1[i];
2442: }
2443: PetscFree(onodes1);
2445: /* do sends:
2446: 1) starts[i] gives the starting index in svalues for stuff going to
2447: the ith processor
2448: */
2449: starts[0]= 0;
2450: for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2451: for (i=0; i<nx; i++) {
2452: svalues[2*starts[owner[i]]] = bs*inidx[i];
2453: svalues[1 + 2*starts[owner[i]]++] = bs*inidy[i];
2454: }
2456: starts[0] = 0;
2457: for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2458: count = 0;
2459: for (i=0; i<size; i++) {
2460: if (nprocs[i]) {
2461: MPI_Isend(svalues+2*starts[i],2*nprocs[i],MPIU_INT,i,tag,comm,send_waits+count);
2462: count++;
2463: }
2464: }
2465: PetscFree3(nprocs,owner,starts);
2467: /* wait on receives */
2468: count = nrecvs;
2469: slen = 0;
2470: while (count) {
2471: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2472: /* unpack receives into our local space */
2473: MPI_Get_count(&recv_status,MPIU_INT,&n);
2474: slen += n/2;
2475: count--;
2476: }
2477: if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);
2479: PetscMalloc2(slen,&local_inidx,slen,&local_inidy);
2480: base = owners[rank];
2481: count = 0;
2482: rsvalues = rvalues;
2483: for (i=0; i<nrecvs; i++) {
2484: values = rsvalues;
2485: rsvalues += 2*olengths1[i];
2486: for (j=0; j<olengths1[i]; j++) {
2487: local_inidx[count] = values[2*j] - base;
2488: local_inidy[count++] = values[2*j+1];
2489: }
2490: }
2491: PetscFree(olengths1);
2493: /* wait on sends */
2494: if (nsends) {MPI_Waitall(nsends,send_waits,send_status);}
2495: PetscFree5(rvalues,svalues,recv_waits,send_waits,send_status);
2497: /*
2498: should sort and remove duplicates from local_inidx,local_inidy
2499: */
2501: #if defined(do_it_slow)
2502: /* sort on the from index */
2503: PetscSortIntWithArray(slen,local_inidx,local_inidy);
2504: start = 0;
2505: while (start < slen) {
2506: count = start+1;
2507: last = local_inidx[start];
2508: while (count < slen && last == local_inidx[count]) count++;
2509: if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
2510: /* sort on to index */
2511: PetscSortInt(count-start,local_inidy+start);
2512: }
2513: /* remove duplicates; not most efficient way, but probably good enough */
2514: i = start;
2515: while (i < count-1) {
2516: if (local_inidy[i] != local_inidy[i+1]) i++;
2517: else { /* found a duplicate */
2518: duplicate = PETSC_TRUE;
2519: for (j=i; j<slen-1; j++) {
2520: local_inidx[j] = local_inidx[j+1];
2521: local_inidy[j] = local_inidy[j+1];
2522: }
2523: slen--;
2524: count--;
2525: }
2526: }
2527: start = count;
2528: }
2529: #endif
2530: if (duplicate) {
2531: PetscInfo(ctx,"Duplicate from to indices passed in VecScatterCreate(), they are ignored\n");
2532: }
2533: VecScatterCreateLocal_StoP_MPI1(slen,local_inidx,slen,local_inidy,xin,yin,bs,ctx);
2534: PetscFree2(local_inidx,local_inidy);
2535: return(0);
2536: }
2538: PetscErrorCode VecScatterSetUp_MPI1(VecScatter ctx)
2539: {
2543: VecScatterSetUp_vectype_private(ctx,VecScatterCreateLocal_PtoS_MPI1,VecScatterCreateLocal_StoP_MPI1,VecScatterCreateLocal_PtoP_MPI1);
2544: return(0);
2545: }
2547: PetscErrorCode VecScatterCreate_MPI1(VecScatter ctx)
2548: {
2549: PetscErrorCode ierr;
2552: ctx->ops->setup = VecScatterSetUp_MPI1;
2553: PetscObjectChangeTypeName((PetscObject)ctx,VECSCATTERMPI1);
2554: PetscInfo(ctx,"Using MPI1 for vector scatter\n");
2555: return(0);
2556: }