Actual source code: vpscat_mpi1.c
petsc-3.10.5 2019-03-28
2: /*
3: Defines parallel vector scatters using MPI1.
4: */
6: #include <../src/vec/vec/impls/dvecimpl.h>
7: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
8: #include <petsc/private/vecscatterimpl.h>
10: PetscErrorCode VecScatterView_MPI_MPI1(VecScatter ctx,PetscViewer viewer)
11: {
12: VecScatter_MPI_General *to =(VecScatter_MPI_General*)ctx->todata;
13: VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
14: PetscErrorCode ierr;
15: PetscInt i;
16: PetscMPIInt rank;
17: PetscViewerFormat format;
18: PetscBool iascii;
21: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
22: if (iascii) {
23: MPI_Comm_rank(PetscObjectComm((PetscObject)ctx),&rank);
24: PetscViewerGetFormat(viewer,&format);
25: if (format == PETSC_VIEWER_ASCII_INFO) {
26: PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;
28: MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
29: MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
30: itmp = to->starts[to->n+1];
31: MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
32: itmp = from->starts[from->n+1];
33: MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
34: MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)ctx));
36: PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
37: PetscViewerASCIIPrintf(viewer," Maximum number sends %D\n",nsend_max);
38: PetscViewerASCIIPrintf(viewer," Maximum number receives %D\n",nrecv_max);
39: PetscViewerASCIIPrintf(viewer," Maximum data sent %D\n",(int)(lensend_max*to->bs*sizeof(PetscScalar)));
40: PetscViewerASCIIPrintf(viewer," Maximum data received %D\n",(int)(lenrecv_max*to->bs*sizeof(PetscScalar)));
41: PetscViewerASCIIPrintf(viewer," Total data sent %D\n",(int)(alldata*to->bs*sizeof(PetscScalar)));
43: } else {
44: PetscViewerASCIIPushSynchronized(viewer);
45: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);
46: if (to->n) {
47: for (i=0; i<to->n; i++) {
48: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length = %D to whom %d\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
49: if (to->memcpy_plan.optimized[i]) { PetscViewerASCIISynchronizedPrintf(viewer," is optimized with %D memcpy's in Pack\n",to->memcpy_plan.copy_offsets[i+1]-to->memcpy_plan.copy_offsets[i]); }
50: }
51: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote sends (in order by process sent to)\n");
52: for (i=0; i<to->starts[to->n]; i++) {
53: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,to->indices[i]);
54: }
55: }
57: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);
58: if (from->n) {
59: for (i=0; i<from->n; i++) {
60: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %d\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
61: if (from->memcpy_plan.optimized[i]) { PetscViewerASCIISynchronizedPrintf(viewer," is optimized with %D memcpy's in Unpack\n",to->memcpy_plan.copy_offsets[i+1]-to->memcpy_plan.copy_offsets[i]); }
62: }
64: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote receives (in order by process received from)\n");
65: for (i=0; i<from->starts[from->n]; i++) {
66: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,from->indices[i]);
67: }
68: }
69: if (to->local.n) {
70: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Indices for local part of scatter\n",rank);
71: if (to->local.memcpy_plan.optimized[0]) {
72: PetscViewerASCIIPrintf(viewer,"Local part of the scatter is made of %D copies\n",to->local.memcpy_plan.copy_offsets[1]);
73: }
74: for (i=0; i<to->local.n; i++) { /* the to and from have the opposite meaning from what you would expect */
75: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] From %D to %D \n",rank,to->local.vslots[i],from->local.vslots[i]);
76: }
77: }
79: PetscViewerFlush(viewer);
80: PetscViewerASCIIPopSynchronized(viewer);
81: }
82: }
83: return(0);
84: }
86: /* -------------------------------------------------------------------------------------*/
87: PetscErrorCode VecScatterDestroy_PtoP_MPI1(VecScatter ctx)
88: {
89: VecScatter_MPI_General *to = (VecScatter_MPI_General*)ctx->todata;
90: VecScatter_MPI_General *from = (VecScatter_MPI_General*)ctx->fromdata;
91: PetscErrorCode ierr;
92: PetscInt i;
95: /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
96: if (to->requests) {
97: for (i=0; i<to->n; i++) {
98: MPI_Request_free(to->requests + i);
99: }
100: }
101: if (to->rev_requests) {
102: for (i=0; i<to->n; i++) {
103: MPI_Request_free(to->rev_requests + i);
104: }
105: }
106: if (from->requests) {
107: for (i=0; i<from->n; i++) {
108: MPI_Request_free(from->requests + i);
109: }
110: }
112: if (from->rev_requests) {
113: for (i=0; i<from->n; i++) {
114: MPI_Request_free(from->rev_requests + i);
115: }
116: }
118: PetscFree(to->local.vslots);
119: PetscFree(from->local.vslots);
120: PetscFree(to->local.slots_nonmatching);
121: PetscFree(from->local.slots_nonmatching);
122: PetscFree(to->rev_requests);
123: PetscFree(from->rev_requests);
124: PetscFree(to->requests);
125: PetscFree(from->requests);
126: PetscFree4(to->values,to->indices,to->starts,to->procs);
127: PetscFree2(to->sstatus,to->rstatus);
128: PetscFree4(from->values,from->indices,from->starts,from->procs);
129: VecScatterMemcpyPlanDestroy_PtoP(to,from);
130: PetscFree(from);
131: PetscFree(to);
132: return(0);
133: }
135: /* --------------------------------------------------------------------------------------*/
137: PetscErrorCode VecScatterCopy_PtoP_X_MPI1(VecScatter in,VecScatter out)
138: {
139: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
140: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
141: PetscErrorCode ierr;
142: PetscInt ny,bs = in_from->bs;
145: out->ops->begin = in->ops->begin;
146: out->ops->end = in->ops->end;
147: out->ops->copy = in->ops->copy;
148: out->ops->destroy = in->ops->destroy;
149: out->ops->view = in->ops->view;
151: /* allocate entire send scatter context */
152: PetscNewLog(out,&out_to);
153: PetscNewLog(out,&out_from);
155: ny = in_to->starts[in_to->n];
156: out_to->n = in_to->n;
157: out_to->format = in_to->format;
159: PetscMalloc1(out_to->n,&out_to->requests);
160: PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
161: PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
162: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
163: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
164: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
166: out->todata = (void*)out_to;
167: out_to->local.n = in_to->local.n;
168: out_to->local.nonmatching_computed = PETSC_FALSE;
169: out_to->local.n_nonmatching = 0;
170: out_to->local.slots_nonmatching = 0;
171: if (in_to->local.n) {
172: PetscMalloc1(in_to->local.n,&out_to->local.vslots);
173: PetscMalloc1(in_from->local.n,&out_from->local.vslots);
174: PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
175: PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
176: } else {
177: out_to->local.vslots = 0;
178: out_from->local.vslots = 0;
179: }
181: /* allocate entire receive context */
182: out_from->format = in_from->format;
183: ny = in_from->starts[in_from->n];
184: out_from->n = in_from->n;
186: PetscMalloc1(out_from->n,&out_from->requests);
187: PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
188: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
189: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
190: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
192: out->fromdata = (void*)out_from;
193: out_from->local.n = in_from->local.n;
194: out_from->local.nonmatching_computed = PETSC_FALSE;
195: out_from->local.n_nonmatching = 0;
196: out_from->local.slots_nonmatching = 0;
198: /*
199: set up the request arrays for use with isend_init() and irecv_init()
200: */
201: {
202: PetscMPIInt tag;
203: MPI_Comm comm;
204: PetscInt *sstarts = out_to->starts, *rstarts = out_from->starts;
205: PetscMPIInt *sprocs = out_to->procs, *rprocs = out_from->procs;
206: PetscInt i;
207: MPI_Request *swaits = out_to->requests,*rwaits = out_from->requests;
208: MPI_Request *rev_swaits,*rev_rwaits;
209: PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;
211: PetscMalloc1(in_to->n,&out_to->rev_requests);
212: PetscMalloc1(in_from->n,&out_from->rev_requests);
214: rev_rwaits = out_to->rev_requests;
215: rev_swaits = out_from->rev_requests;
217: out_from->bs = out_to->bs = bs;
218: tag = ((PetscObject)out)->tag;
219: PetscObjectGetComm((PetscObject)out,&comm);
221: /* Register the receives that you will use later (sends for scatter reverse) */
222: for (i=0; i<out_from->n; i++) {
223: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
224: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
225: }
226: for (i=0; i<out_to->n; i++) {
227: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
228: /* Register receives for scatter reverse */
229: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
230: }
231: }
233: VecScatterMemcpyPlanCopy_PtoP(in_to,in_from,out_to,out_from);
234: return(0);
235: }
237: PetscErrorCode VecScatterCopy_PtoP_AllToAll_MPI1(VecScatter in,VecScatter out)
238: {
239: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
240: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
241: PetscErrorCode ierr;
242: PetscInt ny,bs = in_from->bs;
243: PetscMPIInt size;
246: MPI_Comm_size(PetscObjectComm((PetscObject)in),&size);
248: out->ops->begin = in->ops->begin;
249: out->ops->end = in->ops->end;
250: out->ops->copy = in->ops->copy;
251: out->ops->destroy = in->ops->destroy;
252: out->ops->view = in->ops->view;
254: /* allocate entire send scatter context */
255: PetscNewLog(out,&out_to);
256: PetscNewLog(out,&out_from);
258: ny = in_to->starts[in_to->n];
259: out_to->n = in_to->n;
260: out_to->format = in_to->format;
262: PetscMalloc1(out_to->n,&out_to->requests);
263: PetscMalloc4(bs*ny,&out_to->values,ny,&out_to->indices,out_to->n+1,&out_to->starts,out_to->n,&out_to->procs);
264: PetscMalloc2(PetscMax(in_to->n,in_from->n),&out_to->sstatus,PetscMax(in_to->n,in_from->n),&out_to->rstatus);
265: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
266: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
267: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
269: out->todata = (void*)out_to;
270: out_to->local.n = in_to->local.n;
271: out_to->local.nonmatching_computed = PETSC_FALSE;
272: out_to->local.n_nonmatching = 0;
273: out_to->local.slots_nonmatching = 0;
274: if (in_to->local.n) {
275: PetscMalloc1(in_to->local.n,&out_to->local.vslots);
276: PetscMalloc1(in_from->local.n,&out_from->local.vslots);
277: PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
278: PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
279: } else {
280: out_to->local.vslots = 0;
281: out_from->local.vslots = 0;
282: }
284: /* allocate entire receive context */
285: out_from->format = in_from->format;
286: ny = in_from->starts[in_from->n];
287: out_from->n = in_from->n;
289: PetscMalloc1(out_from->n,&out_from->requests);
290: PetscMalloc4(ny*bs,&out_from->values,ny,&out_from->indices,out_from->n+1,&out_from->starts,out_from->n,&out_from->procs);
291: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
292: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
293: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
295: out->fromdata = (void*)out_from;
296: out_from->local.n = in_from->local.n;
297: out_from->local.nonmatching_computed = PETSC_FALSE;
298: out_from->local.n_nonmatching = 0;
299: out_from->local.slots_nonmatching = 0;
301: VecScatterMemcpyPlanCopy_PtoP(in_to,in_from,out_to,out_from);
302: return(0);
303: }
305: /* Optimize a parallel vector to parallel vector vecscatter with memory copies */
306: PetscErrorCode VecScatterMemcpyPlanCreate_PtoP(VecScatter_MPI_General *to,VecScatter_MPI_General *from)
307: {
311: VecScatterMemcpyPlanCreate_Index(to->n,to->starts,to->indices,to->bs,&to->memcpy_plan);
312: VecScatterMemcpyPlanCreate_Index(from->n,from->starts,from->indices,to->bs,&from->memcpy_plan);
313: VecScatterMemcpyPlanCreate_SGToSG(to->bs,&to->local,&from->local);
314: return(0);
315: }
317: PetscErrorCode VecScatterMemcpyPlanCopy_PtoP(const VecScatter_MPI_General *in_to,const VecScatter_MPI_General *in_from,VecScatter_MPI_General *out_to,VecScatter_MPI_General *out_from)
318: {
322: VecScatterMemcpyPlanCopy(&in_to->memcpy_plan,&out_to->memcpy_plan);
323: VecScatterMemcpyPlanCopy(&in_from->memcpy_plan,&out_from->memcpy_plan);
324: VecScatterMemcpyPlanCopy(&in_to->local.memcpy_plan,&out_to->local.memcpy_plan);
325: VecScatterMemcpyPlanCopy(&in_from->local.memcpy_plan,&out_from->local.memcpy_plan);
326: return(0);
327: }
329: PetscErrorCode VecScatterMemcpyPlanDestroy_PtoP(VecScatter_MPI_General *to,VecScatter_MPI_General *from)
330: {
334: VecScatterMemcpyPlanDestroy(&to->memcpy_plan);
335: VecScatterMemcpyPlanDestroy(&from->memcpy_plan);
336: VecScatterMemcpyPlanDestroy(&to->local.memcpy_plan);
337: VecScatterMemcpyPlanDestroy(&from->local.memcpy_plan);
338: return(0);
339: }
341: /* --------------------------------------------------------------------------------------------------
342: Packs and unpacks the message data into send or from receive buffers.
344: These could be generated automatically.
346: Fortran kernels etc. could be used.
347: */
348: PETSC_STATIC_INLINE void Pack_MPI1_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
349: {
350: PetscInt i;
351: for (i=0; i<n; i++) y[i] = x[indicesx[i]];
352: }
354: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_1(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
355: {
356: PetscInt i;
359: switch (addv) {
360: case INSERT_VALUES:
361: case INSERT_ALL_VALUES:
362: for (i=0; i<n; i++) y[indicesy[i]] = x[i];
363: break;
364: case ADD_VALUES:
365: case ADD_ALL_VALUES:
366: for (i=0; i<n; i++) y[indicesy[i]] += x[i];
367: break;
368: #if !defined(PETSC_USE_COMPLEX)
369: case MAX_VALUES:
370: for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[i]);
371: #else
372: case MAX_VALUES:
373: #endif
374: case NOT_SET_VALUES:
375: break;
376: default:
377: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
378: }
379: return(0);
380: }
382: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
383: {
384: PetscInt i;
387: switch (addv) {
388: case INSERT_VALUES:
389: case INSERT_ALL_VALUES:
390: for (i=0; i<n; i++) y[indicesy[i]] = x[indicesx[i]];
391: break;
392: case ADD_VALUES:
393: case ADD_ALL_VALUES:
394: for (i=0; i<n; i++) y[indicesy[i]] += x[indicesx[i]];
395: break;
396: #if !defined(PETSC_USE_COMPLEX)
397: case MAX_VALUES:
398: for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[indicesx[i]]);
399: #else
400: case MAX_VALUES:
401: #endif
402: case NOT_SET_VALUES:
403: break;
404: default:
405: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
406: }
407: return(0);
408: }
410: /* ----------------------------------------------------------------------------------------------- */
411: PETSC_STATIC_INLINE void Pack_MPI1_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
412: {
413: PetscInt i,idx;
415: for (i=0; i<n; i++) {
416: idx = *indicesx++;
417: y[0] = x[idx];
418: y[1] = x[idx+1];
419: y += 2;
420: }
421: }
423: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_2(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
424: {
425: PetscInt i,idy;
428: switch (addv) {
429: case INSERT_VALUES:
430: case INSERT_ALL_VALUES:
431: for (i=0; i<n; i++) {
432: idy = *indicesy++;
433: y[idy] = x[0];
434: y[idy+1] = x[1];
435: x += 2;
436: }
437: break;
438: case ADD_VALUES:
439: case ADD_ALL_VALUES:
440: for (i=0; i<n; i++) {
441: idy = *indicesy++;
442: y[idy] += x[0];
443: y[idy+1] += x[1];
444: x += 2;
445: }
446: break;
447: #if !defined(PETSC_USE_COMPLEX)
448: case MAX_VALUES:
449: for (i=0; i<n; i++) {
450: idy = *indicesy++;
451: y[idy] = PetscMax(y[idy],x[0]);
452: y[idy+1] = PetscMax(y[idy+1],x[1]);
453: x += 2;
454: }
455: #else
456: case MAX_VALUES:
457: #endif
458: case NOT_SET_VALUES:
459: break;
460: default:
461: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
462: }
463: return(0);
464: }
466: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
467: {
468: PetscInt i,idx,idy;
471: switch (addv) {
472: case INSERT_VALUES:
473: case INSERT_ALL_VALUES:
474: for (i=0; i<n; i++) {
475: idx = *indicesx++;
476: idy = *indicesy++;
477: y[idy] = x[idx];
478: y[idy+1] = x[idx+1];
479: }
480: break;
481: case ADD_VALUES:
482: case ADD_ALL_VALUES:
483: for (i=0; i<n; i++) {
484: idx = *indicesx++;
485: idy = *indicesy++;
486: y[idy] += x[idx];
487: y[idy+1] += x[idx+1];
488: }
489: break;
490: #if !defined(PETSC_USE_COMPLEX)
491: case MAX_VALUES:
492: for (i=0; i<n; i++) {
493: idx = *indicesx++;
494: idy = *indicesy++;
495: y[idy] = PetscMax(y[idy],x[idx]);
496: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
497: }
498: #else
499: case MAX_VALUES:
500: #endif
501: case NOT_SET_VALUES:
502: break;
503: default:
504: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
505: }
506: return(0);
507: }
508: /* ----------------------------------------------------------------------------------------------- */
509: PETSC_STATIC_INLINE void Pack_MPI1_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
510: {
511: PetscInt i,idx;
513: for (i=0; i<n; i++) {
514: idx = *indicesx++;
515: y[0] = x[idx];
516: y[1] = x[idx+1];
517: y[2] = x[idx+2];
518: y += 3;
519: }
520: }
521: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_3(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
522: {
523: PetscInt i,idy;
526: switch (addv) {
527: case INSERT_VALUES:
528: case INSERT_ALL_VALUES:
529: for (i=0; i<n; i++) {
530: idy = *indicesy++;
531: y[idy] = x[0];
532: y[idy+1] = x[1];
533: y[idy+2] = x[2];
534: x += 3;
535: }
536: break;
537: case ADD_VALUES:
538: case ADD_ALL_VALUES:
539: for (i=0; i<n; i++) {
540: idy = *indicesy++;
541: y[idy] += x[0];
542: y[idy+1] += x[1];
543: y[idy+2] += x[2];
544: x += 3;
545: }
546: break;
547: #if !defined(PETSC_USE_COMPLEX)
548: case MAX_VALUES:
549: for (i=0; i<n; i++) {
550: idy = *indicesy++;
551: y[idy] = PetscMax(y[idy],x[0]);
552: y[idy+1] = PetscMax(y[idy+1],x[1]);
553: y[idy+2] = PetscMax(y[idy+2],x[2]);
554: x += 3;
555: }
556: #else
557: case MAX_VALUES:
558: #endif
559: case NOT_SET_VALUES:
560: break;
561: default:
562: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
563: }
564: return(0);
565: }
567: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
568: {
569: PetscInt i,idx,idy;
572: switch (addv) {
573: case INSERT_VALUES:
574: case INSERT_ALL_VALUES:
575: for (i=0; i<n; i++) {
576: idx = *indicesx++;
577: idy = *indicesy++;
578: y[idy] = x[idx];
579: y[idy+1] = x[idx+1];
580: y[idy+2] = x[idx+2];
581: }
582: break;
583: case ADD_VALUES:
584: case ADD_ALL_VALUES:
585: for (i=0; i<n; i++) {
586: idx = *indicesx++;
587: idy = *indicesy++;
588: y[idy] += x[idx];
589: y[idy+1] += x[idx+1];
590: y[idy+2] += x[idx+2];
591: }
592: break;
593: #if !defined(PETSC_USE_COMPLEX)
594: case MAX_VALUES:
595: for (i=0; i<n; i++) {
596: idx = *indicesx++;
597: idy = *indicesy++;
598: y[idy] = PetscMax(y[idy],x[idx]);
599: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
600: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
601: }
602: #else
603: case MAX_VALUES:
604: #endif
605: case NOT_SET_VALUES:
606: break;
607: default:
608: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
609: }
610: return(0);
611: }
612: /* ----------------------------------------------------------------------------------------------- */
613: PETSC_STATIC_INLINE void Pack_MPI1_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
614: {
615: PetscInt i,idx;
617: for (i=0; i<n; i++) {
618: idx = *indicesx++;
619: y[0] = x[idx];
620: y[1] = x[idx+1];
621: y[2] = x[idx+2];
622: y[3] = x[idx+3];
623: y += 4;
624: }
625: }
626: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_4(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
627: {
628: PetscInt i,idy;
631: switch (addv) {
632: case INSERT_VALUES:
633: case INSERT_ALL_VALUES:
634: for (i=0; i<n; i++) {
635: idy = *indicesy++;
636: y[idy] = x[0];
637: y[idy+1] = x[1];
638: y[idy+2] = x[2];
639: y[idy+3] = x[3];
640: x += 4;
641: }
642: break;
643: case ADD_VALUES:
644: case ADD_ALL_VALUES:
645: for (i=0; i<n; i++) {
646: idy = *indicesy++;
647: y[idy] += x[0];
648: y[idy+1] += x[1];
649: y[idy+2] += x[2];
650: y[idy+3] += x[3];
651: x += 4;
652: }
653: break;
654: #if !defined(PETSC_USE_COMPLEX)
655: case MAX_VALUES:
656: for (i=0; i<n; i++) {
657: idy = *indicesy++;
658: y[idy] = PetscMax(y[idy],x[0]);
659: y[idy+1] = PetscMax(y[idy+1],x[1]);
660: y[idy+2] = PetscMax(y[idy+2],x[2]);
661: y[idy+3] = PetscMax(y[idy+3],x[3]);
662: x += 4;
663: }
664: #else
665: case MAX_VALUES:
666: #endif
667: case NOT_SET_VALUES:
668: break;
669: default:
670: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
671: }
672: return(0);
673: }
675: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
676: {
677: PetscInt i,idx,idy;
680: switch (addv) {
681: case INSERT_VALUES:
682: case INSERT_ALL_VALUES:
683: for (i=0; i<n; i++) {
684: idx = *indicesx++;
685: idy = *indicesy++;
686: y[idy] = x[idx];
687: y[idy+1] = x[idx+1];
688: y[idy+2] = x[idx+2];
689: y[idy+3] = x[idx+3];
690: }
691: break;
692: case ADD_VALUES:
693: case ADD_ALL_VALUES:
694: for (i=0; i<n; i++) {
695: idx = *indicesx++;
696: idy = *indicesy++;
697: y[idy] += x[idx];
698: y[idy+1] += x[idx+1];
699: y[idy+2] += x[idx+2];
700: y[idy+3] += x[idx+3];
701: }
702: break;
703: #if !defined(PETSC_USE_COMPLEX)
704: case MAX_VALUES:
705: for (i=0; i<n; i++) {
706: idx = *indicesx++;
707: idy = *indicesy++;
708: y[idy] = PetscMax(y[idy],x[idx]);
709: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
710: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
711: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
712: }
713: #else
714: case MAX_VALUES:
715: #endif
716: case NOT_SET_VALUES:
717: break;
718: default:
719: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
720: }
721: return(0);
722: }
723: /* ----------------------------------------------------------------------------------------------- */
724: PETSC_STATIC_INLINE void Pack_MPI1_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
725: {
726: PetscInt i,idx;
728: for (i=0; i<n; i++) {
729: idx = *indicesx++;
730: y[0] = x[idx];
731: y[1] = x[idx+1];
732: y[2] = x[idx+2];
733: y[3] = x[idx+3];
734: y[4] = x[idx+4];
735: y += 5;
736: }
737: }
739: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_5(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
740: {
741: PetscInt i,idy;
744: switch (addv) {
745: case INSERT_VALUES:
746: case INSERT_ALL_VALUES:
747: for (i=0; i<n; i++) {
748: idy = *indicesy++;
749: y[idy] = x[0];
750: y[idy+1] = x[1];
751: y[idy+2] = x[2];
752: y[idy+3] = x[3];
753: y[idy+4] = x[4];
754: x += 5;
755: }
756: break;
757: case ADD_VALUES:
758: case ADD_ALL_VALUES:
759: for (i=0; i<n; i++) {
760: idy = *indicesy++;
761: y[idy] += x[0];
762: y[idy+1] += x[1];
763: y[idy+2] += x[2];
764: y[idy+3] += x[3];
765: y[idy+4] += x[4];
766: x += 5;
767: }
768: break;
769: #if !defined(PETSC_USE_COMPLEX)
770: case MAX_VALUES:
771: for (i=0; i<n; i++) {
772: idy = *indicesy++;
773: y[idy] = PetscMax(y[idy],x[0]);
774: y[idy+1] = PetscMax(y[idy+1],x[1]);
775: y[idy+2] = PetscMax(y[idy+2],x[2]);
776: y[idy+3] = PetscMax(y[idy+3],x[3]);
777: y[idy+4] = PetscMax(y[idy+4],x[4]);
778: x += 5;
779: }
780: #else
781: case MAX_VALUES:
782: #endif
783: case NOT_SET_VALUES:
784: break;
785: default:
786: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
787: }
788: return(0);
789: }
791: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
792: {
793: PetscInt i,idx,idy;
796: switch (addv) {
797: case INSERT_VALUES:
798: case INSERT_ALL_VALUES:
799: for (i=0; i<n; i++) {
800: idx = *indicesx++;
801: idy = *indicesy++;
802: y[idy] = x[idx];
803: y[idy+1] = x[idx+1];
804: y[idy+2] = x[idx+2];
805: y[idy+3] = x[idx+3];
806: y[idy+4] = x[idx+4];
807: }
808: break;
809: case ADD_VALUES:
810: case ADD_ALL_VALUES:
811: for (i=0; i<n; i++) {
812: idx = *indicesx++;
813: idy = *indicesy++;
814: y[idy] += x[idx];
815: y[idy+1] += x[idx+1];
816: y[idy+2] += x[idx+2];
817: y[idy+3] += x[idx+3];
818: y[idy+4] += x[idx+4];
819: }
820: break;
821: #if !defined(PETSC_USE_COMPLEX)
822: case MAX_VALUES:
823: for (i=0; i<n; i++) {
824: idx = *indicesx++;
825: idy = *indicesy++;
826: y[idy] = PetscMax(y[idy],x[idx]);
827: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
828: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
829: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
830: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
831: }
832: #else
833: case MAX_VALUES:
834: #endif
835: case NOT_SET_VALUES:
836: break;
837: default:
838: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
839: }
840: return(0);
841: }
842: /* ----------------------------------------------------------------------------------------------- */
843: PETSC_STATIC_INLINE void Pack_MPI1_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
844: {
845: PetscInt i,idx;
847: for (i=0; i<n; i++) {
848: idx = *indicesx++;
849: y[0] = x[idx];
850: y[1] = x[idx+1];
851: y[2] = x[idx+2];
852: y[3] = x[idx+3];
853: y[4] = x[idx+4];
854: y[5] = x[idx+5];
855: y += 6;
856: }
857: }
859: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_6(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
860: {
861: PetscInt i,idy;
864: switch (addv) {
865: case INSERT_VALUES:
866: case INSERT_ALL_VALUES:
867: for (i=0; i<n; i++) {
868: idy = *indicesy++;
869: y[idy] = x[0];
870: y[idy+1] = x[1];
871: y[idy+2] = x[2];
872: y[idy+3] = x[3];
873: y[idy+4] = x[4];
874: y[idy+5] = x[5];
875: x += 6;
876: }
877: break;
878: case ADD_VALUES:
879: case ADD_ALL_VALUES:
880: for (i=0; i<n; i++) {
881: idy = *indicesy++;
882: y[idy] += x[0];
883: y[idy+1] += x[1];
884: y[idy+2] += x[2];
885: y[idy+3] += x[3];
886: y[idy+4] += x[4];
887: y[idy+5] += x[5];
888: x += 6;
889: }
890: break;
891: #if !defined(PETSC_USE_COMPLEX)
892: case MAX_VALUES:
893: for (i=0; i<n; i++) {
894: idy = *indicesy++;
895: y[idy] = PetscMax(y[idy],x[0]);
896: y[idy+1] = PetscMax(y[idy+1],x[1]);
897: y[idy+2] = PetscMax(y[idy+2],x[2]);
898: y[idy+3] = PetscMax(y[idy+3],x[3]);
899: y[idy+4] = PetscMax(y[idy+4],x[4]);
900: y[idy+5] = PetscMax(y[idy+5],x[5]);
901: x += 6;
902: }
903: #else
904: case MAX_VALUES:
905: #endif
906: case NOT_SET_VALUES:
907: break;
908: default:
909: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
910: }
911: return(0);
912: }
914: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
915: {
916: PetscInt i,idx,idy;
919: switch (addv) {
920: case INSERT_VALUES:
921: case INSERT_ALL_VALUES:
922: for (i=0; i<n; i++) {
923: idx = *indicesx++;
924: idy = *indicesy++;
925: y[idy] = x[idx];
926: y[idy+1] = x[idx+1];
927: y[idy+2] = x[idx+2];
928: y[idy+3] = x[idx+3];
929: y[idy+4] = x[idx+4];
930: y[idy+5] = x[idx+5];
931: }
932: break;
933: case ADD_VALUES:
934: case ADD_ALL_VALUES:
935: for (i=0; i<n; i++) {
936: idx = *indicesx++;
937: idy = *indicesy++;
938: y[idy] += x[idx];
939: y[idy+1] += x[idx+1];
940: y[idy+2] += x[idx+2];
941: y[idy+3] += x[idx+3];
942: y[idy+4] += x[idx+4];
943: y[idy+5] += x[idx+5];
944: }
945: break;
946: #if !defined(PETSC_USE_COMPLEX)
947: case MAX_VALUES:
948: for (i=0; i<n; i++) {
949: idx = *indicesx++;
950: idy = *indicesy++;
951: y[idy] = PetscMax(y[idy],x[idx]);
952: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
953: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
954: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
955: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
956: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
957: }
958: #else
959: case MAX_VALUES:
960: #endif
961: case NOT_SET_VALUES:
962: break;
963: default:
964: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
965: }
966: return(0);
967: }
968: /* ----------------------------------------------------------------------------------------------- */
969: PETSC_STATIC_INLINE void Pack_MPI1_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
970: {
971: PetscInt i,idx;
973: for (i=0; i<n; i++) {
974: idx = *indicesx++;
975: y[0] = x[idx];
976: y[1] = x[idx+1];
977: y[2] = x[idx+2];
978: y[3] = x[idx+3];
979: y[4] = x[idx+4];
980: y[5] = x[idx+5];
981: y[6] = x[idx+6];
982: y += 7;
983: }
984: }
986: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_7(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
987: {
988: PetscInt i,idy;
991: switch (addv) {
992: case INSERT_VALUES:
993: case INSERT_ALL_VALUES:
994: for (i=0; i<n; i++) {
995: idy = *indicesy++;
996: y[idy] = x[0];
997: y[idy+1] = x[1];
998: y[idy+2] = x[2];
999: y[idy+3] = x[3];
1000: y[idy+4] = x[4];
1001: y[idy+5] = x[5];
1002: y[idy+6] = x[6];
1003: x += 7;
1004: }
1005: break;
1006: case ADD_VALUES:
1007: case ADD_ALL_VALUES:
1008: for (i=0; i<n; i++) {
1009: idy = *indicesy++;
1010: y[idy] += x[0];
1011: y[idy+1] += x[1];
1012: y[idy+2] += x[2];
1013: y[idy+3] += x[3];
1014: y[idy+4] += x[4];
1015: y[idy+5] += x[5];
1016: y[idy+6] += x[6];
1017: x += 7;
1018: }
1019: break;
1020: #if !defined(PETSC_USE_COMPLEX)
1021: case MAX_VALUES:
1022: for (i=0; i<n; i++) {
1023: idy = *indicesy++;
1024: y[idy] = PetscMax(y[idy],x[0]);
1025: y[idy+1] = PetscMax(y[idy+1],x[1]);
1026: y[idy+2] = PetscMax(y[idy+2],x[2]);
1027: y[idy+3] = PetscMax(y[idy+3],x[3]);
1028: y[idy+4] = PetscMax(y[idy+4],x[4]);
1029: y[idy+5] = PetscMax(y[idy+5],x[5]);
1030: y[idy+6] = PetscMax(y[idy+6],x[6]);
1031: x += 7;
1032: }
1033: #else
1034: case MAX_VALUES:
1035: #endif
1036: case NOT_SET_VALUES:
1037: break;
1038: default:
1039: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1040: }
1041: return(0);
1042: }
1044: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1045: {
1046: PetscInt i,idx,idy;
1049: switch (addv) {
1050: case INSERT_VALUES:
1051: case INSERT_ALL_VALUES:
1052: for (i=0; i<n; i++) {
1053: idx = *indicesx++;
1054: idy = *indicesy++;
1055: y[idy] = x[idx];
1056: y[idy+1] = x[idx+1];
1057: y[idy+2] = x[idx+2];
1058: y[idy+3] = x[idx+3];
1059: y[idy+4] = x[idx+4];
1060: y[idy+5] = x[idx+5];
1061: y[idy+6] = x[idx+6];
1062: }
1063: break;
1064: case ADD_VALUES:
1065: case ADD_ALL_VALUES:
1066: for (i=0; i<n; i++) {
1067: idx = *indicesx++;
1068: idy = *indicesy++;
1069: y[idy] += x[idx];
1070: y[idy+1] += x[idx+1];
1071: y[idy+2] += x[idx+2];
1072: y[idy+3] += x[idx+3];
1073: y[idy+4] += x[idx+4];
1074: y[idy+5] += x[idx+5];
1075: y[idy+6] += x[idx+6];
1076: }
1077: break;
1078: #if !defined(PETSC_USE_COMPLEX)
1079: case MAX_VALUES:
1080: for (i=0; i<n; i++) {
1081: idx = *indicesx++;
1082: idy = *indicesy++;
1083: y[idy] = PetscMax(y[idy],x[idx]);
1084: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1085: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1086: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1087: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1088: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1089: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1090: }
1091: #else
1092: case MAX_VALUES:
1093: #endif
1094: case NOT_SET_VALUES:
1095: break;
1096: default:
1097: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1098: }
1099: return(0);
1100: }
1101: /* ----------------------------------------------------------------------------------------------- */
1102: PETSC_STATIC_INLINE void Pack_MPI1_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1103: {
1104: PetscInt i,idx;
1106: for (i=0; i<n; i++) {
1107: idx = *indicesx++;
1108: y[0] = x[idx];
1109: y[1] = x[idx+1];
1110: y[2] = x[idx+2];
1111: y[3] = x[idx+3];
1112: y[4] = x[idx+4];
1113: y[5] = x[idx+5];
1114: y[6] = x[idx+6];
1115: y[7] = x[idx+7];
1116: y += 8;
1117: }
1118: }
1120: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_8(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1121: {
1122: PetscInt i,idy;
1125: switch (addv) {
1126: case INSERT_VALUES:
1127: case INSERT_ALL_VALUES:
1128: for (i=0; i<n; i++) {
1129: idy = *indicesy++;
1130: y[idy] = x[0];
1131: y[idy+1] = x[1];
1132: y[idy+2] = x[2];
1133: y[idy+3] = x[3];
1134: y[idy+4] = x[4];
1135: y[idy+5] = x[5];
1136: y[idy+6] = x[6];
1137: y[idy+7] = x[7];
1138: x += 8;
1139: }
1140: break;
1141: case ADD_VALUES:
1142: case ADD_ALL_VALUES:
1143: for (i=0; i<n; i++) {
1144: idy = *indicesy++;
1145: y[idy] += x[0];
1146: y[idy+1] += x[1];
1147: y[idy+2] += x[2];
1148: y[idy+3] += x[3];
1149: y[idy+4] += x[4];
1150: y[idy+5] += x[5];
1151: y[idy+6] += x[6];
1152: y[idy+7] += x[7];
1153: x += 8;
1154: }
1155: break;
1156: #if !defined(PETSC_USE_COMPLEX)
1157: case MAX_VALUES:
1158: for (i=0; i<n; i++) {
1159: idy = *indicesy++;
1160: y[idy] = PetscMax(y[idy],x[0]);
1161: y[idy+1] = PetscMax(y[idy+1],x[1]);
1162: y[idy+2] = PetscMax(y[idy+2],x[2]);
1163: y[idy+3] = PetscMax(y[idy+3],x[3]);
1164: y[idy+4] = PetscMax(y[idy+4],x[4]);
1165: y[idy+5] = PetscMax(y[idy+5],x[5]);
1166: y[idy+6] = PetscMax(y[idy+6],x[6]);
1167: y[idy+7] = PetscMax(y[idy+7],x[7]);
1168: x += 8;
1169: }
1170: #else
1171: case MAX_VALUES:
1172: #endif
1173: case NOT_SET_VALUES:
1174: break;
1175: default:
1176: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1177: }
1178: return(0);
1179: }
1181: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1182: {
1183: PetscInt i,idx,idy;
1186: switch (addv) {
1187: case INSERT_VALUES:
1188: case INSERT_ALL_VALUES:
1189: for (i=0; i<n; i++) {
1190: idx = *indicesx++;
1191: idy = *indicesy++;
1192: y[idy] = x[idx];
1193: y[idy+1] = x[idx+1];
1194: y[idy+2] = x[idx+2];
1195: y[idy+3] = x[idx+3];
1196: y[idy+4] = x[idx+4];
1197: y[idy+5] = x[idx+5];
1198: y[idy+6] = x[idx+6];
1199: y[idy+7] = x[idx+7];
1200: }
1201: break;
1202: case ADD_VALUES:
1203: case ADD_ALL_VALUES:
1204: for (i=0; i<n; i++) {
1205: idx = *indicesx++;
1206: idy = *indicesy++;
1207: y[idy] += x[idx];
1208: y[idy+1] += x[idx+1];
1209: y[idy+2] += x[idx+2];
1210: y[idy+3] += x[idx+3];
1211: y[idy+4] += x[idx+4];
1212: y[idy+5] += x[idx+5];
1213: y[idy+6] += x[idx+6];
1214: y[idy+7] += x[idx+7];
1215: }
1216: break;
1217: #if !defined(PETSC_USE_COMPLEX)
1218: case MAX_VALUES:
1219: for (i=0; i<n; i++) {
1220: idx = *indicesx++;
1221: idy = *indicesy++;
1222: y[idy] = PetscMax(y[idy],x[idx]);
1223: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1224: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1225: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1226: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1227: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1228: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1229: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1230: }
1231: #else
1232: case MAX_VALUES:
1233: #endif
1234: case NOT_SET_VALUES:
1235: break;
1236: default:
1237: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1238: }
1239: return(0);
1240: }
1242: PETSC_STATIC_INLINE void Pack_MPI1_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1243: {
1244: PetscInt i,idx;
1246: for (i=0; i<n; i++) {
1247: idx = *indicesx++;
1248: y[0] = x[idx];
1249: y[1] = x[idx+1];
1250: y[2] = x[idx+2];
1251: y[3] = x[idx+3];
1252: y[4] = x[idx+4];
1253: y[5] = x[idx+5];
1254: y[6] = x[idx+6];
1255: y[7] = x[idx+7];
1256: y[8] = x[idx+8];
1257: y += 9;
1258: }
1259: }
1261: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_9(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1262: {
1263: PetscInt i,idy;
1266: switch (addv) {
1267: case INSERT_VALUES:
1268: case INSERT_ALL_VALUES:
1269: for (i=0; i<n; i++) {
1270: idy = *indicesy++;
1271: y[idy] = x[0];
1272: y[idy+1] = x[1];
1273: y[idy+2] = x[2];
1274: y[idy+3] = x[3];
1275: y[idy+4] = x[4];
1276: y[idy+5] = x[5];
1277: y[idy+6] = x[6];
1278: y[idy+7] = x[7];
1279: y[idy+8] = x[8];
1280: x += 9;
1281: }
1282: break;
1283: case ADD_VALUES:
1284: case ADD_ALL_VALUES:
1285: for (i=0; i<n; i++) {
1286: idy = *indicesy++;
1287: y[idy] += x[0];
1288: y[idy+1] += x[1];
1289: y[idy+2] += x[2];
1290: y[idy+3] += x[3];
1291: y[idy+4] += x[4];
1292: y[idy+5] += x[5];
1293: y[idy+6] += x[6];
1294: y[idy+7] += x[7];
1295: y[idy+8] += x[8];
1296: x += 9;
1297: }
1298: break;
1299: #if !defined(PETSC_USE_COMPLEX)
1300: case MAX_VALUES:
1301: for (i=0; i<n; i++) {
1302: idy = *indicesy++;
1303: y[idy] = PetscMax(y[idy],x[0]);
1304: y[idy+1] = PetscMax(y[idy+1],x[1]);
1305: y[idy+2] = PetscMax(y[idy+2],x[2]);
1306: y[idy+3] = PetscMax(y[idy+3],x[3]);
1307: y[idy+4] = PetscMax(y[idy+4],x[4]);
1308: y[idy+5] = PetscMax(y[idy+5],x[5]);
1309: y[idy+6] = PetscMax(y[idy+6],x[6]);
1310: y[idy+7] = PetscMax(y[idy+7],x[7]);
1311: y[idy+8] = PetscMax(y[idy+8],x[8]);
1312: x += 9;
1313: }
1314: #else
1315: case MAX_VALUES:
1316: #endif
1317: case NOT_SET_VALUES:
1318: break;
1319: default:
1320: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1321: }
1322: return(0);
1323: }
1325: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_9(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1326: {
1327: PetscInt i,idx,idy;
1330: switch (addv) {
1331: case INSERT_VALUES:
1332: case INSERT_ALL_VALUES:
1333: for (i=0; i<n; i++) {
1334: idx = *indicesx++;
1335: idy = *indicesy++;
1336: y[idy] = x[idx];
1337: y[idy+1] = x[idx+1];
1338: y[idy+2] = x[idx+2];
1339: y[idy+3] = x[idx+3];
1340: y[idy+4] = x[idx+4];
1341: y[idy+5] = x[idx+5];
1342: y[idy+6] = x[idx+6];
1343: y[idy+7] = x[idx+7];
1344: y[idy+8] = x[idx+8];
1345: }
1346: break;
1347: case ADD_VALUES:
1348: case ADD_ALL_VALUES:
1349: for (i=0; i<n; i++) {
1350: idx = *indicesx++;
1351: idy = *indicesy++;
1352: y[idy] += x[idx];
1353: y[idy+1] += x[idx+1];
1354: y[idy+2] += x[idx+2];
1355: y[idy+3] += x[idx+3];
1356: y[idy+4] += x[idx+4];
1357: y[idy+5] += x[idx+5];
1358: y[idy+6] += x[idx+6];
1359: y[idy+7] += x[idx+7];
1360: y[idy+8] += x[idx+8];
1361: }
1362: break;
1363: #if !defined(PETSC_USE_COMPLEX)
1364: case MAX_VALUES:
1365: for (i=0; i<n; i++) {
1366: idx = *indicesx++;
1367: idy = *indicesy++;
1368: y[idy] = PetscMax(y[idy],x[idx]);
1369: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1370: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1371: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1372: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1373: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1374: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1375: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1376: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1377: }
1378: #else
1379: case MAX_VALUES:
1380: #endif
1381: case NOT_SET_VALUES:
1382: break;
1383: default:
1384: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1385: }
1386: return(0);
1387: }
1389: PETSC_STATIC_INLINE void Pack_MPI1_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1390: {
1391: PetscInt i,idx;
1393: for (i=0; i<n; i++) {
1394: idx = *indicesx++;
1395: y[0] = x[idx];
1396: y[1] = x[idx+1];
1397: y[2] = x[idx+2];
1398: y[3] = x[idx+3];
1399: y[4] = x[idx+4];
1400: y[5] = x[idx+5];
1401: y[6] = x[idx+6];
1402: y[7] = x[idx+7];
1403: y[8] = x[idx+8];
1404: y[9] = x[idx+9];
1405: y += 10;
1406: }
1407: }
1409: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_10(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1410: {
1411: PetscInt i,idy;
1414: switch (addv) {
1415: case INSERT_VALUES:
1416: case INSERT_ALL_VALUES:
1417: for (i=0; i<n; i++) {
1418: idy = *indicesy++;
1419: y[idy] = x[0];
1420: y[idy+1] = x[1];
1421: y[idy+2] = x[2];
1422: y[idy+3] = x[3];
1423: y[idy+4] = x[4];
1424: y[idy+5] = x[5];
1425: y[idy+6] = x[6];
1426: y[idy+7] = x[7];
1427: y[idy+8] = x[8];
1428: y[idy+9] = x[9];
1429: x += 10;
1430: }
1431: break;
1432: case ADD_VALUES:
1433: case ADD_ALL_VALUES:
1434: for (i=0; i<n; i++) {
1435: idy = *indicesy++;
1436: y[idy] += x[0];
1437: y[idy+1] += x[1];
1438: y[idy+2] += x[2];
1439: y[idy+3] += x[3];
1440: y[idy+4] += x[4];
1441: y[idy+5] += x[5];
1442: y[idy+6] += x[6];
1443: y[idy+7] += x[7];
1444: y[idy+8] += x[8];
1445: y[idy+9] += x[9];
1446: x += 10;
1447: }
1448: break;
1449: #if !defined(PETSC_USE_COMPLEX)
1450: case MAX_VALUES:
1451: for (i=0; i<n; i++) {
1452: idy = *indicesy++;
1453: y[idy] = PetscMax(y[idy],x[0]);
1454: y[idy+1] = PetscMax(y[idy+1],x[1]);
1455: y[idy+2] = PetscMax(y[idy+2],x[2]);
1456: y[idy+3] = PetscMax(y[idy+3],x[3]);
1457: y[idy+4] = PetscMax(y[idy+4],x[4]);
1458: y[idy+5] = PetscMax(y[idy+5],x[5]);
1459: y[idy+6] = PetscMax(y[idy+6],x[6]);
1460: y[idy+7] = PetscMax(y[idy+7],x[7]);
1461: y[idy+8] = PetscMax(y[idy+8],x[8]);
1462: y[idy+9] = PetscMax(y[idy+9],x[9]);
1463: x += 10;
1464: }
1465: #else
1466: case MAX_VALUES:
1467: #endif
1468: case NOT_SET_VALUES:
1469: break;
1470: default:
1471: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1472: }
1473: return(0);
1474: }
1476: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_10(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1477: {
1478: PetscInt i,idx,idy;
1481: switch (addv) {
1482: case INSERT_VALUES:
1483: case INSERT_ALL_VALUES:
1484: for (i=0; i<n; i++) {
1485: idx = *indicesx++;
1486: idy = *indicesy++;
1487: y[idy] = x[idx];
1488: y[idy+1] = x[idx+1];
1489: y[idy+2] = x[idx+2];
1490: y[idy+3] = x[idx+3];
1491: y[idy+4] = x[idx+4];
1492: y[idy+5] = x[idx+5];
1493: y[idy+6] = x[idx+6];
1494: y[idy+7] = x[idx+7];
1495: y[idy+8] = x[idx+8];
1496: y[idy+9] = x[idx+9];
1497: }
1498: break;
1499: case ADD_VALUES:
1500: case ADD_ALL_VALUES:
1501: for (i=0; i<n; i++) {
1502: idx = *indicesx++;
1503: idy = *indicesy++;
1504: y[idy] += x[idx];
1505: y[idy+1] += x[idx+1];
1506: y[idy+2] += x[idx+2];
1507: y[idy+3] += x[idx+3];
1508: y[idy+4] += x[idx+4];
1509: y[idy+5] += x[idx+5];
1510: y[idy+6] += x[idx+6];
1511: y[idy+7] += x[idx+7];
1512: y[idy+8] += x[idx+8];
1513: y[idy+9] += x[idx+9];
1514: }
1515: break;
1516: #if !defined(PETSC_USE_COMPLEX)
1517: case MAX_VALUES:
1518: for (i=0; i<n; i++) {
1519: idx = *indicesx++;
1520: idy = *indicesy++;
1521: y[idy] = PetscMax(y[idy],x[idx]);
1522: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1523: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1524: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1525: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1526: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1527: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1528: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1529: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1530: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1531: }
1532: #else
1533: case MAX_VALUES:
1534: #endif
1535: case NOT_SET_VALUES:
1536: break;
1537: default:
1538: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1539: }
1540: return(0);
1541: }
1543: PETSC_STATIC_INLINE void Pack_MPI1_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1544: {
1545: PetscInt i,idx;
1547: for (i=0; i<n; i++) {
1548: idx = *indicesx++;
1549: y[0] = x[idx];
1550: y[1] = x[idx+1];
1551: y[2] = x[idx+2];
1552: y[3] = x[idx+3];
1553: y[4] = x[idx+4];
1554: y[5] = x[idx+5];
1555: y[6] = x[idx+6];
1556: y[7] = x[idx+7];
1557: y[8] = x[idx+8];
1558: y[9] = x[idx+9];
1559: y[10] = x[idx+10];
1560: y += 11;
1561: }
1562: }
1564: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_11(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1565: {
1566: PetscInt i,idy;
1569: switch (addv) {
1570: case INSERT_VALUES:
1571: case INSERT_ALL_VALUES:
1572: for (i=0; i<n; i++) {
1573: idy = *indicesy++;
1574: y[idy] = x[0];
1575: y[idy+1] = x[1];
1576: y[idy+2] = x[2];
1577: y[idy+3] = x[3];
1578: y[idy+4] = x[4];
1579: y[idy+5] = x[5];
1580: y[idy+6] = x[6];
1581: y[idy+7] = x[7];
1582: y[idy+8] = x[8];
1583: y[idy+9] = x[9];
1584: y[idy+10] = x[10];
1585: x += 11;
1586: }
1587: break;
1588: case ADD_VALUES:
1589: case ADD_ALL_VALUES:
1590: for (i=0; i<n; i++) {
1591: idy = *indicesy++;
1592: y[idy] += x[0];
1593: y[idy+1] += x[1];
1594: y[idy+2] += x[2];
1595: y[idy+3] += x[3];
1596: y[idy+4] += x[4];
1597: y[idy+5] += x[5];
1598: y[idy+6] += x[6];
1599: y[idy+7] += x[7];
1600: y[idy+8] += x[8];
1601: y[idy+9] += x[9];
1602: y[idy+10] += x[10];
1603: x += 11;
1604: }
1605: break;
1606: #if !defined(PETSC_USE_COMPLEX)
1607: case MAX_VALUES:
1608: for (i=0; i<n; i++) {
1609: idy = *indicesy++;
1610: y[idy] = PetscMax(y[idy],x[0]);
1611: y[idy+1] = PetscMax(y[idy+1],x[1]);
1612: y[idy+2] = PetscMax(y[idy+2],x[2]);
1613: y[idy+3] = PetscMax(y[idy+3],x[3]);
1614: y[idy+4] = PetscMax(y[idy+4],x[4]);
1615: y[idy+5] = PetscMax(y[idy+5],x[5]);
1616: y[idy+6] = PetscMax(y[idy+6],x[6]);
1617: y[idy+7] = PetscMax(y[idy+7],x[7]);
1618: y[idy+8] = PetscMax(y[idy+8],x[8]);
1619: y[idy+9] = PetscMax(y[idy+9],x[9]);
1620: y[idy+10] = PetscMax(y[idy+10],x[10]);
1621: x += 11;
1622: }
1623: #else
1624: case MAX_VALUES:
1625: #endif
1626: case NOT_SET_VALUES:
1627: break;
1628: default:
1629: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1630: }
1631: return(0);
1632: }
1634: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_11(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1635: {
1636: PetscInt i,idx,idy;
1639: switch (addv) {
1640: case INSERT_VALUES:
1641: case INSERT_ALL_VALUES:
1642: for (i=0; i<n; i++) {
1643: idx = *indicesx++;
1644: idy = *indicesy++;
1645: y[idy] = x[idx];
1646: y[idy+1] = x[idx+1];
1647: y[idy+2] = x[idx+2];
1648: y[idy+3] = x[idx+3];
1649: y[idy+4] = x[idx+4];
1650: y[idy+5] = x[idx+5];
1651: y[idy+6] = x[idx+6];
1652: y[idy+7] = x[idx+7];
1653: y[idy+8] = x[idx+8];
1654: y[idy+9] = x[idx+9];
1655: y[idy+10] = x[idx+10];
1656: }
1657: break;
1658: case ADD_VALUES:
1659: case ADD_ALL_VALUES:
1660: for (i=0; i<n; i++) {
1661: idx = *indicesx++;
1662: idy = *indicesy++;
1663: y[idy] += x[idx];
1664: y[idy+1] += x[idx+1];
1665: y[idy+2] += x[idx+2];
1666: y[idy+3] += x[idx+3];
1667: y[idy+4] += x[idx+4];
1668: y[idy+5] += x[idx+5];
1669: y[idy+6] += x[idx+6];
1670: y[idy+7] += x[idx+7];
1671: y[idy+8] += x[idx+8];
1672: y[idy+9] += x[idx+9];
1673: y[idy+10] += x[idx+10];
1674: }
1675: break;
1676: #if !defined(PETSC_USE_COMPLEX)
1677: case MAX_VALUES:
1678: for (i=0; i<n; i++) {
1679: idx = *indicesx++;
1680: idy = *indicesy++;
1681: y[idy] = PetscMax(y[idy],x[idx]);
1682: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1683: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1684: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1685: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1686: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1687: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1688: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1689: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1690: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1691: y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1692: }
1693: #else
1694: case MAX_VALUES:
1695: #endif
1696: case NOT_SET_VALUES:
1697: break;
1698: default:
1699: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1700: }
1701: return(0);
1702: }
1704: /* ----------------------------------------------------------------------------------------------- */
1705: PETSC_STATIC_INLINE void Pack_MPI1_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1706: {
1707: PetscInt i,idx;
1709: for (i=0; i<n; i++) {
1710: idx = *indicesx++;
1711: y[0] = x[idx];
1712: y[1] = x[idx+1];
1713: y[2] = x[idx+2];
1714: y[3] = x[idx+3];
1715: y[4] = x[idx+4];
1716: y[5] = x[idx+5];
1717: y[6] = x[idx+6];
1718: y[7] = x[idx+7];
1719: y[8] = x[idx+8];
1720: y[9] = x[idx+9];
1721: y[10] = x[idx+10];
1722: y[11] = x[idx+11];
1723: y += 12;
1724: }
1725: }
1727: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_12(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1728: {
1729: PetscInt i,idy;
1732: switch (addv) {
1733: case INSERT_VALUES:
1734: case INSERT_ALL_VALUES:
1735: for (i=0; i<n; i++) {
1736: idy = *indicesy++;
1737: y[idy] = x[0];
1738: y[idy+1] = x[1];
1739: y[idy+2] = x[2];
1740: y[idy+3] = x[3];
1741: y[idy+4] = x[4];
1742: y[idy+5] = x[5];
1743: y[idy+6] = x[6];
1744: y[idy+7] = x[7];
1745: y[idy+8] = x[8];
1746: y[idy+9] = x[9];
1747: y[idy+10] = x[10];
1748: y[idy+11] = x[11];
1749: x += 12;
1750: }
1751: break;
1752: case ADD_VALUES:
1753: case ADD_ALL_VALUES:
1754: for (i=0; i<n; i++) {
1755: idy = *indicesy++;
1756: y[idy] += x[0];
1757: y[idy+1] += x[1];
1758: y[idy+2] += x[2];
1759: y[idy+3] += x[3];
1760: y[idy+4] += x[4];
1761: y[idy+5] += x[5];
1762: y[idy+6] += x[6];
1763: y[idy+7] += x[7];
1764: y[idy+8] += x[8];
1765: y[idy+9] += x[9];
1766: y[idy+10] += x[10];
1767: y[idy+11] += x[11];
1768: x += 12;
1769: }
1770: break;
1771: #if !defined(PETSC_USE_COMPLEX)
1772: case MAX_VALUES:
1773: for (i=0; i<n; i++) {
1774: idy = *indicesy++;
1775: y[idy] = PetscMax(y[idy],x[0]);
1776: y[idy+1] = PetscMax(y[idy+1],x[1]);
1777: y[idy+2] = PetscMax(y[idy+2],x[2]);
1778: y[idy+3] = PetscMax(y[idy+3],x[3]);
1779: y[idy+4] = PetscMax(y[idy+4],x[4]);
1780: y[idy+5] = PetscMax(y[idy+5],x[5]);
1781: y[idy+6] = PetscMax(y[idy+6],x[6]);
1782: y[idy+7] = PetscMax(y[idy+7],x[7]);
1783: y[idy+8] = PetscMax(y[idy+8],x[8]);
1784: y[idy+9] = PetscMax(y[idy+9],x[9]);
1785: y[idy+10] = PetscMax(y[idy+10],x[10]);
1786: y[idy+11] = PetscMax(y[idy+11],x[11]);
1787: x += 12;
1788: }
1789: #else
1790: case MAX_VALUES:
1791: #endif
1792: case NOT_SET_VALUES:
1793: break;
1794: default:
1795: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1796: }
1797: return(0);
1798: }
1800: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1801: {
1802: PetscInt i,idx,idy;
1805: switch (addv) {
1806: case INSERT_VALUES:
1807: case INSERT_ALL_VALUES:
1808: for (i=0; i<n; i++) {
1809: idx = *indicesx++;
1810: idy = *indicesy++;
1811: y[idy] = x[idx];
1812: y[idy+1] = x[idx+1];
1813: y[idy+2] = x[idx+2];
1814: y[idy+3] = x[idx+3];
1815: y[idy+4] = x[idx+4];
1816: y[idy+5] = x[idx+5];
1817: y[idy+6] = x[idx+6];
1818: y[idy+7] = x[idx+7];
1819: y[idy+8] = x[idx+8];
1820: y[idy+9] = x[idx+9];
1821: y[idy+10] = x[idx+10];
1822: y[idy+11] = x[idx+11];
1823: }
1824: break;
1825: case ADD_VALUES:
1826: case ADD_ALL_VALUES:
1827: for (i=0; i<n; i++) {
1828: idx = *indicesx++;
1829: idy = *indicesy++;
1830: y[idy] += x[idx];
1831: y[idy+1] += x[idx+1];
1832: y[idy+2] += x[idx+2];
1833: y[idy+3] += x[idx+3];
1834: y[idy+4] += x[idx+4];
1835: y[idy+5] += x[idx+5];
1836: y[idy+6] += x[idx+6];
1837: y[idy+7] += x[idx+7];
1838: y[idy+8] += x[idx+8];
1839: y[idy+9] += x[idx+9];
1840: y[idy+10] += x[idx+10];
1841: y[idy+11] += x[idx+11];
1842: }
1843: break;
1844: #if !defined(PETSC_USE_COMPLEX)
1845: case MAX_VALUES:
1846: for (i=0; i<n; i++) {
1847: idx = *indicesx++;
1848: idy = *indicesy++;
1849: y[idy] = PetscMax(y[idy],x[idx]);
1850: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1851: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1852: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1853: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1854: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1855: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1856: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1857: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1858: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1859: y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1860: y[idy+11] = PetscMax(y[idy+11],x[idx+11]);
1861: }
1862: #else
1863: case MAX_VALUES:
1864: #endif
1865: case NOT_SET_VALUES:
1866: break;
1867: default:
1868: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1869: }
1870: return(0);
1871: }
1873: /* ----------------------------------------------------------------------------------------------- */
1874: PETSC_STATIC_INLINE void Pack_MPI1_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y,PetscInt bs)
1875: {
1876: PetscInt i,idx;
1878: for (i=0; i<n; i++) {
1879: idx = *indicesx++;
1880: PetscMemcpy(y,x + idx,bs*sizeof(PetscScalar));
1881: y += bs;
1882: }
1883: }
1885: PETSC_STATIC_INLINE PetscErrorCode UnPack_MPI1_bs(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1886: {
1887: PetscInt i,idy,j;
1890: switch (addv) {
1891: case INSERT_VALUES:
1892: case INSERT_ALL_VALUES:
1893: for (i=0; i<n; i++) {
1894: idy = *indicesy++;
1895: PetscMemcpy(y + idy,x,bs*sizeof(PetscScalar));
1896: x += bs;
1897: }
1898: break;
1899: case ADD_VALUES:
1900: case ADD_ALL_VALUES:
1901: for (i=0; i<n; i++) {
1902: idy = *indicesy++;
1903: for (j=0; j<bs; j++) y[idy+j] += x[j];
1904: x += bs;
1905: }
1906: break;
1907: #if !defined(PETSC_USE_COMPLEX)
1908: case MAX_VALUES:
1909: for (i=0; i<n; i++) {
1910: idy = *indicesy++;
1911: for (j=0; j<bs; j++) y[idy+j] = PetscMax(y[idy+j],x[j]);
1912: x += bs;
1913: }
1914: #else
1915: case MAX_VALUES:
1916: #endif
1917: case NOT_SET_VALUES:
1918: break;
1919: default:
1920: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1921: }
1922: return(0);
1923: }
1925: PETSC_STATIC_INLINE PetscErrorCode Scatter_MPI1_bs(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv,PetscInt bs)
1926: {
1927: PetscInt i,idx,idy,j;
1930: switch (addv) {
1931: case INSERT_VALUES:
1932: case INSERT_ALL_VALUES:
1933: for (i=0; i<n; i++) {
1934: idx = *indicesx++;
1935: idy = *indicesy++;
1936: PetscMemcpy(y + idy, x + idx,bs*sizeof(PetscScalar));
1937: }
1938: break;
1939: case ADD_VALUES:
1940: case ADD_ALL_VALUES:
1941: for (i=0; i<n; i++) {
1942: idx = *indicesx++;
1943: idy = *indicesy++;
1944: for (j=0; j<bs; j++ ) y[idy+j] += x[idx+j];
1945: }
1946: break;
1947: #if !defined(PETSC_USE_COMPLEX)
1948: case MAX_VALUES:
1949: for (i=0; i<n; i++) {
1950: idx = *indicesx++;
1951: idy = *indicesy++;
1952: for (j=0; j<bs; j++ ) y[idy+j] = PetscMax(y[idy+j],x[idx+j]);
1953: }
1954: #else
1955: case MAX_VALUES:
1956: #endif
1957: case NOT_SET_VALUES:
1958: break;
1959: default:
1960: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1961: }
1962: return(0);
1963: }
1965: /* Create the VecScatterBegin/End_P for our chosen block sizes */
1966: #define BS 1
1967: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
1968: #define BS 2
1969: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
1970: #define BS 3
1971: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
1972: #define BS 4
1973: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
1974: #define BS 5
1975: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
1976: #define BS 6
1977: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
1978: #define BS 7
1979: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
1980: #define BS 8
1981: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
1982: #define BS 9
1983: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
1984: #define BS 10
1985: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
1986: #define BS 11
1987: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
1988: #define BS 12
1989: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
1990: #define BS bs
1991: #include <../src/vec/vscat/impls/vpscat_mpi1.h>
1993: /* ==========================================================================================*/
1995: /* create parallel to sequential scatter context */
1997: PetscErrorCode VecScatterCreateCommon_PtoS_MPI1(VecScatter_MPI_General*,VecScatter_MPI_General*,VecScatter);
1999: /*
2000: bs indicates how many elements there are in each block. Normally this would be 1.
2002: contains check that PetscMPIInt can handle the sizes needed
2003: */
2004: PetscErrorCode VecScatterCreateLocal_PtoS_MPI1(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2005: {
2006: VecScatter_MPI_General *from,*to;
2007: PetscMPIInt size,rank,imdex,tag,n;
2008: PetscInt *source = NULL,*owners = NULL,nxr;
2009: PetscInt *lowner = NULL,*start = NULL,lengthy,lengthx;
2010: PetscMPIInt *nprocs = NULL,nrecvs;
2011: PetscInt i,j,idx,nsends;
2012: PetscMPIInt *owner = NULL;
2013: PetscInt *starts = NULL,count,slen;
2014: PetscInt *rvalues,*svalues,base,*values,nprocslocal,recvtotal,*rsvalues;
2015: PetscMPIInt *onodes1,*olengths1;
2016: MPI_Comm comm;
2017: MPI_Request *send_waits = NULL,*recv_waits = NULL;
2018: MPI_Status recv_status,*send_status;
2019: PetscErrorCode ierr;
2022: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2023: PetscObjectGetComm((PetscObject)xin,&comm);
2024: MPI_Comm_rank(comm,&rank);
2025: MPI_Comm_size(comm,&size);
2026: owners = xin->map->range;
2027: VecGetSize(yin,&lengthy);
2028: VecGetSize(xin,&lengthx);
2030: /* first count number of contributors to each processor */
2031: /* owner[i]: owner of ith inidx; nproc[j]: num of inidx to be sent to jth proc */
2032: PetscMalloc2(size,&nprocs,nx,&owner);
2033: PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
2035: j = 0;
2036: nsends = 0;
2037: for (i=0; i<nx; i++) {
2038: idx = bs*inidx[i];
2039: if (idx < owners[j]) j = 0;
2040: for (; j<size; j++) {
2041: if (idx < owners[j+1]) {
2042: if (!nprocs[j]++) nsends++;
2043: owner[i] = j;
2044: break;
2045: }
2046: }
2047: if (j == size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ith %D block entry %D not owned by any process, upper bound %D",i,idx,owners[size]);
2048: }
2050: nprocslocal = nprocs[rank];
2051: nprocs[rank] = 0;
2052: if (nprocslocal) nsends--;
2053: /* inform other processors of number of messages and max length*/
2054: PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2055: PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2056: PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2057: recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
2059: /* post receives: */
2060: PetscMalloc3(recvtotal,&rvalues,nrecvs,&source,nrecvs,&recv_waits);
2061: count = 0;
2062: for (i=0; i<nrecvs; i++) {
2063: MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2064: count += olengths1[i];
2065: }
2067: /* do sends:
2068: 1) starts[i] gives the starting index in svalues for stuff going to
2069: the ith processor
2070: */
2071: nxr = 0;
2072: for (i=0; i<nx; i++) {
2073: if (owner[i] != rank) nxr++;
2074: }
2075: PetscMalloc3(nxr,&svalues,nsends,&send_waits,size+1,&starts);
2077: starts[0] = 0;
2078: for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2079: for (i=0; i<nx; i++) {
2080: if (owner[i] != rank) svalues[starts[owner[i]]++] = bs*inidx[i];
2081: }
2082: starts[0] = 0;
2083: for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2084: count = 0;
2085: for (i=0; i<size; i++) {
2086: if (nprocs[i]) {
2087: MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);
2088: }
2089: }
2091: /* wait on receives */
2092: count = nrecvs;
2093: slen = 0;
2094: while (count) {
2095: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2096: /* unpack receives into our local space */
2097: MPI_Get_count(&recv_status,MPIU_INT,&n);
2098: slen += n;
2099: count--;
2100: }
2102: if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not expected %D",slen,recvtotal);
2104: /* allocate entire send scatter context */
2105: PetscNewLog(ctx,&to);
2106: to->n = nrecvs;
2108: PetscMalloc1(nrecvs,&to->requests);
2109: PetscMalloc4(bs*slen,&to->values,slen,&to->indices,nrecvs+1,&to->starts,nrecvs,&to->procs);
2110: PetscMalloc2(PetscMax(to->n,nsends),&to->sstatus,PetscMax(to->n,nsends),&to->rstatus);
2112: ctx->todata = (void*)to;
2113: to->starts[0] = 0;
2115: if (nrecvs) {
2116: /* move the data into the send scatter */
2117: base = owners[rank];
2118: rsvalues = rvalues;
2119: for (i=0; i<nrecvs; i++) {
2120: to->starts[i+1] = to->starts[i] + olengths1[i];
2121: to->procs[i] = onodes1[i];
2122: values = rsvalues;
2123: rsvalues += olengths1[i];
2124: for (j=0; j<olengths1[i]; j++) to->indices[to->starts[i] + j] = values[j] - base;
2125: }
2126: }
2127: PetscFree(olengths1);
2128: PetscFree(onodes1);
2129: PetscFree3(rvalues,source,recv_waits);
2131: /* allocate entire receive scatter context */
2132: PetscNewLog(ctx,&from);
2133: from->n = nsends;
2135: PetscMalloc1(nsends,&from->requests);
2136: PetscMalloc4((ny-nprocslocal)*bs,&from->values,ny-nprocslocal,&from->indices,nsends+1,&from->starts,from->n,&from->procs);
2137: ctx->fromdata = (void*)from;
2139: /* move data into receive scatter */
2140: PetscMalloc2(size,&lowner,nsends+1,&start);
2141: count = 0; from->starts[0] = start[0] = 0;
2142: for (i=0; i<size; i++) {
2143: if (nprocs[i]) {
2144: lowner[i] = count;
2145: from->procs[count++] = i;
2146: from->starts[count] = start[count] = start[count-1] + nprocs[i];
2147: }
2148: }
2150: for (i=0; i<nx; i++) {
2151: if (owner[i] != rank) {
2152: from->indices[start[lowner[owner[i]]]++] = bs*inidy[i];
2153: if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2154: }
2155: }
2156: PetscFree2(lowner,start);
2157: PetscFree2(nprocs,owner);
2159: /* wait on sends */
2160: if (nsends) {
2161: PetscMalloc1(nsends,&send_status);
2162: MPI_Waitall(nsends,send_waits,send_status);
2163: PetscFree(send_status);
2164: }
2165: PetscFree3(svalues,send_waits,starts);
2167: if (nprocslocal) {
2168: PetscInt nt = from->local.n = to->local.n = nprocslocal;
2169: /* we have a scatter to ourselves */
2170: PetscMalloc1(nt,&to->local.vslots);
2171: PetscMalloc1(nt,&from->local.vslots);
2172: nt = 0;
2173: for (i=0; i<nx; i++) {
2174: idx = bs*inidx[i];
2175: if (idx >= owners[rank] && idx < owners[rank+1]) {
2176: to->local.vslots[nt] = idx - owners[rank];
2177: from->local.vslots[nt++] = bs*inidy[i];
2178: if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
2179: }
2180: }
2181: PetscLogObjectMemory((PetscObject)ctx,2*nt*sizeof(PetscInt));
2182: } else {
2183: from->local.n = 0;
2184: from->local.vslots = 0;
2185: to->local.n = 0;
2186: to->local.vslots = 0;
2187: }
2189: from->local.nonmatching_computed = PETSC_FALSE;
2190: from->local.n_nonmatching = 0;
2191: from->local.slots_nonmatching = 0;
2192: to->local.nonmatching_computed = PETSC_FALSE;
2193: to->local.n_nonmatching = 0;
2194: to->local.slots_nonmatching = 0;
2196: from->format = VEC_SCATTER_MPI_GENERAL;
2197: to->format = VEC_SCATTER_MPI_GENERAL;
2198: from->bs = bs;
2199: to->bs = bs;
2201: VecScatterCreateCommon_PtoS_MPI1(from,to,ctx);
2202: return(0);
2203: }
2205: /*
2206: bs indicates how many elements there are in each block. Normally this would be 1.
2207: */
2208: PetscErrorCode VecScatterCreateCommon_PtoS_MPI1(VecScatter_MPI_General *from,VecScatter_MPI_General *to,VecScatter ctx)
2209: {
2210: MPI_Comm comm;
2211: PetscMPIInt tag = ((PetscObject)ctx)->tag, tagr;
2212: PetscInt bs = to->bs;
2213: PetscMPIInt size;
2214: PetscInt i, n;
2218: PetscObjectGetComm((PetscObject)ctx,&comm);
2219: PetscObjectGetNewTag((PetscObject)ctx,&tagr);
2220: ctx->ops->destroy = VecScatterDestroy_PtoP_MPI1;
2222: MPI_Comm_size(comm,&size);
2223: /* check if the receives are ALL going into contiguous locations; if so can skip indexing */
2224: to->contiq = PETSC_FALSE;
2225: n = from->starts[from->n];
2226: from->contiq = PETSC_TRUE;
2227: for (i=1; i<n; i++) {
2228: if (from->indices[i] != from->indices[i-1] + bs) {
2229: from->contiq = PETSC_FALSE;
2230: break;
2231: }
2232: }
2234: {
2235: PetscInt *sstarts = to->starts, *rstarts = from->starts;
2236: PetscMPIInt *sprocs = to->procs, *rprocs = from->procs;
2237: MPI_Request *swaits = to->requests,*rwaits = from->requests;
2238: MPI_Request *rev_swaits,*rev_rwaits;
2239: PetscScalar *Ssvalues = to->values, *Srvalues = from->values;
2241: /* allocate additional wait variables for the "reverse" scatter */
2242: PetscMalloc1(to->n,&rev_rwaits);
2243: PetscMalloc1(from->n,&rev_swaits);
2244: to->rev_requests = rev_rwaits;
2245: from->rev_requests = rev_swaits;
2247: for (i=0; i<from->n; i++) {
2248: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2249: }
2251: for (i=0; i<to->n; i++) {
2252: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2253: }
2254: /* Register receives for scatter and reverse */
2255: for (i=0; i<from->n; i++) {
2256: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
2257: }
2258: for (i=0; i<to->n; i++) {
2259: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tagr,comm,rev_rwaits+i);
2260: }
2261: ctx->ops->copy = VecScatterCopy_PtoP_X_MPI1;
2262: }
2263: PetscInfo1(ctx,"Using blocksize %D scatter\n",bs);
2265: #if defined(PETSC_USE_DEBUG)
2266: MPIU_Allreduce(&bs,&i,1,MPIU_INT,MPI_MIN,PetscObjectComm((PetscObject)ctx));
2267: MPIU_Allreduce(&bs,&n,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)ctx));
2268: if (bs!=i || bs!=n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Blocks size %D != %D or %D",bs,i,n);
2269: #endif
2271: switch (bs) {
2272: case 12:
2273: ctx->ops->begin = VecScatterBeginMPI1_12;
2274: ctx->ops->end = VecScatterEndMPI1_12;
2275: break;
2276: case 11:
2277: ctx->ops->begin = VecScatterBeginMPI1_11;
2278: ctx->ops->end = VecScatterEndMPI1_11;
2279: break;
2280: case 10:
2281: ctx->ops->begin = VecScatterBeginMPI1_10;
2282: ctx->ops->end = VecScatterEndMPI1_10;
2283: break;
2284: case 9:
2285: ctx->ops->begin = VecScatterBeginMPI1_9;
2286: ctx->ops->end = VecScatterEndMPI1_9;
2287: break;
2288: case 8:
2289: ctx->ops->begin = VecScatterBeginMPI1_8;
2290: ctx->ops->end = VecScatterEndMPI1_8;
2291: break;
2292: case 7:
2293: ctx->ops->begin = VecScatterBeginMPI1_7;
2294: ctx->ops->end = VecScatterEndMPI1_7;
2295: break;
2296: case 6:
2297: ctx->ops->begin = VecScatterBeginMPI1_6;
2298: ctx->ops->end = VecScatterEndMPI1_6;
2299: break;
2300: case 5:
2301: ctx->ops->begin = VecScatterBeginMPI1_5;
2302: ctx->ops->end = VecScatterEndMPI1_5;
2303: break;
2304: case 4:
2305: ctx->ops->begin = VecScatterBeginMPI1_4;
2306: ctx->ops->end = VecScatterEndMPI1_4;
2307: break;
2308: case 3:
2309: ctx->ops->begin = VecScatterBeginMPI1_3;
2310: ctx->ops->end = VecScatterEndMPI1_3;
2311: break;
2312: case 2:
2313: ctx->ops->begin = VecScatterBeginMPI1_2;
2314: ctx->ops->end = VecScatterEndMPI1_2;
2315: break;
2316: case 1:
2317: ctx->ops->begin = VecScatterBeginMPI1_1;
2318: ctx->ops->end = VecScatterEndMPI1_1;
2319: break;
2320: default:
2321: ctx->ops->begin = VecScatterBeginMPI1_bs;
2322: ctx->ops->end = VecScatterEndMPI1_bs;
2324: }
2325: ctx->ops->view = VecScatterView_MPI_MPI1;
2326: /* try to optimize PtoP vecscatter with memcpy's */
2327: VecScatterMemcpyPlanCreate_PtoP(to,from);
2328: return(0);
2329: }
2332: /* ------------------------------------------------------------------------------------*/
2333: /*
2334: Scatter from local Seq vectors to a parallel vector.
2335: Reverses the order of the arguments, calls VecScatterCreateLocal_PtoS() then
2336: reverses the result.
2337: */
2338: PetscErrorCode VecScatterCreateLocal_StoP_MPI1(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2339: {
2340: PetscErrorCode ierr;
2341: MPI_Request *waits;
2342: VecScatter_MPI_General *to,*from;
2345: VecScatterCreateLocal_PtoS_MPI1(ny,inidy,nx,inidx,yin,xin,bs,ctx);
2346: to = (VecScatter_MPI_General*)ctx->fromdata;
2347: from = (VecScatter_MPI_General*)ctx->todata;
2348: ctx->todata = (void*)to;
2349: ctx->fromdata = (void*)from;
2350: /* these two are special, they are ALWAYS stored in to struct */
2351: to->sstatus = from->sstatus;
2352: to->rstatus = from->rstatus;
2354: from->sstatus = 0;
2355: from->rstatus = 0;
2357: waits = from->rev_requests;
2358: from->rev_requests = from->requests;
2359: from->requests = waits;
2360: waits = to->rev_requests;
2361: to->rev_requests = to->requests;
2362: to->requests = waits;
2363: return(0);
2364: }
2366: /* ---------------------------------------------------------------------------------*/
2367: PetscErrorCode VecScatterCreateLocal_PtoP_MPI1(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2368: {
2370: PetscMPIInt size,rank,tag,imdex,n;
2371: PetscInt *owners = xin->map->range;
2372: PetscMPIInt *nprocs = NULL;
2373: PetscInt i,j,idx,nsends,*local_inidx = NULL,*local_inidy = NULL;
2374: PetscMPIInt *owner = NULL;
2375: PetscInt *starts = NULL,count,slen;
2376: PetscInt *rvalues = NULL,*svalues = NULL,base,*values = NULL,*rsvalues,recvtotal,lastidx;
2377: PetscMPIInt *onodes1,*olengths1,nrecvs;
2378: MPI_Comm comm;
2379: MPI_Request *send_waits = NULL,*recv_waits = NULL;
2380: MPI_Status recv_status,*send_status = NULL;
2381: PetscBool duplicate = PETSC_FALSE;
2382: #if defined(PETSC_USE_DEBUG)
2383: PetscBool found = PETSC_FALSE;
2384: #endif
2387: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2388: PetscObjectGetComm((PetscObject)xin,&comm);
2389: MPI_Comm_size(comm,&size);
2390: MPI_Comm_rank(comm,&rank);
2391: if (size == 1) {
2392: VecScatterCreateLocal_StoP_MPI1(nx,inidx,ny,inidy,xin,yin,bs,ctx);
2393: return(0);
2394: }
2396: /*
2397: Each processor ships off its inidx[j] and inidy[j] to the appropriate processor
2398: They then call the StoPScatterCreate()
2399: */
2400: /* first count number of contributors to each processor */
2401: PetscMalloc3(size,&nprocs,nx,&owner,(size+1),&starts);
2402: PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
2404: lastidx = -1;
2405: j = 0;
2406: for (i=0; i<nx; i++) {
2407: /* if indices are NOT locally sorted, need to start search at the beginning */
2408: if (lastidx > (idx = bs*inidx[i])) j = 0;
2409: lastidx = idx;
2410: for (; j<size; j++) {
2411: if (idx >= owners[j] && idx < owners[j+1]) {
2412: nprocs[j]++;
2413: owner[i] = j;
2414: #if defined(PETSC_USE_DEBUG)
2415: found = PETSC_TRUE;
2416: #endif
2417: break;
2418: }
2419: }
2420: #if defined(PETSC_USE_DEBUG)
2421: if (!found) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2422: found = PETSC_FALSE;
2423: #endif
2424: }
2425: nsends = 0;
2426: for (i=0; i<size; i++) nsends += (nprocs[i] > 0);
2428: /* inform other processors of number of messages and max length*/
2429: PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2430: PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2431: PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2432: recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
2434: /* post receives: */
2435: PetscMalloc5(2*recvtotal,&rvalues,2*nx,&svalues,nrecvs,&recv_waits,nsends,&send_waits,nsends,&send_status);
2437: count = 0;
2438: for (i=0; i<nrecvs; i++) {
2439: MPI_Irecv((rvalues+2*count),2*olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2440: count += olengths1[i];
2441: }
2442: PetscFree(onodes1);
2444: /* do sends:
2445: 1) starts[i] gives the starting index in svalues for stuff going to
2446: the ith processor
2447: */
2448: starts[0]= 0;
2449: for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2450: for (i=0; i<nx; i++) {
2451: svalues[2*starts[owner[i]]] = bs*inidx[i];
2452: svalues[1 + 2*starts[owner[i]]++] = bs*inidy[i];
2453: }
2455: starts[0] = 0;
2456: for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2457: count = 0;
2458: for (i=0; i<size; i++) {
2459: if (nprocs[i]) {
2460: MPI_Isend(svalues+2*starts[i],2*nprocs[i],MPIU_INT,i,tag,comm,send_waits+count);
2461: count++;
2462: }
2463: }
2464: PetscFree3(nprocs,owner,starts);
2466: /* wait on receives */
2467: count = nrecvs;
2468: slen = 0;
2469: while (count) {
2470: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2471: /* unpack receives into our local space */
2472: MPI_Get_count(&recv_status,MPIU_INT,&n);
2473: slen += n/2;
2474: count--;
2475: }
2476: if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);
2478: PetscMalloc2(slen,&local_inidx,slen,&local_inidy);
2479: base = owners[rank];
2480: count = 0;
2481: rsvalues = rvalues;
2482: for (i=0; i<nrecvs; i++) {
2483: values = rsvalues;
2484: rsvalues += 2*olengths1[i];
2485: for (j=0; j<olengths1[i]; j++) {
2486: local_inidx[count] = values[2*j] - base;
2487: local_inidy[count++] = values[2*j+1];
2488: }
2489: }
2490: PetscFree(olengths1);
2492: /* wait on sends */
2493: if (nsends) {MPI_Waitall(nsends,send_waits,send_status);}
2494: PetscFree5(rvalues,svalues,recv_waits,send_waits,send_status);
2496: /*
2497: should sort and remove duplicates from local_inidx,local_inidy
2498: */
2500: #if defined(do_it_slow)
2501: /* sort on the from index */
2502: PetscSortIntWithArray(slen,local_inidx,local_inidy);
2503: start = 0;
2504: while (start < slen) {
2505: count = start+1;
2506: last = local_inidx[start];
2507: while (count < slen && last == local_inidx[count]) count++;
2508: if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
2509: /* sort on to index */
2510: PetscSortInt(count-start,local_inidy+start);
2511: }
2512: /* remove duplicates; not most efficient way, but probably good enough */
2513: i = start;
2514: while (i < count-1) {
2515: if (local_inidy[i] != local_inidy[i+1]) i++;
2516: else { /* found a duplicate */
2517: duplicate = PETSC_TRUE;
2518: for (j=i; j<slen-1; j++) {
2519: local_inidx[j] = local_inidx[j+1];
2520: local_inidy[j] = local_inidy[j+1];
2521: }
2522: slen--;
2523: count--;
2524: }
2525: }
2526: start = count;
2527: }
2528: #endif
2529: if (duplicate) {
2530: PetscInfo(ctx,"Duplicate from to indices passed in VecScatterCreate(), they are ignored\n");
2531: }
2532: VecScatterCreateLocal_StoP_MPI1(slen,local_inidx,slen,local_inidy,xin,yin,bs,ctx);
2533: PetscFree2(local_inidx,local_inidy);
2534: return(0);
2535: }