/* Actual source code: vpscat.c  (petsc-3.3-p7, 2013-05-11) */
2: /*
3: Defines parallel vector scatters.
4: */
6: #include <petsc-private/isimpl.h>
7: #include <petsc-private/vecimpl.h> /*I "petscvec.h" I*/
8: #include <../src/vec/vec/impls/dvecimpl.h>
9: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
13: PetscErrorCode VecScatterView_MPI(VecScatter ctx,PetscViewer viewer)
14: {
15: VecScatter_MPI_General *to=(VecScatter_MPI_General*)ctx->todata;
16: VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
17: PetscErrorCode ierr;
18: PetscInt i;
19: PetscMPIInt rank;
20: PetscViewerFormat format;
21: PetscBool iascii;
24: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
25: if (iascii) {
26: MPI_Comm_rank(((PetscObject)ctx)->comm,&rank);
27: PetscViewerGetFormat(viewer,&format);
28: if (format == PETSC_VIEWER_ASCII_INFO) {
29: PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;
31: MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,((PetscObject)ctx)->comm);
32: MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,((PetscObject)ctx)->comm);
33: itmp = to->starts[to->n];
34: MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,((PetscObject)ctx)->comm);
35: itmp = from->starts[from->n];
36: MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,((PetscObject)ctx)->comm);
37: MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,((PetscObject)ctx)->comm);
39: PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
40: PetscViewerASCIIPrintf(viewer," Maximum number sends %D\n",nsend_max);
41: PetscViewerASCIIPrintf(viewer," Maximum number receives %D\n",nrecv_max);
42: PetscViewerASCIIPrintf(viewer," Maximum data sent %D\n",(int)(lensend_max*to->bs*sizeof(PetscScalar)));
43: PetscViewerASCIIPrintf(viewer," Maximum data received %D\n",(int)(lenrecv_max*to->bs*sizeof(PetscScalar)));
44: PetscViewerASCIIPrintf(viewer," Total data sent %D\n",(int)(alldata*to->bs*sizeof(PetscScalar)));
46: } else {
47: PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
48: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);
49: if (to->n) {
50: for (i=0; i<to->n; i++){
51: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length = %D to whom %D\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
52: }
53: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote sends (in order by process sent to)\n");
54: for (i=0; i<to->starts[to->n]; i++){
55: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,to->indices[i]);
56: }
57: }
59: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);
60: if (from->n) {
61: for (i=0; i<from->n; i++){
62: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %D\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
63: }
65: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote receives (in order by process received from)\n");
66: for (i=0; i<from->starts[from->n]; i++){
67: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,from->indices[i]);
68: }
69: }
70: if (to->local.n) {
71: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Indices for local part of scatter\n",rank);
72: for (i=0; i<to->local.n; i++){
73: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] From %D to %D \n",rank,from->local.vslots[i],to->local.vslots[i]);
74: }
75: }
77: PetscViewerFlush(viewer);
78: PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
79: }
80: } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported for this scatter",((PetscObject)viewer)->type_name);
81: return(0);
82: }
84: /* -----------------------------------------------------------------------------------*/
85: /*
86: The next routine determines which part of the local scatter is an
87: exact copy of values into their current locations. We check this here and
88: then know that we need not perform that portion of the scatter when the vector is
89: scattering to itself with INSERT_VALUES.
91: This is currently not used but would speed up, for example, DMDALocalToLocalBegin()/DMDALocalToLocalEnd().
93: */
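/*
   Worked example (illustrative only, not part of the library): with

      to->vslots   = {0, 1, 7, 8}
      from->vslots = {0, 1, 3, 4}

   the first two entries are already in place, so only the non-matching pairs
   (7 <- 3) and (8 <- 4) need to be moved; the routine below records exactly
   those two pairs in slots_nonmatching and sets n_nonmatching = 2.
*/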
96: PetscErrorCode VecScatterLocalOptimize_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from)
97: {
98: PetscInt n = to->n,n_nonmatching = 0,i,*to_slots = to->vslots,*from_slots = from->vslots;
100: PetscInt *nto_slots,*nfrom_slots,j = 0;
101:
103: for (i=0; i<n; i++) {
104: if (to_slots[i] != from_slots[i]) n_nonmatching++;
105: }
107: if (!n_nonmatching) {
108: to->nonmatching_computed = PETSC_TRUE;
109: to->n_nonmatching = from->n_nonmatching = 0;
110: PetscInfo1(scatter,"Reduced %D to 0\n", n);
111: } else if (n_nonmatching == n) {
112: to->nonmatching_computed = PETSC_FALSE;
113: PetscInfo(scatter,"All values non-matching\n");
114: } else {
115: to->nonmatching_computed= PETSC_TRUE;
116: to->n_nonmatching = from->n_nonmatching = n_nonmatching;
117: PetscMalloc(n_nonmatching*sizeof(PetscInt),&nto_slots);
118: PetscMalloc(n_nonmatching*sizeof(PetscInt),&nfrom_slots);
119: to->slots_nonmatching = nto_slots;
120: from->slots_nonmatching = nfrom_slots;
121: for (i=0; i<n; i++) {
122: if (to_slots[i] != from_slots[i]) {
123: nto_slots[j] = to_slots[i];
124: nfrom_slots[j] = from_slots[i];
125: j++;
126: }
127: }
128: PetscInfo2(scatter,"Reduced %D to %D\n",n,n_nonmatching);
129: }
130: return(0);
131: }
133: /* --------------------------------------------------------------------------------------*/
135: /* -------------------------------------------------------------------------------------*/
138: PetscErrorCode VecScatterDestroy_PtoP(VecScatter ctx)
139: {
140: VecScatter_MPI_General *to = (VecScatter_MPI_General*)ctx->todata;
141: VecScatter_MPI_General *from = (VecScatter_MPI_General*)ctx->fromdata;
142: PetscErrorCode ierr;
143: PetscInt i;
146: if (to->use_readyreceiver) {
147: /*
148: Since we have already posted sends we must cancel them before freeing
149: the requests
150: */
151: for (i=0; i<from->n; i++) {
152: MPI_Cancel(from->requests+i);
153: }
154: for (i=0; i<to->n; i++) {
155: MPI_Cancel(to->rev_requests+i);
156: }
157: MPI_Waitall(from->n,from->requests,to->rstatus);
158: MPI_Waitall(to->n,to->rev_requests,to->rstatus);
159: }
161: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
162: if (to->use_alltoallw) {
163: PetscFree3(to->wcounts,to->wdispls,to->types);
164: PetscFree3(from->wcounts,from->wdispls,from->types);
165: }
166: #endif
168: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
169: if (to->use_window) {
170: MPI_Win_free(&from->window);
171: MPI_Win_free(&to->window);
172: PetscFree(from->winstarts);
173: PetscFree(to->winstarts);
174: }
175: #endif
177: if (to->use_alltoallv) {
178: PetscFree2(to->counts,to->displs);
179: PetscFree2(from->counts,from->displs);
180: }
182: /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
183: /*
184: IBM's PE version of MPI has a bug where freeing these requests corrupts later
185: message passing.
186: */
187: #if !defined(PETSC_HAVE_BROKEN_REQUEST_FREE)
188: if (!to->use_alltoallv && !to->use_window) { /* currently the to->requests etc are ALWAYS allocated even if not used */
189: if (to->requests) {
190: for (i=0; i<to->n; i++) {
191: MPI_Request_free(to->requests + i);
192: }
193: }
194: if (to->rev_requests) {
195: for (i=0; i<to->n; i++) {
196: MPI_Request_free(to->rev_requests + i);
197: }
198: }
199: }
200: /*
201: MPICH could not properly cancel requests, so with ready-receiver mode we
202: cannot free the requests. It may be fixed now; if not, put the following
203: code inside an if (!to->use_readyreceiver) { block.
204: */
205: if (!to->use_alltoallv && !to->use_window) { /* currently the from->requests etc are ALWAYS allocated even if not used */
206: if (from->requests) {
207: for (i=0; i<from->n; i++) {
208: MPI_Request_free(from->requests + i);
209: }
210: }
212: if (from->rev_requests) {
213: for (i=0; i<from->n; i++) {
214: MPI_Request_free(from->rev_requests + i);
215: }
216: }
217: }
218: #endif
220: PetscFree(to->local.vslots);
221: PetscFree(from->local.vslots);
222: PetscFree2(to->counts,to->displs);
223: PetscFree2(from->counts,from->displs);
224: PetscFree(to->local.slots_nonmatching);
225: PetscFree(from->local.slots_nonmatching);
226: PetscFree(to->rev_requests);
227: PetscFree(from->rev_requests);
228: PetscFree(to->requests);
229: PetscFree(from->requests);
230: PetscFree4(to->values,to->indices,to->starts,to->procs);
231: PetscFree2(to->sstatus,to->rstatus);
232: PetscFree4(from->values,from->indices,from->starts,from->procs);
233: PetscFree(from);
234: PetscFree(to);
235: #if defined(PETSC_HAVE_CUSP)
236: PetscCUSPIndicesDestroy((PetscCUSPIndices*)&ctx->spptr);
237: #endif
238: return(0);
239: }
243: /* --------------------------------------------------------------------------------------*/
244: /*
245: Special optimization to detect whether the local part of the scatter is actually
246: a contiguous copy. If so, the scatter routines call PetscMemcpy() instead of scattering entry by entry.
247:
248: */
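/*
   Worked example (illustrative only, not part of the library): with bs = 1,
   n = 3, to->vslots = {10,11,12} and from->vslots = {3,4,5}, both index sets
   advance by exactly bs each step, so the routine below records

      copy_start  = 10 (to) / 3 (from)
      copy_length = 3*sizeof(PetscScalar)

   and the local scatter can later be performed with a single PetscMemcpy().
*/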
251: PetscErrorCode VecScatterLocalOptimizeCopy_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from,PetscInt bs)
252: {
253: PetscInt n = to->n,i,*to_slots = to->vslots,*from_slots = from->vslots;
254: PetscInt to_start,from_start;
258: to_start = to_slots[0];
259: from_start = from_slots[0];
261: for (i=1; i<n; i++) {
262: to_start += bs;
263: from_start += bs;
264: if (to_slots[i] != to_start) return(0);
265: if (from_slots[i] != from_start) return(0);
266: }
267: to->is_copy = PETSC_TRUE;
268: to->copy_start = to_slots[0];
269: to->copy_length = bs*sizeof(PetscScalar)*n;
270: from->is_copy = PETSC_TRUE;
271: from->copy_start = from_slots[0];
272: from->copy_length = bs*sizeof(PetscScalar)*n;
273: PetscInfo(scatter,"Local scatter is a copy, optimizing for it\n");
274: return(0);
275: }
277: /* --------------------------------------------------------------------------------------*/
281: PetscErrorCode VecScatterCopy_PtoP_X(VecScatter in,VecScatter out)
282: {
283: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
284: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
285: PetscErrorCode ierr;
286: PetscInt ny,bs = in_from->bs;
289: out->begin = in->begin;
290: out->end = in->end;
291: out->copy = in->copy;
292: out->destroy = in->destroy;
293: out->view = in->view;
295: /* allocate entire send scatter context */
296: PetscNewLog(out,VecScatter_MPI_General,&out_to);
297: PetscNewLog(out,VecScatter_MPI_General,&out_from);
299: ny = in_to->starts[in_to->n];
300: out_to->n = in_to->n;
301: out_to->type = in_to->type;
302: out_to->sendfirst = in_to->sendfirst;
304: PetscMalloc(out_to->n*sizeof(MPI_Request),&out_to->requests);
305: PetscMalloc4(bs*ny,PetscScalar,&out_to->values,ny,PetscInt,&out_to->indices,out_to->n+1,PetscInt,&out_to->starts,out_to->n,PetscMPIInt,&out_to->procs);
306: PetscMalloc2(PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->sstatus,PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->rstatus);
307: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
308: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
309: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
310:
311: out->todata = (void*)out_to;
312: out_to->local.n = in_to->local.n;
313: out_to->local.nonmatching_computed = PETSC_FALSE;
314: out_to->local.n_nonmatching = 0;
315: out_to->local.slots_nonmatching = 0;
316: if (in_to->local.n) {
317: PetscMalloc(in_to->local.n*sizeof(PetscInt),&out_to->local.vslots);
318: PetscMalloc(in_from->local.n*sizeof(PetscInt),&out_from->local.vslots);
319: PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
320: PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
321: } else {
322: out_to->local.vslots = 0;
323: out_from->local.vslots = 0;
324: }
326: /* allocate entire receive context */
327: out_from->type = in_from->type;
328: ny = in_from->starts[in_from->n];
329: out_from->n = in_from->n;
330: out_from->sendfirst = in_from->sendfirst;
332: PetscMalloc(out_from->n*sizeof(MPI_Request),&out_from->requests);
333: PetscMalloc4(ny*bs,PetscScalar,&out_from->values,ny,PetscInt,&out_from->indices,out_from->n+1,PetscInt,&out_from->starts,out_from->n,PetscMPIInt,&out_from->procs);
334: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
335: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
336: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
337: out->fromdata = (void*)out_from;
338: out_from->local.n = in_from->local.n;
339: out_from->local.nonmatching_computed = PETSC_FALSE;
340: out_from->local.n_nonmatching = 0;
341: out_from->local.slots_nonmatching = 0;
343: /*
344: set up the request arrays for use with MPI_Send_init() and MPI_Recv_init()
345: */
346: {
347: PetscMPIInt tag;
348: MPI_Comm comm;
349: PetscInt *sstarts = out_to->starts, *rstarts = out_from->starts;
350: PetscMPIInt *sprocs = out_to->procs, *rprocs = out_from->procs;
351: PetscInt i;
352: PetscBool flg;
353: MPI_Request *swaits = out_to->requests,*rwaits = out_from->requests;
354: MPI_Request *rev_swaits,*rev_rwaits;
355: PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;
357: PetscMalloc(in_to->n*sizeof(MPI_Request),&out_to->rev_requests);
358: PetscMalloc(in_from->n*sizeof(MPI_Request),&out_from->rev_requests);
360: rev_rwaits = out_to->rev_requests;
361: rev_swaits = out_from->rev_requests;
363: out_from->bs = out_to->bs = bs;
364: tag = ((PetscObject)out)->tag;
365: comm = ((PetscObject)out)->comm;
367: /* Register the receives that you will use later (sends for scatter reverse) */
368: for (i=0; i<out_from->n; i++) {
369: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
370: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
371: }
373: flg = PETSC_FALSE;
374: PetscOptionsGetBool(PETSC_NULL,"-vecscatter_rsend",&flg,PETSC_NULL);
375: if (flg) {
376: out_to->use_readyreceiver = PETSC_TRUE;
377: out_from->use_readyreceiver = PETSC_TRUE;
378: for (i=0; i<out_to->n; i++) {
379: MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
380: }
381: if (out_from->n) {MPI_Startall_irecv(out_from->starts[out_from->n]*out_from->bs,out_from->n,out_from->requests);}
382: MPI_Barrier(comm);
383: PetscInfo(in,"Using VecScatter ready receiver mode\n");
384: } else {
385: out_to->use_readyreceiver = PETSC_FALSE;
386: out_from->use_readyreceiver = PETSC_FALSE;
387: flg = PETSC_FALSE;
388: PetscOptionsGetBool(PETSC_NULL,"-vecscatter_ssend",&flg,PETSC_NULL);
389: if (flg) {
390: PetscInfo(in,"Using VecScatter Ssend mode\n");
391: }
392: for (i=0; i<out_to->n; i++) {
393: if (!flg) {
394: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
395: } else {
396: MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
397: }
398: }
399: }
400: /* Register receives for scatter reverse */
401: for (i=0; i<out_to->n; i++) {
402: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
403: }
404: }
406: return(0);
407: }
411: PetscErrorCode VecScatterCopy_PtoP_AllToAll(VecScatter in,VecScatter out)
412: {
413: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
414: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
415: PetscErrorCode ierr;
416: PetscInt ny,bs = in_from->bs;
417: PetscMPIInt size;
420: MPI_Comm_size(((PetscObject)in)->comm,&size);
421: out->begin = in->begin;
422: out->end = in->end;
423: out->copy = in->copy;
424: out->destroy = in->destroy;
425: out->view = in->view;
427: /* allocate entire send scatter context */
428: PetscNewLog(out,VecScatter_MPI_General,&out_to);
429: PetscNewLog(out,VecScatter_MPI_General,&out_from);
431: ny = in_to->starts[in_to->n];
432: out_to->n = in_to->n;
433: out_to->type = in_to->type;
434: out_to->sendfirst = in_to->sendfirst;
436: PetscMalloc(out_to->n*sizeof(MPI_Request),&out_to->requests);
437: PetscMalloc4(bs*ny,PetscScalar,&out_to->values,ny,PetscInt,&out_to->indices,out_to->n+1,PetscInt,&out_to->starts,out_to->n,PetscMPIInt,&out_to->procs);
438: PetscMalloc2(PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->sstatus,PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->rstatus);
439: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
440: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
441: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
442:
443: out->todata = (void*)out_to;
444: out_to->local.n = in_to->local.n;
445: out_to->local.nonmatching_computed = PETSC_FALSE;
446: out_to->local.n_nonmatching = 0;
447: out_to->local.slots_nonmatching = 0;
448: if (in_to->local.n) {
449: PetscMalloc(in_to->local.n*sizeof(PetscInt),&out_to->local.vslots);
450: PetscMalloc(in_from->local.n*sizeof(PetscInt),&out_from->local.vslots);
451: PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
452: PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
453: } else {
454: out_to->local.vslots = 0;
455: out_from->local.vslots = 0;
456: }
458: /* allocate entire receive context */
459: out_from->type = in_from->type;
460: ny = in_from->starts[in_from->n];
461: out_from->n = in_from->n;
462: out_from->sendfirst = in_from->sendfirst;
464: PetscMalloc(out_from->n*sizeof(MPI_Request),&out_from->requests);
465: PetscMalloc4(ny*bs,PetscScalar,&out_from->values,ny,PetscInt,&out_from->indices,out_from->n+1,PetscInt,&out_from->starts,out_from->n,PetscMPIInt,&out_from->procs);
466: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
467: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
468: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
469: out->fromdata = (void*)out_from;
470: out_from->local.n = in_from->local.n;
471: out_from->local.nonmatching_computed = PETSC_FALSE;
472: out_from->local.n_nonmatching = 0;
473: out_from->local.slots_nonmatching = 0;
475: out_to->use_alltoallv = out_from->use_alltoallv = PETSC_TRUE;
477: PetscMalloc2(size,PetscMPIInt,&out_to->counts,size,PetscMPIInt,&out_to->displs);
478: PetscMemcpy(out_to->counts,in_to->counts,size*sizeof(PetscMPIInt));
479: PetscMemcpy(out_to->displs,in_to->displs,size*sizeof(PetscMPIInt));
481: PetscMalloc2(size,PetscMPIInt,&out_from->counts,size,PetscMPIInt,&out_from->displs);
482: PetscMemcpy(out_from->counts,in_from->counts,size*sizeof(PetscMPIInt));
483: PetscMemcpy(out_from->displs,in_from->displs,size*sizeof(PetscMPIInt));
484: return(0);
485: }
486: /* --------------------------------------------------------------------------------------------------
487: Packs the message data into send buffers and unpacks it from receive buffers.
489: These routines could be generated automatically.
491: Fortran kernels etc. could be used instead.
492: */
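/*
   Worked example (illustrative only, not part of the library): the index
   arrays hold the offset of the first entry of each block, so for block size
   2 (Pack_2 below) with n = 2 and indicesx = {0,6},

      Pack_2(2,indicesx,x,y)   yields   y = {x[0],x[1],x[6],x[7]}

   and UnPack_2 with ADD_VALUES scatters the buffer back, adding each pair of
   buffer entries into y[idy] and y[idy+1].
*/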
493: PETSC_STATIC_INLINE void Pack_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
494: {
495: PetscInt i;
496: for (i=0; i<n; i++) {
497: y[i] = x[indicesx[i]];
498: }
499: }
500: PETSC_STATIC_INLINE void UnPack_1(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
501: {
502: PetscInt i;
503: switch (addv) {
504: case INSERT_VALUES:
505: case INSERT_ALL_VALUES:
506: for (i=0; i<n; i++) {
507: y[indicesy[i]] = x[i];
508: }
509: break;
510: case ADD_VALUES:
511: case ADD_ALL_VALUES:
512: for (i=0; i<n; i++) {
513: y[indicesy[i]] += x[i];
514: }
515: break;
516: #if !defined(PETSC_USE_COMPLEX)
517: case MAX_VALUES:
518: for (i=0; i<n; i++) {
519: y[indicesy[i]] = PetscMax(y[indicesy[i]],x[i]);
520: }
521: #else
522: case MAX_VALUES:
523: #endif
524: case NOT_SET_VALUES:
525: break;
526: }
527: }
529: PETSC_STATIC_INLINE void Scatter_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
530: {
531: PetscInt i;
532: switch (addv) {
533: case INSERT_VALUES:
534: case INSERT_ALL_VALUES:
535: for (i=0; i<n; i++) {
536: y[indicesy[i]] = x[indicesx[i]];
537: }
538: break;
539: case ADD_VALUES:
540: case ADD_ALL_VALUES:
541: for (i=0; i<n; i++) {
542: y[indicesy[i]] += x[indicesx[i]];
543: }
544: break;
545: #if !defined(PETSC_USE_COMPLEX)
546: case MAX_VALUES:
547: for (i=0; i<n; i++) {
548: y[indicesy[i]] = PetscMax(y[indicesy[i]],x[indicesx[i]]);
549: }
550: #else
551: case MAX_VALUES:
552: #endif
553: case NOT_SET_VALUES:
554: break;
555: }
556: }
558: /* ----------------------------------------------------------------------------------------------- */
559: PETSC_STATIC_INLINE void Pack_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
560: {
561: PetscInt i,idx;
563: for (i=0; i<n; i++) {
564: idx = *indicesx++;
565: y[0] = x[idx];
566: y[1] = x[idx+1];
567: y += 2;
568: }
569: }
570: PETSC_STATIC_INLINE void UnPack_2(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
571: {
572: PetscInt i,idy;
574: switch (addv) {
575: case INSERT_VALUES:
576: case INSERT_ALL_VALUES:
577: for (i=0; i<n; i++) {
578: idy = *indicesy++;
579: y[idy] = x[0];
580: y[idy+1] = x[1];
581: x += 2;
582: }
583: break;
584: case ADD_VALUES:
585: case ADD_ALL_VALUES:
586: for (i=0; i<n; i++) {
587: idy = *indicesy++;
588: y[idy] += x[0];
589: y[idy+1] += x[1];
590: x += 2;
591: }
592: break;
593: #if !defined(PETSC_USE_COMPLEX)
594: case MAX_VALUES:
595: for (i=0; i<n; i++) {
596: idy = *indicesy++;
597: y[idy] = PetscMax(y[idy],x[0]);
598: y[idy+1] = PetscMax(y[idy+1],x[1]);
599: x += 2;
600: }
601: #else
602: case MAX_VALUES:
603: #endif
604: case NOT_SET_VALUES:
605: break;
606: }
607: }
609: PETSC_STATIC_INLINE void Scatter_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
610: {
611: PetscInt i,idx,idy;
613: switch (addv) {
614: case INSERT_VALUES:
615: case INSERT_ALL_VALUES:
616: for (i=0; i<n; i++) {
617: idx = *indicesx++;
618: idy = *indicesy++;
619: y[idy] = x[idx];
620: y[idy+1] = x[idx+1];
621: }
622: break;
623: case ADD_VALUES:
624: case ADD_ALL_VALUES:
625: for (i=0; i<n; i++) {
626: idx = *indicesx++;
627: idy = *indicesy++;
628: y[idy] += x[idx];
629: y[idy+1] += x[idx+1];
630: }
631: break;
632: #if !defined(PETSC_USE_COMPLEX)
633: case MAX_VALUES:
634: for (i=0; i<n; i++) {
635: idx = *indicesx++;
636: idy = *indicesy++;
637: y[idy] = PetscMax(y[idy],x[idx]);
638: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
639: }
640: #else
641: case MAX_VALUES:
642: #endif
643: case NOT_SET_VALUES:
644: break;
645: }
646: }
647: /* ----------------------------------------------------------------------------------------------- */
648: PETSC_STATIC_INLINE void Pack_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
649: {
650: PetscInt i,idx;
652: for (i=0; i<n; i++) {
653: idx = *indicesx++;
654: y[0] = x[idx];
655: y[1] = x[idx+1];
656: y[2] = x[idx+2];
657: y += 3;
658: }
659: }
660: PETSC_STATIC_INLINE void UnPack_3(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
661: {
662: PetscInt i,idy;
664: switch (addv) {
665: case INSERT_VALUES:
666: case INSERT_ALL_VALUES:
667: for (i=0; i<n; i++) {
668: idy = *indicesy++;
669: y[idy] = x[0];
670: y[idy+1] = x[1];
671: y[idy+2] = x[2];
672: x += 3;
673: }
674: break;
675: case ADD_VALUES:
676: case ADD_ALL_VALUES:
677: for (i=0; i<n; i++) {
678: idy = *indicesy++;
679: y[idy] += x[0];
680: y[idy+1] += x[1];
681: y[idy+2] += x[2];
682: x += 3;
683: }
684: break;
685: #if !defined(PETSC_USE_COMPLEX)
686: case MAX_VALUES:
687: for (i=0; i<n; i++) {
688: idy = *indicesy++;
689: y[idy] = PetscMax(y[idy],x[0]);
690: y[idy+1] = PetscMax(y[idy+1],x[1]);
691: y[idy+2] = PetscMax(y[idy+2],x[2]);
692: x += 3;
693: }
694: #else
695: case MAX_VALUES:
696: #endif
697: case NOT_SET_VALUES:
698: break;
699: }
700: }
702: PETSC_STATIC_INLINE void Scatter_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
703: {
704: PetscInt i,idx,idy;
706: switch (addv) {
707: case INSERT_VALUES:
708: case INSERT_ALL_VALUES:
709: for (i=0; i<n; i++) {
710: idx = *indicesx++;
711: idy = *indicesy++;
712: y[idy] = x[idx];
713: y[idy+1] = x[idx+1];
714: y[idy+2] = x[idx+2];
715: }
716: break;
717: case ADD_VALUES:
718: case ADD_ALL_VALUES:
719: for (i=0; i<n; i++) {
720: idx = *indicesx++;
721: idy = *indicesy++;
722: y[idy] += x[idx];
723: y[idy+1] += x[idx+1];
724: y[idy+2] += x[idx+2];
725: }
726: break;
727: #if !defined(PETSC_USE_COMPLEX)
728: case MAX_VALUES:
729: for (i=0; i<n; i++) {
730: idx = *indicesx++;
731: idy = *indicesy++;
732: y[idy] = PetscMax(y[idy],x[idx]);
733: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
734: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
735: }
736: #else
737: case MAX_VALUES:
738: #endif
739: case NOT_SET_VALUES:
740: break;
741: }
742: }
743: /* ----------------------------------------------------------------------------------------------- */
744: PETSC_STATIC_INLINE void Pack_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
745: {
746: PetscInt i,idx;
748: for (i=0; i<n; i++) {
749: idx = *indicesx++;
750: y[0] = x[idx];
751: y[1] = x[idx+1];
752: y[2] = x[idx+2];
753: y[3] = x[idx+3];
754: y += 4;
755: }
756: }
757: PETSC_STATIC_INLINE void UnPack_4(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
758: {
759: PetscInt i,idy;
761: switch (addv) {
762: case INSERT_VALUES:
763: case INSERT_ALL_VALUES:
764: for (i=0; i<n; i++) {
765: idy = *indicesy++;
766: y[idy] = x[0];
767: y[idy+1] = x[1];
768: y[idy+2] = x[2];
769: y[idy+3] = x[3];
770: x += 4;
771: }
772: break;
773: case ADD_VALUES:
774: case ADD_ALL_VALUES:
775: for (i=0; i<n; i++) {
776: idy = *indicesy++;
777: y[idy] += x[0];
778: y[idy+1] += x[1];
779: y[idy+2] += x[2];
780: y[idy+3] += x[3];
781: x += 4;
782: }
783: break;
784: #if !defined(PETSC_USE_COMPLEX)
785: case MAX_VALUES:
786: for (i=0; i<n; i++) {
787: idy = *indicesy++;
788: y[idy] = PetscMax(y[idy],x[0]);
789: y[idy+1] = PetscMax(y[idy+1],x[1]);
790: y[idy+2] = PetscMax(y[idy+2],x[2]);
791: y[idy+3] = PetscMax(y[idy+3],x[3]);
792: x += 4;
793: }
794: #else
795: case MAX_VALUES:
796: #endif
797: case NOT_SET_VALUES:
798: break;
799: }
800: }
802: PETSC_STATIC_INLINE void Scatter_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
803: {
804: PetscInt i,idx,idy;
806: switch (addv) {
807: case INSERT_VALUES:
808: case INSERT_ALL_VALUES:
809: for (i=0; i<n; i++) {
810: idx = *indicesx++;
811: idy = *indicesy++;
812: y[idy] = x[idx];
813: y[idy+1] = x[idx+1];
814: y[idy+2] = x[idx+2];
815: y[idy+3] = x[idx+3];
816: }
817: break;
818: case ADD_VALUES:
819: case ADD_ALL_VALUES:
820: for (i=0; i<n; i++) {
821: idx = *indicesx++;
822: idy = *indicesy++;
823: y[idy] += x[idx];
824: y[idy+1] += x[idx+1];
825: y[idy+2] += x[idx+2];
826: y[idy+3] += x[idx+3];
827: }
828: break;
829: #if !defined(PETSC_USE_COMPLEX)
830: case MAX_VALUES:
831: for (i=0; i<n; i++) {
832: idx = *indicesx++;
833: idy = *indicesy++;
834: y[idy] = PetscMax(y[idy],x[idx]);
835: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
836: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
837: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
838: }
839: #else
840: case MAX_VALUES:
841: #endif
842: case NOT_SET_VALUES:
843: break;
844: }
845: }
846: /* ----------------------------------------------------------------------------------------------- */
847: PETSC_STATIC_INLINE void Pack_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
848: {
849: PetscInt i,idx;
851: for (i=0; i<n; i++) {
852: idx = *indicesx++;
853: y[0] = x[idx];
854: y[1] = x[idx+1];
855: y[2] = x[idx+2];
856: y[3] = x[idx+3];
857: y[4] = x[idx+4];
858: y += 5;
859: }
860: }
861: PETSC_STATIC_INLINE void UnPack_5(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
862: {
863: PetscInt i,idy;
865: switch (addv) {
866: case INSERT_VALUES:
867: case INSERT_ALL_VALUES:
868: for (i=0; i<n; i++) {
869: idy = *indicesy++;
870: y[idy] = x[0];
871: y[idy+1] = x[1];
872: y[idy+2] = x[2];
873: y[idy+3] = x[3];
874: y[idy+4] = x[4];
875: x += 5;
876: }
877: break;
878: case ADD_VALUES:
879: case ADD_ALL_VALUES:
880: for (i=0; i<n; i++) {
881: idy = *indicesy++;
882: y[idy] += x[0];
883: y[idy+1] += x[1];
884: y[idy+2] += x[2];
885: y[idy+3] += x[3];
886: y[idy+4] += x[4];
887: x += 5;
888: }
889: break;
890: #if !defined(PETSC_USE_COMPLEX)
891: case MAX_VALUES:
892: for (i=0; i<n; i++) {
893: idy = *indicesy++;
894: y[idy] = PetscMax(y[idy],x[0]);
895: y[idy+1] = PetscMax(y[idy+1],x[1]);
896: y[idy+2] = PetscMax(y[idy+2],x[2]);
897: y[idy+3] = PetscMax(y[idy+3],x[3]);
898: y[idy+4] = PetscMax(y[idy+4],x[4]);
899: x += 5;
900: }
901: #else
902: case MAX_VALUES:
903: #endif
904: case NOT_SET_VALUES:
905: break;
906: }
907: }
909: PETSC_STATIC_INLINE void Scatter_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
910: {
911: PetscInt i,idx,idy;
913: switch (addv) {
914: case INSERT_VALUES:
915: case INSERT_ALL_VALUES:
916: for (i=0; i<n; i++) {
917: idx = *indicesx++;
918: idy = *indicesy++;
919: y[idy] = x[idx];
920: y[idy+1] = x[idx+1];
921: y[idy+2] = x[idx+2];
922: y[idy+3] = x[idx+3];
923: y[idy+4] = x[idx+4];
924: }
925: break;
926: case ADD_VALUES:
927: case ADD_ALL_VALUES:
928: for (i=0; i<n; i++) {
929: idx = *indicesx++;
930: idy = *indicesy++;
931: y[idy] += x[idx];
932: y[idy+1] += x[idx+1];
933: y[idy+2] += x[idx+2];
934: y[idy+3] += x[idx+3];
935: y[idy+4] += x[idx+4];
936: }
937: break;
938: #if !defined(PETSC_USE_COMPLEX)
939: case MAX_VALUES:
940: for (i=0; i<n; i++) {
941: idx = *indicesx++;
942: idy = *indicesy++;
943: y[idy] = PetscMax(y[idy],x[idx]);
944: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
945: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
946: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
947: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
948: }
949: #else
950: case MAX_VALUES:
951: #endif
952: case NOT_SET_VALUES:
953: break;
954: }
955: }
956: /* ----------------------------------------------------------------------------------------------- */
957: PETSC_STATIC_INLINE void Pack_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
958: {
959: PetscInt i,idx;
961: for (i=0; i<n; i++) {
962: idx = *indicesx++;
963: y[0] = x[idx];
964: y[1] = x[idx+1];
965: y[2] = x[idx+2];
966: y[3] = x[idx+3];
967: y[4] = x[idx+4];
968: y[5] = x[idx+5];
969: y += 6;
970: }
971: }
972: PETSC_STATIC_INLINE void UnPack_6(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
973: {
974: PetscInt i,idy;
976: switch (addv) {
977: case INSERT_VALUES:
978: case INSERT_ALL_VALUES:
979: for (i=0; i<n; i++) {
980: idy = *indicesy++;
981: y[idy] = x[0];
982: y[idy+1] = x[1];
983: y[idy+2] = x[2];
984: y[idy+3] = x[3];
985: y[idy+4] = x[4];
986: y[idy+5] = x[5];
987: x += 6;
988: }
989: break;
990: case ADD_VALUES:
991: case ADD_ALL_VALUES:
992: for (i=0; i<n; i++) {
993: idy = *indicesy++;
994: y[idy] += x[0];
995: y[idy+1] += x[1];
996: y[idy+2] += x[2];
997: y[idy+3] += x[3];
998: y[idy+4] += x[4];
999: y[idy+5] += x[5];
1000: x += 6;
1001: }
1002: break;
1003: #if !defined(PETSC_USE_COMPLEX)
1004: case MAX_VALUES:
1005: for (i=0; i<n; i++) {
1006: idy = *indicesy++;
1007: y[idy] = PetscMax(y[idy],x[0]);
1008: y[idy+1] = PetscMax(y[idy+1],x[1]);
1009: y[idy+2] = PetscMax(y[idy+2],x[2]);
1010: y[idy+3] = PetscMax(y[idy+3],x[3]);
1011: y[idy+4] = PetscMax(y[idy+4],x[4]);
1012: y[idy+5] = PetscMax(y[idy+5],x[5]);
1013: x += 6;
1014: }
1015: #else
1016: case MAX_VALUES:
1017: #endif
1018: case NOT_SET_VALUES:
1019: break;
1020: }
1021: }
1023: PETSC_STATIC_INLINE void Scatter_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1024: {
1025: PetscInt i,idx,idy;
1027: switch (addv) {
1028: case INSERT_VALUES:
1029: case INSERT_ALL_VALUES:
1030: for (i=0; i<n; i++) {
1031: idx = *indicesx++;
1032: idy = *indicesy++;
1033: y[idy] = x[idx];
1034: y[idy+1] = x[idx+1];
1035: y[idy+2] = x[idx+2];
1036: y[idy+3] = x[idx+3];
1037: y[idy+4] = x[idx+4];
1038: y[idy+5] = x[idx+5];
1039: }
1040: break;
1041: case ADD_VALUES:
1042: case ADD_ALL_VALUES:
1043: for (i=0; i<n; i++) {
1044: idx = *indicesx++;
1045: idy = *indicesy++;
1046: y[idy] += x[idx];
1047: y[idy+1] += x[idx+1];
1048: y[idy+2] += x[idx+2];
1049: y[idy+3] += x[idx+3];
1050: y[idy+4] += x[idx+4];
1051: y[idy+5] += x[idx+5];
1052: }
1053: break;
1054: #if !defined(PETSC_USE_COMPLEX)
1055: case MAX_VALUES:
1056: for (i=0; i<n; i++) {
1057: idx = *indicesx++;
1058: idy = *indicesy++;
1059: y[idy] = PetscMax(y[idy],x[idx]);
1060: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1061: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1062: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1063: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1064: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1065: }
1066: #else
1067: case MAX_VALUES:
1068: #endif
1069: case NOT_SET_VALUES:
1070: break;
1071: }
1072: }
1073: /* ----------------------------------------------------------------------------------------------- */
1074: PETSC_STATIC_INLINE void Pack_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
1075: {
1076: PetscInt i,idx;
1078: for (i=0; i<n; i++) {
1079: idx = *indicesx++;
1080: y[0] = x[idx];
1081: y[1] = x[idx+1];
1082: y[2] = x[idx+2];
1083: y[3] = x[idx+3];
1084: y[4] = x[idx+4];
1085: y[5] = x[idx+5];
1086: y[6] = x[idx+6];
1087: y += 7;
1088: }
1089: }
1090: PETSC_STATIC_INLINE void UnPack_7(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1091: {
1092: PetscInt i,idy;
1094: switch (addv) {
1095: case INSERT_VALUES:
1096: case INSERT_ALL_VALUES:
1097: for (i=0; i<n; i++) {
1098: idy = *indicesy++;
1099: y[idy] = x[0];
1100: y[idy+1] = x[1];
1101: y[idy+2] = x[2];
1102: y[idy+3] = x[3];
1103: y[idy+4] = x[4];
1104: y[idy+5] = x[5];
1105: y[idy+6] = x[6];
1106: x += 7;
1107: }
1108: break;
1109: case ADD_VALUES:
1110: case ADD_ALL_VALUES:
1111: for (i=0; i<n; i++) {
1112: idy = *indicesy++;
1113: y[idy] += x[0];
1114: y[idy+1] += x[1];
1115: y[idy+2] += x[2];
1116: y[idy+3] += x[3];
1117: y[idy+4] += x[4];
1118: y[idy+5] += x[5];
1119: y[idy+6] += x[6];
1120: x += 7;
1121: }
1122: break;
1123: #if !defined(PETSC_USE_COMPLEX)
1124: case MAX_VALUES:
1125: for (i=0; i<n; i++) {
1126: idy = *indicesy++;
1127: y[idy] = PetscMax(y[idy],x[0]);
1128: y[idy+1] = PetscMax(y[idy+1],x[1]);
1129: y[idy+2] = PetscMax(y[idy+2],x[2]);
1130: y[idy+3] = PetscMax(y[idy+3],x[3]);
1131: y[idy+4] = PetscMax(y[idy+4],x[4]);
1132: y[idy+5] = PetscMax(y[idy+5],x[5]);
1133: y[idy+6] = PetscMax(y[idy+6],x[6]);
1134: x += 7;
1135: }
1136: #else
1137: case MAX_VALUES:
1138: #endif
1139: case NOT_SET_VALUES:
1140: break;
1141: }
1142: }
1144: PETSC_STATIC_INLINE void Scatter_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1145: {
1146: PetscInt i,idx,idy;
1148: switch (addv) {
1149: case INSERT_VALUES:
1150: case INSERT_ALL_VALUES:
1151: for (i=0; i<n; i++) {
1152: idx = *indicesx++;
1153: idy = *indicesy++;
1154: y[idy] = x[idx];
1155: y[idy+1] = x[idx+1];
1156: y[idy+2] = x[idx+2];
1157: y[idy+3] = x[idx+3];
1158: y[idy+4] = x[idx+4];
1159: y[idy+5] = x[idx+5];
1160: y[idy+6] = x[idx+6];
1161: }
1162: break;
1163: case ADD_VALUES:
1164: case ADD_ALL_VALUES:
1165: for (i=0; i<n; i++) {
1166: idx = *indicesx++;
1167: idy = *indicesy++;
1168: y[idy] += x[idx];
1169: y[idy+1] += x[idx+1];
1170: y[idy+2] += x[idx+2];
1171: y[idy+3] += x[idx+3];
1172: y[idy+4] += x[idx+4];
1173: y[idy+5] += x[idx+5];
1174: y[idy+6] += x[idx+6];
1175: }
1176: break;
1177: #if !defined(PETSC_USE_COMPLEX)
1178: case MAX_VALUES:
1179: for (i=0; i<n; i++) {
1180: idx = *indicesx++;
1181: idy = *indicesy++;
1182: y[idy] = PetscMax(y[idy],x[idx]);
1183: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1184: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1185: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1186: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1187: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1188: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1189: }
1190: #else
1191: case MAX_VALUES:
1192: #endif
1193: case NOT_SET_VALUES:
1194: break;
1195: }
1196: }
1197: /* ----------------------------------------------------------------------------------------------- */
1198: PETSC_STATIC_INLINE void Pack_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
1199: {
1200: PetscInt i,idx;
1202: for (i=0; i<n; i++) {
1203: idx = *indicesx++;
1204: y[0] = x[idx];
1205: y[1] = x[idx+1];
1206: y[2] = x[idx+2];
1207: y[3] = x[idx+3];
1208: y[4] = x[idx+4];
1209: y[5] = x[idx+5];
1210: y[6] = x[idx+6];
1211: y[7] = x[idx+7];
1212: y += 8;
1213: }
1214: }
1215: PETSC_STATIC_INLINE void UnPack_8(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1216: {
1217: PetscInt i,idy;
1219: switch (addv) {
1220: case INSERT_VALUES:
1221: case INSERT_ALL_VALUES:
1222: for (i=0; i<n; i++) {
1223: idy = *indicesy++;
1224: y[idy] = x[0];
1225: y[idy+1] = x[1];
1226: y[idy+2] = x[2];
1227: y[idy+3] = x[3];
1228: y[idy+4] = x[4];
1229: y[idy+5] = x[5];
1230: y[idy+6] = x[6];
1231: y[idy+7] = x[7];
1232: x += 8;
1233: }
1234: break;
1235: case ADD_VALUES:
1236: case ADD_ALL_VALUES:
1237: for (i=0; i<n; i++) {
1238: idy = *indicesy++;
1239: y[idy] += x[0];
1240: y[idy+1] += x[1];
1241: y[idy+2] += x[2];
1242: y[idy+3] += x[3];
1243: y[idy+4] += x[4];
1244: y[idy+5] += x[5];
1245: y[idy+6] += x[6];
1246: y[idy+7] += x[7];
1247: x += 8;
1248: }
1249: break;
1250: #if !defined(PETSC_USE_COMPLEX)
1251: case MAX_VALUES:
1252: for (i=0; i<n; i++) {
1253: idy = *indicesy++;
1254: y[idy] = PetscMax(y[idy],x[0]);
1255: y[idy+1] = PetscMax(y[idy+1],x[1]);
1256: y[idy+2] = PetscMax(y[idy+2],x[2]);
1257: y[idy+3] = PetscMax(y[idy+3],x[3]);
1258: y[idy+4] = PetscMax(y[idy+4],x[4]);
1259: y[idy+5] = PetscMax(y[idy+5],x[5]);
1260: y[idy+6] = PetscMax(y[idy+6],x[6]);
1261: y[idy+7] = PetscMax(y[idy+7],x[7]);
1262: x += 8;
1263: }
1264: #else
1265: case MAX_VALUES:
1266: #endif
1267: case NOT_SET_VALUES:
1268: break;
1269: }
1270: }
1272: PETSC_STATIC_INLINE void Scatter_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1273: {
1274: PetscInt i,idx,idy;
1276: switch (addv) {
1277: case INSERT_VALUES:
1278: case INSERT_ALL_VALUES:
1279: for (i=0; i<n; i++) {
1280: idx = *indicesx++;
1281: idy = *indicesy++;
1282: y[idy] = x[idx];
1283: y[idy+1] = x[idx+1];
1284: y[idy+2] = x[idx+2];
1285: y[idy+3] = x[idx+3];
1286: y[idy+4] = x[idx+4];
1287: y[idy+5] = x[idx+5];
1288: y[idy+6] = x[idx+6];
1289: y[idy+7] = x[idx+7];
1290: }
1291: break;
1292: case ADD_VALUES:
1293: case ADD_ALL_VALUES:
1294: for (i=0; i<n; i++) {
1295: idx = *indicesx++;
1296: idy = *indicesy++;
1297: y[idy] += x[idx];
1298: y[idy+1] += x[idx+1];
1299: y[idy+2] += x[idx+2];
1300: y[idy+3] += x[idx+3];
1301: y[idy+4] += x[idx+4];
1302: y[idy+5] += x[idx+5];
1303: y[idy+6] += x[idx+6];
1304: y[idy+7] += x[idx+7];
1305: }
1306: break;
1307: #if !defined(PETSC_USE_COMPLEX)
1308: case MAX_VALUES:
1309: for (i=0; i<n; i++) {
1310: idx = *indicesx++;
1311: idy = *indicesy++;
1312: y[idy] = PetscMax(y[idy],x[idx]);
1313: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1314: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1315: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1316: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1317: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1318: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1319: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1320: }
1321: #else
1322: case MAX_VALUES:
1323: #endif
1324: case NOT_SET_VALUES:
1325: break;
1326: }
1327: }
1329: /* ----------------------------------------------------------------------------------------------- */
1330: PETSC_STATIC_INLINE void Pack_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
1331: {
1332: PetscInt i,idx;
1334: for (i=0; i<n; i++) {
1335: idx = *indicesx++;
1336: y[0] = x[idx];
1337: y[1] = x[idx+1];
1338: y[2] = x[idx+2];
1339: y[3] = x[idx+3];
1340: y[4] = x[idx+4];
1341: y[5] = x[idx+5];
1342: y[6] = x[idx+6];
1343: y[7] = x[idx+7];
1344: y[8] = x[idx+8];
1345: y[9] = x[idx+9];
1346: y[10] = x[idx+10];
1347: y[11] = x[idx+11];
1348: y += 12;
1349: }
1350: }
1351: PETSC_STATIC_INLINE void UnPack_12(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1352: {
1353: PetscInt i,idy;
1355: switch (addv) {
1356: case INSERT_VALUES:
1357: case INSERT_ALL_VALUES:
1358: for (i=0; i<n; i++) {
1359: idy = *indicesy++;
1360: y[idy] = x[0];
1361: y[idy+1] = x[1];
1362: y[idy+2] = x[2];
1363: y[idy+3] = x[3];
1364: y[idy+4] = x[4];
1365: y[idy+5] = x[5];
1366: y[idy+6] = x[6];
1367: y[idy+7] = x[7];
1368: y[idy+8] = x[8];
1369: y[idy+9] = x[9];
1370: y[idy+10] = x[10];
1371: y[idy+11] = x[11];
1372: x += 12;
1373: }
1374: break;
1375: case ADD_VALUES:
1376: case ADD_ALL_VALUES:
1377: for (i=0; i<n; i++) {
1378: idy = *indicesy++;
1379: y[idy] += x[0];
1380: y[idy+1] += x[1];
1381: y[idy+2] += x[2];
1382: y[idy+3] += x[3];
1383: y[idy+4] += x[4];
1384: y[idy+5] += x[5];
1385: y[idy+6] += x[6];
1386: y[idy+7] += x[7];
1387: y[idy+8] += x[8];
1388: y[idy+9] += x[9];
1389: y[idy+10] += x[10];
1390: y[idy+11] += x[11];
1391: x += 12;
1392: }
1393: break;
1394: #if !defined(PETSC_USE_COMPLEX)
1395: case MAX_VALUES:
1396: for (i=0; i<n; i++) {
1397: idy = *indicesy++;
1398: y[idy] = PetscMax(y[idy],x[0]);
1399: y[idy+1] = PetscMax(y[idy+1],x[1]);
1400: y[idy+2] = PetscMax(y[idy+2],x[2]);
1401: y[idy+3] = PetscMax(y[idy+3],x[3]);
1402: y[idy+4] = PetscMax(y[idy+4],x[4]);
1403: y[idy+5] = PetscMax(y[idy+5],x[5]);
1404: y[idy+6] = PetscMax(y[idy+6],x[6]);
1405: y[idy+7] = PetscMax(y[idy+7],x[7]);
1406: y[idy+8] = PetscMax(y[idy+8],x[8]);
1407: y[idy+9] = PetscMax(y[idy+9],x[9]);
1408: y[idy+10] = PetscMax(y[idy+10],x[10]);
1409: y[idy+11] = PetscMax(y[idy+11],x[11]);
1410: x += 12;
1411: }
1412: #else
1413: case MAX_VALUES:
1414: #endif
1415: case NOT_SET_VALUES:
1416: break;
1417: }
1418: }
1420: PETSC_STATIC_INLINE void Scatter_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1421: {
1422: PetscInt i,idx,idy;
1424: switch (addv) {
1425: case INSERT_VALUES:
1426: case INSERT_ALL_VALUES:
1427: for (i=0; i<n; i++) {
1428: idx = *indicesx++;
1429: idy = *indicesy++;
1430: y[idy] = x[idx];
1431: y[idy+1] = x[idx+1];
1432: y[idy+2] = x[idx+2];
1433: y[idy+3] = x[idx+3];
1434: y[idy+4] = x[idx+4];
1435: y[idy+5] = x[idx+5];
1436: y[idy+6] = x[idx+6];
1437: y[idy+7] = x[idx+7];
1438: y[idy+8] = x[idx+8];
1439: y[idy+9] = x[idx+9];
1440: y[idy+10] = x[idx+10];
1441: y[idy+11] = x[idx+11];
1442: }
1443: break;
1444: case ADD_VALUES:
1445: case ADD_ALL_VALUES:
1446: for (i=0; i<n; i++) {
1447: idx = *indicesx++;
1448: idy = *indicesy++;
1449: y[idy] += x[idx];
1450: y[idy+1] += x[idx+1];
1451: y[idy+2] += x[idx+2];
1452: y[idy+3] += x[idx+3];
1453: y[idy+4] += x[idx+4];
1454: y[idy+5] += x[idx+5];
1455: y[idy+6] += x[idx+6];
1456: y[idy+7] += x[idx+7];
1457: y[idy+8] += x[idx+8];
1458: y[idy+9] += x[idx+9];
1459: y[idy+10] += x[idx+10];
1460: y[idy+11] += x[idx+11];
1461: }
1462: break;
1463: #if !defined(PETSC_USE_COMPLEX)
1464: case MAX_VALUES:
1465: for (i=0; i<n; i++) {
1466: idx = *indicesx++;
1467: idy = *indicesy++;
1468: y[idy] = PetscMax(y[idy],x[idx]);
1469: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1470: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1471: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1472: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1473: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1474: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1475: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1476: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1477: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1478: y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1479: y[idy+11] = PetscMax(y[idy+11],x[idx+11]);
1480: }
1481: #else
1482: case MAX_VALUES:
1483: #endif
1484: case NOT_SET_VALUES:
1485: break;
1486: }
1487: }
1489: /* Create the VecScatterBegin/End_P for our chosen block sizes */
1490: #define BS 1
1491: #include <../src/vec/vec/utils/vpscat.h>
1492: #define BS 2
1493: #include <../src/vec/vec/utils/vpscat.h>
1494: #define BS 3
1495: #include <../src/vec/vec/utils/vpscat.h>
1496: #define BS 4
1497: #include <../src/vec/vec/utils/vpscat.h>
1498: #define BS 5
1499: #include <../src/vec/vec/utils/vpscat.h>
1500: #define BS 6
1501: #include <../src/vec/vec/utils/vpscat.h>
1502: #define BS 7
1503: #include <../src/vec/vec/utils/vpscat.h>
1504: #define BS 8
1505: #include <../src/vec/vec/utils/vpscat.h>
1506: #define BS 12
1507: #include <../src/vec/vec/utils/vpscat.h>
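/*
   Sketch of the multiple-inclusion technique used above (illustrative only;
   the real vpscat.h is more elaborate and the macro names here are invented):

      #define PETSC_PASTE2_(a,b) a ## b
      #define PETSC_PASTE2(a,b)  PETSC_PASTE2_(a,b)

      PetscErrorCode PETSC_PASTE2(VecScatterBegin_,BS)(VecScatter ctx,Vec x,Vec y,InsertMode addv,ScatterMode mode)
      {
        ... pack/send loops that move BS entries per index ...
      }
      #undef BS

   Each "#define BS n" followed by "#include <vpscat.h>" therefore instantiates
   the begin/end routines specialized for block size n.
*/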
1509: /* ==========================================================================================*/
1511: /* create parallel to sequential scatter context */
1513: PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General *,VecScatter_MPI_General *,VecScatter);
1517: /*@
1518: VecScatterCreateLocal - Creates a VecScatter from a list of messages it must send and receive.
1520: Collective on VecScatter
1522: Input Parameters:
1523: +  ctx - VecScatter obtained with VecScatterCreateEmpty()
1524: .  nsends - number of messages this process will send
1525: .  sendSizes - number of entries in each message to be sent
1526: .  sendProcs - ranks of the processes each message is sent to
1527: .  sendIdx - indices from which the sent entries are obtained (in local, on-process numbering); this is one long array of size \sum_{i=0,i<nsends} sendSizes[i]
1528: .  nrecvs - number of receives to expect
1529: .  recvSizes - number of entries in each message to be received
1530: .  recvProcs - ranks of the processes that send to this process
1531: .  recvIdx - indices where received entries are to be put (in local, on-process numbering); this is one long array of size \sum_{i=0,i<nrecvs} recvSizes[i]
1532: -  bs - size of each block
1534:    Notes: sendSizes[] and recvSizes[] cannot have any 0 entries. If you want to support 0 entries you need
1535: to change the code below to "compress out" the sendProcs[] and recvProcs[] entries that have 0 entries.
1537:        Probably does not handle sends to self properly; those should be removed from the counts that are used
1538: in allocating space inside the from struct. A minimal usage sketch follows this routine.
1540: Level: intermediate
1542: @*/
1543: PetscErrorCode VecScatterCreateLocal(VecScatter ctx,PetscInt nsends,const PetscInt sendSizes[],const PetscInt sendProcs[],const PetscInt sendIdx[],PetscInt nrecvs,const PetscInt recvSizes[],const PetscInt recvProcs[],const PetscInt recvIdx[],PetscInt bs)
1544: {
1545: VecScatter_MPI_General *from, *to;
1546: PetscInt sendSize, recvSize;
1547: PetscInt n, i;
1548: PetscErrorCode ierr;
1550: /* allocate entire send scatter context */
1551: PetscNewLog(ctx,VecScatter_MPI_General,&to);
1552: to->n = nsends;
1553: for(n = 0, sendSize = 0; n < to->n; n++) {sendSize += sendSizes[n];}
1554: PetscMalloc(to->n*sizeof(MPI_Request),&to->requests);
1555: PetscMalloc4(bs*sendSize,PetscScalar,&to->values,sendSize,PetscInt,&to->indices,to->n+1,PetscInt,&to->starts,to->n,PetscMPIInt,&to->procs);
1556: PetscMalloc2(PetscMax(to->n,nrecvs),MPI_Status,&to->sstatus,PetscMax(to->n,nrecvs),MPI_Status,&to->rstatus);
1557: to->starts[0] = 0;
1558: for(n = 0; n < to->n; n++) {
1559: if (sendSizes[n] <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"sendSizes[n=%D] = %D cannot be less than 1",n,sendSizes[n]);
1560: to->starts[n+1] = to->starts[n] + sendSizes[n];
1561: to->procs[n] = sendProcs[n];
1562: for(i = to->starts[n]; i < to->starts[n]+sendSizes[n]; i++) {
1563: to->indices[i] = sendIdx[i];
1564: }
1565: }
1566: ctx->todata = (void *) to;
1568: /* allocate entire receive scatter context */
1569: PetscNewLog(ctx,VecScatter_MPI_General,&from);
1570: from->n = nrecvs;
1571: for(n = 0, recvSize = 0; n < from->n; n++) {recvSize += recvSizes[n];}
1572: PetscMalloc(from->n*sizeof(MPI_Request),&from->requests);
1573: PetscMalloc4(bs*recvSize,PetscScalar,&from->values,recvSize,PetscInt,&from->indices,from->n+1,PetscInt,&from->starts,from->n,PetscMPIInt,&from->procs);
1574: from->starts[0] = 0;
1575: for(n = 0; n < from->n; n++) {
1576: if (recvSizes[n] <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"recvSizes[n=%D] = %D cannot be less than 1",n,recvSizes[n]);
1577: from->starts[n+1] = from->starts[n] + recvSizes[n];
1578: from->procs[n] = recvProcs[n];
1579: for(i = from->starts[n]; i < from->starts[n]+recvSizes[n]; i++) {
1580: from->indices[i] = recvIdx[i];
1581: }
1582: }
1583: ctx->fromdata = (void *)from;
1585: /* No local scatter optimization */
1586: from->local.n = 0;
1587: from->local.vslots = 0;
1588: to->local.n = 0;
1589: to->local.vslots = 0;
1590: from->local.nonmatching_computed = PETSC_FALSE;
1591: from->local.n_nonmatching = 0;
1592: from->local.slots_nonmatching = 0;
1593: to->local.nonmatching_computed = PETSC_FALSE;
1594: to->local.n_nonmatching = 0;
1595: to->local.slots_nonmatching = 0;
1597: from->type = VEC_SCATTER_MPI_GENERAL;
1598: to->type = VEC_SCATTER_MPI_GENERAL;
1599: from->bs = bs;
1600: to->bs = bs;
1601: VecScatterCreateCommon_PtoS(from, to, ctx);
1603: /* mark lengths as negative so it won't check local vector lengths */
1604: ctx->from_n = ctx->to_n = -1;
1605: return(0);
1606: }
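/*
   Minimal usage sketch for VecScatterCreateLocal() above (illustrative values
   only; assumes ctx was obtained with VecScatterCreateEmpty()):

      PetscInt sendSizes[1] = {2}, sendProcs[1] = {1}, sendIdx[2] = {0,3};
      PetscInt recvSizes[1] = {2}, recvProcs[1] = {1}, recvIdx[2] = {5,7};
      ierr = VecScatterCreateLocal(ctx,1,sendSizes,sendProcs,sendIdx,
                                   1,recvSizes,recvProcs,recvIdx,1);CHKERRQ(ierr);

   Here this process sends its local entries 0 and 3 to process 1 and places
   the two entries it receives from process 1 into local positions 5 and 7.
*/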
1608: /*
1609: bs indicates how many elements there are in each block. Normally this would be 1.
1611: Contains a check that PetscMPIInt can handle the sizes needed.
1612: */
1615: PetscErrorCode VecScatterCreate_PtoS(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
1616: {
1617: VecScatter_MPI_General *from,*to;
1618: PetscMPIInt size,rank,imdex,tag,n;
1619: PetscInt *source = PETSC_NULL,*owners = PETSC_NULL;
1620: PetscInt *lowner = PETSC_NULL,*start = PETSC_NULL,lengthy,lengthx;
1621: PetscMPIInt *nprocs = PETSC_NULL,nrecvs;
1622: PetscInt i,j,idx,nsends;
1623: PetscInt *owner = PETSC_NULL,*starts = PETSC_NULL,count,slen;
1624: PetscInt *rvalues,*svalues,base,*values,nprocslocal,recvtotal,*rsvalues;
1625: PetscMPIInt *onodes1,*olengths1;
1626: MPI_Comm comm;
1627: MPI_Request *send_waits = PETSC_NULL,*recv_waits = PETSC_NULL;
1628: MPI_Status recv_status,*send_status;
1629: PetscErrorCode ierr;
1632: PetscObjectGetNewTag((PetscObject)ctx,&tag);
1633: PetscObjectGetComm((PetscObject)xin,&comm);
1634: MPI_Comm_rank(comm,&rank);
1635: MPI_Comm_size(comm,&size);
1636: owners = xin->map->range;
1637: VecGetSize(yin,&lengthy);
1638: VecGetSize(xin,&lengthx);
1640: /* first count number of contributors to each processor */
1641: PetscMalloc2(size,PetscMPIInt,&nprocs,nx,PetscInt,&owner);
1642: PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
1644: j = 0;
1645: nsends = 0;
1646: for (i=0; i<nx; i++) {
1647: idx = bs*inidx[i];
1648: if (idx < owners[j]) j = 0;
1649: for (; j<size; j++) {
1650: if (idx < owners[j+1]) {
1651: if (!nprocs[j]++) nsends++;
1652: owner[i] = j;
1653: break;
1654: }
1655: }
1656: if (j == size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ith %D block entry %D not owned by any process, upper bound %D",i,idx,owners[size]);
1657: }
1659: nprocslocal = nprocs[rank];
1660: nprocs[rank] = 0;
1661: if (nprocslocal) nsends--;
1662: /* inform other processors of number of messages and max length */
1663: PetscGatherNumberOfMessages(comm,PETSC_NULL,nprocs,&nrecvs);
1664: PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
1665: PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
1666: recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
1668: /* post receives: */
1669: PetscMalloc3(recvtotal,PetscInt,&rvalues,nrecvs,PetscInt,&source,nrecvs,MPI_Request,&recv_waits);
1670: count = 0;
1671: for (i=0; i<nrecvs; i++) {
1672: MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
1673: count += olengths1[i];
1674: }
1676: /* do sends:
1677: 1) starts[i] gives the starting index in svalues for stuff going to
1678: the ith processor
1679: */
1680: PetscMalloc3(nx,PetscInt,&svalues,nsends,MPI_Request,&send_waits,size+1,PetscInt,&starts);
1682: starts[0] = 0;
1683: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1684: for (i=0; i<nx; i++) {
1685: if (owner[i] != rank) {
1686: svalues[starts[owner[i]]++] = bs*inidx[i];
1687: }
1688: }
1689: starts[0] = 0;
1690: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1691: count = 0;
1692: for (i=0; i<size; i++) {
1693: if (nprocs[i]) {
1694: MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);
1695: }
1696: }
1698: /* wait on receives */
1699: count = nrecvs;
1700: slen = 0;
1701: while (count) {
1702: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1703: /* unpack receives into our local space */
1704: MPI_Get_count(&recv_status,MPIU_INT,&n);
1705: slen += n;
1706: count--;
1707: }
1709: if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not expected %D",slen,recvtotal);
1710:
1711: /* allocate entire send scatter context */
1712: PetscNewLog(ctx,VecScatter_MPI_General,&to);
1713: to->n = nrecvs;
1714: PetscMalloc(nrecvs*sizeof(MPI_Request),&to->requests);
1715: PetscMalloc4(bs*slen,PetscScalar,&to->values,slen,PetscInt,&to->indices,nrecvs+1,PetscInt,&to->starts,nrecvs,PetscMPIInt,&to->procs);
1716: PetscMalloc2(PetscMax(to->n,nsends),MPI_Status,&to->sstatus,PetscMax(to->n,nsends),MPI_Status,&to->rstatus);
1717: ctx->todata = (void*)to;
1718: to->starts[0] = 0;
1720: if (nrecvs) {
1722: /* move the data into the send scatter */
1723: base = owners[rank];
1724: rsvalues = rvalues;
1725: for (i=0; i<nrecvs; i++) {
1726: to->starts[i+1] = to->starts[i] + olengths1[i];
1727: to->procs[i] = onodes1[i];
1728: values = rsvalues;
1729: rsvalues += olengths1[i];
1730: for (j=0; j<olengths1[i]; j++) {
1731: to->indices[to->starts[i] + j] = values[j] - base;
1732: }
1733: }
1734: }
1735: PetscFree(olengths1);
1736: PetscFree(onodes1);
1737: PetscFree3(rvalues,source,recv_waits);
1739: /* allocate entire receive scatter context */
1740: PetscNewLog(ctx,VecScatter_MPI_General,&from);
1741: from->n = nsends;
1743: PetscMalloc(nsends*sizeof(MPI_Request),&from->requests);
1744: PetscMalloc4((ny-nprocslocal)*bs,PetscScalar,&from->values,ny-nprocslocal,PetscInt,&from->indices,nsends+1,PetscInt,&from->starts,from->n,PetscMPIInt,&from->procs);
1745: ctx->fromdata = (void*)from;
1747: /* move data into receive scatter */
1748: PetscMalloc2(size,PetscInt,&lowner,nsends+1,PetscInt,&start);
1749: count = 0; from->starts[0] = start[0] = 0;
1750: for (i=0; i<size; i++) {
1751: if (nprocs[i]) {
1752: lowner[i] = count;
1753: from->procs[count++] = i;
1754: from->starts[count] = start[count] = start[count-1] + nprocs[i];
1755: }
1756: }
1758: for (i=0; i<nx; i++) {
1759: if (owner[i] != rank) {
1760: from->indices[start[lowner[owner[i]]]++] = bs*inidy[i];
1761: if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
1762: }
1763: }
1764: PetscFree2(lowner,start);
1765: PetscFree2(nprocs,owner);
1766:
1767: /* wait on sends */
1768: if (nsends) {
1769: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1770: MPI_Waitall(nsends,send_waits,send_status);
1771: PetscFree(send_status);
1772: }
1773: PetscFree3(svalues,send_waits,starts);
1775: if (nprocslocal) {
1776: PetscInt nt = from->local.n = to->local.n = nprocslocal;
1777: /* we have a scatter to ourselves */
1778: PetscMalloc(nt*sizeof(PetscInt),&to->local.vslots);
1779: PetscMalloc(nt*sizeof(PetscInt),&from->local.vslots);
1780: nt = 0;
1781: for (i=0; i<nx; i++) {
1782: idx = bs*inidx[i];
1783: if (idx >= owners[rank] && idx < owners[rank+1]) {
1784: to->local.vslots[nt] = idx - owners[rank];
1785: from->local.vslots[nt++] = bs*inidy[i];
1786: if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
1787: }
1788: }
1789: } else {
1790: from->local.n = 0;
1791: from->local.vslots = 0;
1792: to->local.n = 0;
1793: to->local.vslots = 0;
1794: }
1796: from->local.nonmatching_computed = PETSC_FALSE;
1797: from->local.n_nonmatching = 0;
1798: from->local.slots_nonmatching = 0;
1799: to->local.nonmatching_computed = PETSC_FALSE;
1800: to->local.n_nonmatching = 0;
1801: to->local.slots_nonmatching = 0;
1803: from->type = VEC_SCATTER_MPI_GENERAL;
1804: to->type = VEC_SCATTER_MPI_GENERAL;
1805: from->bs = bs;
1806: to->bs = bs;
1807: VecScatterCreateCommon_PtoS(from,to,ctx);
1808: return(0);
1809: }
1811: /*
1812: bs indicates how many elements there are in each block. Normally this would be 1.
1813: */
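/* Editor's note: concretely, with bs = 3 each entry of to->indices/from->indices
   refers to the first scalar of a 3-scalar block, and a message to process
   to->procs[i] carries 3*(to->starts[i+1]-to->starts[i]) scalars.  The
   block-specialized VecScatterBegin_3/VecScatterEnd_3 routines are selected in
   the switch statement below. */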
1816: PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General *from,VecScatter_MPI_General *to,VecScatter ctx)
1817: {
1818: MPI_Comm comm = ((PetscObject)ctx)->comm;
1819: PetscMPIInt tag = ((PetscObject)ctx)->tag, tagr;
1820: PetscInt bs = to->bs;
1821: PetscMPIInt size;
1822: PetscInt i, n;
1824:
1826: PetscObjectGetNewTag((PetscObject)ctx,&tagr);
1827: ctx->destroy = VecScatterDestroy_PtoP;
1829: ctx->reproduce = PETSC_FALSE;
1830: to->sendfirst = PETSC_FALSE;
1831: PetscOptionsGetBool(PETSC_NULL,"-vecscatter_reproduce",&ctx->reproduce,PETSC_NULL);
1832: PetscOptionsGetBool(PETSC_NULL,"-vecscatter_sendfirst",&to->sendfirst,PETSC_NULL);
1833: from->sendfirst = to->sendfirst;
1835: MPI_Comm_size(comm,&size);
1836: /* check if the receives are ALL going into contiguous locations; if so, indexing can be skipped */
1837: to->contiq = PETSC_FALSE;
1838: n = from->starts[from->n];
1839: from->contiq = PETSC_TRUE;
1840: for (i=1; i<n; i++) {
1841: if (from->indices[i] != from->indices[i-1] + bs) {
1842: from->contiq = PETSC_FALSE;
1843: break;
1844: }
1845: }
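/* Editor's note: the loop above marks the receive side as "contiq" when the incoming
   entries land back-to-back, i.e. each index is exactly bs past the previous one
   (for example indices 8,12,16 with bs = 4).  The alltoallw branch below uses this
   to address the destination array directly instead of building an indexed datatype. */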
1847: to->use_alltoallv = PETSC_FALSE;
1848: PetscOptionsGetBool(PETSC_NULL,"-vecscatter_alltoall",&to->use_alltoallv,PETSC_NULL);
1849: from->use_alltoallv = to->use_alltoallv;
1850: if (from->use_alltoallv) PetscInfo(ctx,"Using MPI_Alltoallv() for scatter\n");
1851: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
1852: if (to->use_alltoallv) {
1853: to->use_alltoallw = PETSC_FALSE;
1854: PetscOptionsGetBool(PETSC_NULL,"-vecscatter_nopack",&to->use_alltoallw,PETSC_NULL);
1855: }
1856: from->use_alltoallw = to->use_alltoallw;
1857: if (from->use_alltoallw) PetscInfo(ctx,"Using MPI_Alltoallw() for scatter\n");
1858: #endif
1860: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
1861: to->use_window = PETSC_FALSE;
1862: PetscOptionsGetBool(PETSC_NULL,"-vecscatter_window",&to->use_window,PETSC_NULL);
1863: from->use_window = to->use_window;
1864: #endif
1866: if (to->use_alltoallv) {
1868: PetscMalloc2(size,PetscMPIInt,&to->counts,size,PetscMPIInt,&to->displs);
1869: PetscMemzero(to->counts,size*sizeof(PetscMPIInt));
1870: for (i=0; i<to->n; i++) {
1871: to->counts[to->procs[i]] = bs*(to->starts[i+1] - to->starts[i]);
1872: }
1873: to->displs[0] = 0;
1874: for (i=1; i<size; i++) {
1875: to->displs[i] = to->displs[i-1] + to->counts[i-1];
1876: }
1878: PetscMalloc2(size,PetscMPIInt,&from->counts,size,PetscMPIInt,&from->displs);
1879: PetscMemzero(from->counts,size*sizeof(PetscMPIInt));
1880: for (i=0; i<from->n; i++) {
1881: from->counts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
1882: }
1883: from->displs[0] = 0;
1884: for (i=1; i<size; i++) {
1885: from->displs[i] = from->displs[i-1] + from->counts[i-1];
1886: }
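/* Editor's note: to->counts/to->displs and from->counts/from->displs are laid out per
   destination/source rank exactly as MPI_Alltoallv() expects.  The Begin/End routines
   (defined elsewhere) can then presumably issue something of the form

       MPI_Alltoallv(packed_send,to->counts,to->displs,MPIU_SCALAR,
                     recv_buffer,from->counts,from->displs,MPIU_SCALAR,comm);

   where packed_send and recv_buffer are placeholder buffer names; ranks that exchange
   no data simply have a zero count. */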
1887: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
1888: if (to->use_alltoallw) {
1889: PetscMPIInt mpibs = PetscMPIIntCast(bs), mpilen;
1890: ctx->packtogether = PETSC_FALSE;
1891: PetscMalloc3(size,PetscMPIInt,&to->wcounts,size,PetscMPIInt,&to->wdispls,size,MPI_Datatype,&to->types);
1892: PetscMemzero(to->wcounts,size*sizeof(PetscMPIInt));
1893: PetscMemzero(to->wdispls,size*sizeof(PetscMPIInt));
1894: for (i=0; i<size; i++) {
1895: to->types[i] = MPIU_SCALAR;
1896: }
1898: for (i=0; i<to->n; i++) {
1899: to->wcounts[to->procs[i]] = 1;
1900: mpilen = PetscMPIIntCast(to->starts[i+1]-to->starts[i]);
1901: MPI_Type_create_indexed_block(mpilen,mpibs,to->indices+to->starts[i],MPIU_SCALAR,to->types+to->procs[i]);
1902: MPI_Type_commit(to->types+to->procs[i]);
1903: }
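/* Editor's note: with -vecscatter_nopack each destination rank gets a derived datatype
   built from the (already local) block offsets, so data can be sent straight out of the
   vector array without a separate packing pass.  For instance, offsets {2,7,11} with
   block length bs describe the scalars at positions 2..2+bs-1, 7..7+bs-1 and
   11..11+bs-1 of the source array. */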
1904: PetscMalloc3(size,PetscMPIInt,&from->wcounts,size,PetscMPIInt,&from->wdispls,size,MPI_Datatype,&from->types);
1905: PetscMemzero(from->wcounts,size*sizeof(PetscMPIInt));
1906: PetscMemzero(from->wdispls,size*sizeof(PetscMPIInt));
1907: for (i=0; i<size; i++) {
1908: from->types[i] = MPIU_SCALAR;
1909: }
1910: if (from->contiq) {
1911: PetscInfo(ctx,"Scattered vector entries are stored contiquously, taking advantage of this with -vecscatter_alltoall\n");
1912: for (i=0; i<from->n; i++) {
1913: from->wcounts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
1914: }
1915: if (from->n) from->wdispls[from->procs[0]] = sizeof(PetscScalar)*from->indices[0];
1916: for (i=1; i<from->n; i++) {
1917: from->wdispls[from->procs[i]] = from->wdispls[from->procs[i-1]] + sizeof(PetscScalar)*from->wcounts[from->procs[i-1]];
1918: }
1919: } else {
1920: for (i=0; i<from->n; i++) {
1921: from->wcounts[from->procs[i]] = 1;
1922: mpilen = PetscMPIIntCast(from->starts[i+1]-from->starts[i]);
1923: MPI_Type_create_indexed_block(mpilen,mpibs,from->indices+from->starts[i],MPIU_SCALAR,from->types+from->procs[i]);
1924: MPI_Type_commit(from->types+from->procs[i]);
1925: }
1926: }
1927: } else {
1928: ctx->copy = VecScatterCopy_PtoP_AllToAll;
1929: }
1930: #else
1931: to->use_alltoallw = PETSC_FALSE;
1932: from->use_alltoallw = PETSC_FALSE;
1933: ctx->copy = VecScatterCopy_PtoP_AllToAll;
1934: #endif
1935: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
1936: } else if (to->use_window) {
1937: PetscMPIInt temptag,winsize;
1938: MPI_Request *request;
1939: MPI_Status *status;
1940:
1941: PetscObjectGetNewTag((PetscObject)ctx,&temptag);
1942: winsize = (to->n ? to->starts[to->n] : 0)*bs*sizeof(PetscScalar);
1943: MPI_Win_create(to->values ? to->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&to->window);
1944: PetscMalloc(to->n*sizeof(PetscInt),&to->winstarts);
1945: PetscMalloc2(to->n,MPI_Request,&request,to->n,MPI_Status,&status);
1946: for (i=0; i<to->n; i++) {
1947: MPI_Irecv(to->winstarts+i,1,MPIU_INT,to->procs[i],temptag,comm,request+i);
1948: }
1949: for (i=0; i<from->n; i++) {
1950: MPI_Send(from->starts+i,1,MPIU_INT,from->procs[i],temptag,comm);
1951: }
1952: MPI_Waitall(to->n,request,status);
1953: PetscFree2(request,status);
1955: winsize = (from->n ? from->starts[from->n] : 0)*bs*sizeof(PetscScalar);
1956: MPI_Win_create(from->values ? from->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&from->window);
1957: PetscMalloc(from->n*sizeof(PetscInt),&from->winstarts);
1958: PetscMalloc2(from->n,MPI_Request,&request,from->n,MPI_Status,&status);
1959: for (i=0; i<from->n; i++) {
1960: MPI_Irecv(from->winstarts+i,1,MPIU_INT,from->procs[i],temptag,comm,request+i);
1961: }
1962: for (i=0; i<to->n; i++) {
1963: MPI_Send(to->starts+i,1,MPIU_INT,to->procs[i],temptag,comm);
1964: }
1965: MPI_Waitall(from->n,request,status);
1966: PetscFree2(request,status);
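/* Editor's note: after the two handshakes above, to->winstarts[i] is the block offset
   within rank to->procs[i]'s receive buffer at which this process's contribution is
   expected (from->winstarts plays the same role for the reverse direction).  A forward
   scatter can therefore deposit its packed block with a one-sided put, presumably of
   roughly this form, inside the appropriate window synchronization:

       MPI_Put(to->values+bs*to->starts[i],bs*(to->starts[i+1]-to->starts[i]),MPIU_SCALAR,
               to->procs[i],bs*to->winstarts[i],bs*(to->starts[i+1]-to->starts[i]),
               MPIU_SCALAR,from->window);
*/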
1967: #endif
1968: } else {
1969: PetscBool use_rsend = PETSC_FALSE, use_ssend = PETSC_FALSE;
1970: PetscInt *sstarts = to->starts, *rstarts = from->starts;
1971: PetscMPIInt *sprocs = to->procs, *rprocs = from->procs;
1972: MPI_Request *swaits = to->requests,*rwaits = from->requests;
1973: MPI_Request *rev_swaits,*rev_rwaits;
1974: PetscScalar *Ssvalues = to->values, *Srvalues = from->values;
1976: /* allocate additional wait variables for the "reverse" scatter */
1977: PetscMalloc(to->n*sizeof(MPI_Request),&rev_rwaits);
1978: PetscMalloc(from->n*sizeof(MPI_Request),&rev_swaits);
1979: to->rev_requests = rev_rwaits;
1980: from->rev_requests = rev_swaits;
1982: /* Register persistent requests on the receive buffers; these act as the sends of the reverse scatter */
1983: PetscOptionsGetBool(PETSC_NULL,"-vecscatter_rsend",&use_rsend,PETSC_NULL);
1984: PetscOptionsGetBool(PETSC_NULL,"-vecscatter_ssend",&use_ssend,PETSC_NULL);
1985: if (use_rsend) {
1986: PetscInfo(ctx,"Using VecScatter ready receiver mode\n");
1987: to->use_readyreceiver = PETSC_TRUE;
1988: from->use_readyreceiver = PETSC_TRUE;
1989: } else {
1990: to->use_readyreceiver = PETSC_FALSE;
1991: from->use_readyreceiver = PETSC_FALSE;
1992: }
1993: if (use_ssend) {
1994: PetscInfo(ctx,"Using VecScatter Ssend mode\n");
1995: }
1997: for (i=0; i<from->n; i++) {
1998: if (use_rsend) {
1999: MPI_Rsend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2000: } else if (use_ssend) {
2001: MPI_Ssend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2002: } else {
2003: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2004: }
2005: }
2007: for (i=0; i<to->n; i++) {
2008: if (use_rsend) {
2009: MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2010: } else if (use_ssend) {
2011: MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2012: } else {
2013: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2014: }
2015: }
2016: /* Register the persistent receives for the forward and reverse scatters */
2017: for (i=0; i<from->n; i++) {
2018: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
2019: }
2020: for (i=0; i<to->n; i++) {
2021: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tagr,comm,rev_rwaits+i);
2022: }
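/* Editor's note: the MPI_Send_init()/MPI_Recv_init() calls above only create persistent
   requests; no data moves yet.  Each VecScatterBegin()/VecScatterEnd() pair can then
   start and complete them cheaply, e.g. (sketch only):

       MPI_Startall(from->n,rwaits);                 start the pre-posted receives
       MPI_Startall(to->n,swaits);                   start the sends
       MPI_Waitall(from->n,rwaits,to->rstatus);      complete the receives

   The ready-send variant handled just below additionally starts the receives right away
   and barriers, because a ready send requires its matching receive to already be posted. */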
2023: if (use_rsend) {
2024: if (to->n) {MPI_Startall_irecv(to->starts[to->n]*to->bs,to->n,to->rev_requests);}
2025: if (from->n) {MPI_Startall_irecv(from->starts[from->n]*from->bs,from->n,from->requests);}
2026: MPI_Barrier(comm);
2027: }
2029: ctx->copy = VecScatterCopy_PtoP_X;
2030: }
2031: PetscInfo1(ctx,"Using blocksize %D scatter\n",bs);
2032:
2033: #if defined(PETSC_USE_DEBUG)
2034: MPI_Allreduce(&bs,&i,1,MPIU_INT,MPI_MIN,((PetscObject)ctx)->comm);
2035: MPI_Allreduce(&bs,&n,1,MPIU_INT,MPI_MAX,((PetscObject)ctx)->comm);
2036: if (bs!=i || bs!=n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Block size %D != %D or %D",bs,i,n);
2037: #endif
2039: switch (bs) {
2040: case 12:
2041: ctx->begin = VecScatterBegin_12;
2042: ctx->end = VecScatterEnd_12;
2043: break;
2044: case 8:
2045: ctx->begin = VecScatterBegin_8;
2046: ctx->end = VecScatterEnd_8;
2047: break;
2048: case 7:
2049: ctx->begin = VecScatterBegin_7;
2050: ctx->end = VecScatterEnd_7;
2051: break;
2052: case 6:
2053: ctx->begin = VecScatterBegin_6;
2054: ctx->end = VecScatterEnd_6;
2055: break;
2056: case 5:
2057: ctx->begin = VecScatterBegin_5;
2058: ctx->end = VecScatterEnd_5;
2059: break;
2060: case 4:
2061: ctx->begin = VecScatterBegin_4;
2062: ctx->end = VecScatterEnd_4;
2063: break;
2064: case 3:
2065: ctx->begin = VecScatterBegin_3;
2066: ctx->end = VecScatterEnd_3;
2067: break;
2068: case 2:
2069: ctx->begin = VecScatterBegin_2;
2070: ctx->end = VecScatterEnd_2;
2071: break;
2072: case 1:
2073: ctx->begin = VecScatterBegin_1;
2074: ctx->end = VecScatterEnd_1;
2075: break;
2076: default:
2077: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Blocksize not supported");
2078: }
2079: ctx->view = VecScatterView_MPI;
2080: /* Check if the local scatter is actually a copy; important special case */
2081: if (to->local.n) {
2082: VecScatterLocalOptimizeCopy_Private(ctx,&to->local,&from->local,bs);
2083: }
2084: return(0);
2085: }
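/* Editor's note: a minimal caller-side sketch (not part of this file) of a scatter that
   exercises the machinery set up above; sizes and indices are made up.  The communication
   strategy chosen in VecScatterCreateCommon_PtoS() can be switched at run time with the
   options read above, e.g. -vecscatter_alltoall, -vecscatter_nopack, -vecscatter_window,
   -vecscatter_ssend, -vecscatter_rsend, -vecscatter_sendfirst or -vecscatter_reproduce. */
#if 0
  {
    Vec        x,y;
    IS         ix,iy;
    VecScatter sc;
    PetscInt   i,idx[4];

    VecCreateMPI(PETSC_COMM_WORLD,PETSC_DECIDE,100,&x);   /* parallel source vector     */
    VecCreateSeq(PETSC_COMM_SELF,4,&y);                   /* per-process destination    */
    for (i=0; i<4; i++) idx[i] = 25*i;                    /* every rank gathers 4 entries */
    ISCreateGeneral(PETSC_COMM_WORLD,4,idx,PETSC_COPY_VALUES,&ix);
    ISCreateStride(PETSC_COMM_SELF,4,0,1,&iy);
    VecScatterCreate(x,ix,y,iy,&sc);                      /* typically reaches VecScatterCreate_PtoS() */
    VecScatterBegin(sc,x,y,INSERT_VALUES,SCATTER_FORWARD);
    VecScatterEnd(sc,x,y,INSERT_VALUES,SCATTER_FORWARD);
    VecScatterDestroy(&sc);
    ISDestroy(&ix); ISDestroy(&iy);
    VecDestroy(&x); VecDestroy(&y);
  }
#endif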
2089: /* ------------------------------------------------------------------------------------*/
2090: /*
2091: Scatter from local Seq vectors to a parallel vector.
2092: Reverses the order of the arguments, calls VecScatterCreate_PtoS() then
2093: reverses the result.
2094: */
2097: PetscErrorCode VecScatterCreate_StoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2098: {
2099: PetscErrorCode ierr;
2100: MPI_Request *waits;
2101: VecScatter_MPI_General *to,*from;
2104: VecScatterCreate_PtoS(ny,inidy,nx,inidx,yin,xin,bs,ctx);
2105: to = (VecScatter_MPI_General*)ctx->fromdata;
2106: from = (VecScatter_MPI_General*)ctx->todata;
2107: ctx->todata = (void*)to;
2108: ctx->fromdata = (void*)from;
2109: /* these two are special; they are ALWAYS stored in the to struct */
2110: to->sstatus = from->sstatus;
2111: to->rstatus = from->rstatus;
2113: from->sstatus = 0;
2114: from->rstatus = 0;
2116: waits = from->rev_requests;
2117: from->rev_requests = from->requests;
2118: from->requests = waits;
2119: waits = to->rev_requests;
2120: to->rev_requests = to->requests;
2121: to->requests = waits;
2122: return(0);
2123: }
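/* Editor's note: after the swap above the roles are inverted: what VecScatterCreate_PtoS()
   built as the receive side becomes the send side of this StoP scatter and vice versa, so
   the forward and reverse persistent requests must also change places.  The status arrays
   stay with the "to" struct because the Begin/End routines always look for them there. */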
2125: /* ---------------------------------------------------------------------------------*/
2128: PetscErrorCode VecScatterCreate_PtoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2129: {
2131: PetscMPIInt size,rank,tag,imdex,n;
2132: PetscInt *owners = xin->map->range;
2133: PetscMPIInt *nprocs = PETSC_NULL;
2134: PetscInt i,j,idx,nsends,*local_inidx = PETSC_NULL,*local_inidy = PETSC_NULL;
2135: PetscInt *owner = PETSC_NULL,*starts = PETSC_NULL,count,slen;
2136: PetscInt *rvalues = PETSC_NULL,*svalues = PETSC_NULL,base,*values = PETSC_NULL,*rsvalues,recvtotal,lastidx;
2137: PetscMPIInt *onodes1,*olengths1,nrecvs;
2138: MPI_Comm comm;
2139: MPI_Request *send_waits = PETSC_NULL,*recv_waits = PETSC_NULL;
2140: MPI_Status recv_status,*send_status = PETSC_NULL;
2141: PetscBool duplicate = PETSC_FALSE;
2142: #if defined(PETSC_USE_DEBUG)
2143: PetscBool found = PETSC_FALSE;
2144: #endif
2147: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2148: PetscObjectGetComm((PetscObject)xin,&comm);
2149: MPI_Comm_size(comm,&size);
2150: MPI_Comm_rank(comm,&rank);
2151: if (size == 1) {
2152: VecScatterCreate_StoP(nx,inidx,ny,inidy,xin,yin,bs,ctx);
2153: return(0);
2154: }
2156: /*
2157: Each processor ships its inidx[j] and inidy[j] to the appropriate owning processor;
2158: the received index pairs are then passed to VecScatterCreate_StoP()
2159: */
2160: /* first count number of contributors to each processor */
2161: PetscMalloc3(size,PetscMPIInt,&nprocs,nx,PetscInt,&owner,(size+1),PetscInt,&starts);
2162: PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
2163: lastidx = -1;
2164: j = 0;
2165: for (i=0; i<nx; i++) {
2166: /* if indices are NOT locally sorted, need to start search at the beginning */
2167: if (lastidx > (idx = bs*inidx[i])) j = 0;
2168: lastidx = idx;
2169: for (; j<size; j++) {
2170: if (idx >= owners[j] && idx < owners[j+1]) {
2171: nprocs[j]++;
2172: owner[i] = j;
2173: #if defined(PETSC_USE_DEBUG)
2174: found = PETSC_TRUE;
2175: #endif
2176: break;
2177: }
2178: }
2179: #if defined(PETSC_USE_DEBUG)
2180: if (!found) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2181: found = PETSC_FALSE;
2182: #endif
2183: }
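/* Editor's note: the search above walks the ownership ranges with a cursor j that is only
   reset when the indices are not locally sorted; for sorted input this makes the owner
   lookup O(nx + size) instead of O(nx*size).  For example, with ranges owners = {0,4,8,12}
   the indices 1,5,6,9 are all resolved in a single forward sweep. */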
2184: nsends = 0; for (i=0; i<size; i++) { nsends += (nprocs[i] > 0);}
2186: /* inform other processors of the number of messages and their lengths */
2187: PetscGatherNumberOfMessages(comm,PETSC_NULL,nprocs,&nrecvs);
2188: PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2189: PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2190: recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
2192: /* post receives: */
2193: PetscMalloc5(2*recvtotal,PetscInt,&rvalues,2*nx,PetscInt,&svalues,nrecvs,MPI_Request,&recv_waits,nsends,MPI_Request,&send_waits,nsends,MPI_Status,&send_status);
2195: count = 0;
2196: for (i=0; i<nrecvs; i++) {
2197: MPI_Irecv((rvalues+2*count),2*olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2198: count += olengths1[i];
2199: }
2200: PetscFree(onodes1);
2202: /* do sends:
2203: 1) starts[i] gives the starting index in svalues for the entries going to
2204: the ith processor
2205: */
2206: starts[0]= 0;
2207: for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
2208: for (i=0; i<nx; i++) {
2209: svalues[2*starts[owner[i]]] = bs*inidx[i];
2210: svalues[1 + 2*starts[owner[i]]++] = bs*inidy[i];
2211: }
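/* Editor's note: each outgoing message interleaves the pair (bs*inidx[i], bs*inidy[i]),
   i.e. the global "from" location followed by its "to" location, which is why the
   receives above and the sends below move 2*olengths1[i] and 2*nprocs[i] integers per
   process respectively. */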
2213: starts[0] = 0;
2214: for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
2215: count = 0;
2216: for (i=0; i<size; i++) {
2217: if (nprocs[i]) {
2218: MPI_Isend(svalues+2*starts[i],2*nprocs[i],MPIU_INT,i,tag,comm,send_waits+count);
2219: count++;
2220: }
2221: }
2222: PetscFree3(nprocs,owner,starts);
2224: /* wait on receives */
2225: count = nrecvs;
2226: slen = 0;
2227: while (count) {
2228: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2229: /* unpack receives into our local space */
2230: MPI_Get_count(&recv_status,MPIU_INT,&n);
2231: slen += n/2;
2232: count--;
2233: }
2234: if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",slen,recvtotal);
2235:
2236: PetscMalloc2(slen,PetscInt,&local_inidx,slen,PetscInt,&local_inidy);
2237: base = owners[rank];
2238: count = 0;
2239: rsvalues = rvalues;
2240: for (i=0; i<nrecvs; i++) {
2241: values = rsvalues;
2242: rsvalues += 2*olengths1[i];
2243: for (j=0; j<olengths1[i]; j++) {
2244: local_inidx[count] = values[2*j] - base;
2245: local_inidy[count++] = values[2*j+1];
2246: }
2247: }
2248: PetscFree(olengths1);
2250: /* wait on sends */
2251: if (nsends) {MPI_Waitall(nsends,send_waits,send_status);}
2252: PetscFree5(rvalues,svalues,recv_waits,send_waits,send_status);
2254: /*
2255: ideally we would sort and remove duplicates from local_inidx[] and local_inidy[]; currently this happens only when do_it_slow is defined (see below)
2256: */
2258: #if defined(do_it_slow)
2259: /* sort on the from index */
2260: PetscSortIntWithArray(slen,local_inidx,local_inidy);
2261: start = 0;
2262: while (start < slen) {
2263: count = start+1;
2264: last = local_inidx[start];
2265: while (count < slen && last == local_inidx[count]) count++;
2266: if (count > start + 1) { /* found 2 or more identical local_inidx[] values in a row */
2267: /* sort on to index */
2268: PetscSortInt(count-start,local_inidy+start);
2269: }
2270: /* remove duplicates; not the most efficient way, but probably good enough */
2271: i = start;
2272: while (i < count-1) {
2273: if (local_inidy[i] != local_inidy[i+1]) {
2274: i++;
2275: } else { /* found a duplicate */
2276: duplicate = PETSC_TRUE;
2277: for (j=i; j<slen-1; j++) {
2278: local_inidx[j] = local_inidx[j+1];
2279: local_inidy[j] = local_inidy[j+1];
2280: }
2281: slen--;
2282: count--;
2283: }
2284: }
2285: start = count;
2286: }
2287: #endif
2288: if (duplicate) {
2289: PetscInfo(ctx,"Duplicate from to indices passed in VecScatterCreate(), they are ignored\n");
2290: }
2291: VecScatterCreate_StoP(slen,local_inidx,slen,local_inidy,xin,yin,bs,ctx);
2292: PetscFree2(local_inidx,local_inidy);
2293: return(0);
2294: }