Actual source code: vpscat.c
petsc-3.4.5 2014-06-29
2: /*
3: Defines parallel vector scatters.
4: */
6: #include <../src/vec/vec/impls/dvecimpl.h> /*I "petscvec.h" I*/
7: #include <../src/vec/vec/impls/mpi/pvecimpl.h>
11: PetscErrorCode VecScatterView_MPI(VecScatter ctx,PetscViewer viewer)
12: {
13: VecScatter_MPI_General *to =(VecScatter_MPI_General*)ctx->todata;
14: VecScatter_MPI_General *from=(VecScatter_MPI_General*)ctx->fromdata;
15: PetscErrorCode ierr;
16: PetscInt i;
17: PetscMPIInt rank;
18: PetscViewerFormat format;
19: PetscBool iascii;
22: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
23: if (iascii) {
24: MPI_Comm_rank(PetscObjectComm((PetscObject)ctx),&rank);
25: PetscViewerGetFormat(viewer,&format);
26: if (format == PETSC_VIEWER_ASCII_INFO) {
27: PetscInt nsend_max,nrecv_max,lensend_max,lenrecv_max,alldata,itmp;
29: MPI_Reduce(&to->n,&nsend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
30: MPI_Reduce(&from->n,&nrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
31: itmp = to->starts[to->n];
32: MPI_Reduce(&itmp,&lensend_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
33: itmp = from->starts[from->n];
34: MPI_Reduce(&itmp,&lenrecv_max,1,MPIU_INT,MPI_MAX,0,PetscObjectComm((PetscObject)ctx));
35: MPI_Reduce(&itmp,&alldata,1,MPIU_INT,MPI_SUM,0,PetscObjectComm((PetscObject)ctx));
37: PetscViewerASCIIPrintf(viewer,"VecScatter statistics\n");
38: PetscViewerASCIIPrintf(viewer," Maximum number sends %D\n",nsend_max);
39: PetscViewerASCIIPrintf(viewer," Maximum number receives %D\n",nrecv_max);
40: PetscViewerASCIIPrintf(viewer," Maximum data sent %d\n",(int)(lensend_max*to->bs*sizeof(PetscScalar)));
41: PetscViewerASCIIPrintf(viewer," Maximum data received %d\n",(int)(lenrecv_max*to->bs*sizeof(PetscScalar)));
42: PetscViewerASCIIPrintf(viewer," Total data sent %d\n",(int)(alldata*to->bs*sizeof(PetscScalar)));
44: } else {
45: PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);
46: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number sends = %D; Number to self = %D\n",rank,to->n,to->local.n);
47: if (to->n) {
48: for (i=0; i<to->n; i++) {
49: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length = %D to whom %D\n",rank,i,to->starts[i+1]-to->starts[i],to->procs[i]);
50: }
51: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote sends (in order by process sent to)\n");
52: for (i=0; i<to->starts[to->n]; i++) {
53: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,to->indices[i]);
54: }
55: }
57: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number receives = %D; Number from self = %D\n",rank,from->n,from->local.n);
58: if (from->n) {
59: for (i=0; i<from->n; i++) {
60: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D length %D from whom %D\n",rank,i,from->starts[i+1]-from->starts[i],from->procs[i]);
61: }
63: PetscViewerASCIISynchronizedPrintf(viewer,"Now the indices for all remote receives (in order by process received from)\n");
64: for (i=0; i<from->starts[from->n]; i++) {
65: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %D \n",rank,from->indices[i]);
66: }
67: }
68: if (to->local.n) {
69: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Indices for local part of scatter\n",rank);
70: for (i=0; i<to->local.n; i++) {
71: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] From %D to %D \n",rank,from->local.vslots[i],to->local.vslots[i]);
72: }
73: }
75: PetscViewerFlush(viewer);
76: PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
77: }
78: }
79: return(0);
80: }
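/*
   A usage sketch (hypothetical helper; the viewer calls are standard PETSc 3.4 API):
   pushing the ASCII_INFO format before viewing selects the statistics branch above
   instead of the per-rank index dump.
*/
PETSC_STATIC_INLINE PetscErrorCode VecScatterViewInfo_Example(VecScatter ctx)
{
  PetscErrorCode ierr;
  ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_WORLD,PETSC_VIEWER_ASCII_INFO);CHKERRQ(ierr);
  ierr = VecScatterView(ctx,PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
  return 0;
}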
82: /* -----------------------------------------------------------------------------------*/
83: /*
84: The next routine determines which part of the local portion of the scatter is an
85: exact copy of values into their current locations. Once that is known, that
86: portion of the scatter need not be performed when the vector is scattering to
87: itself with INSERT_VALUES.
89: This is currently not used, but it would speed up, for example, DMDALocalToLocalBegin/End().
91: */
94: PetscErrorCode VecScatterLocalOptimize_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from)
95: {
96: PetscInt n = to->n,n_nonmatching = 0,i,*to_slots = to->vslots,*from_slots = from->vslots;
98: PetscInt *nto_slots,*nfrom_slots,j = 0;
101: for (i=0; i<n; i++) {
102: if (to_slots[i] != from_slots[i]) n_nonmatching++;
103: }
105: if (!n_nonmatching) {
106: to->nonmatching_computed = PETSC_TRUE;
107: to->n_nonmatching = from->n_nonmatching = 0;
109: PetscInfo1(scatter,"Reduced %D to 0\n", n);
110: } else if (n_nonmatching == n) {
111: to->nonmatching_computed = PETSC_FALSE;
113: PetscInfo(scatter,"All values non-matching\n");
114: } else {
115: to->nonmatching_computed= PETSC_TRUE;
116: to->n_nonmatching = from->n_nonmatching = n_nonmatching;
118: PetscMalloc(n_nonmatching*sizeof(PetscInt),&nto_slots);
119: PetscMalloc(n_nonmatching*sizeof(PetscInt),&nfrom_slots);
121: to->slots_nonmatching = nto_slots;
122: from->slots_nonmatching = nfrom_slots;
123: for (i=0; i<n; i++) {
124: if (to_slots[i] != from_slots[i]) {
125: nto_slots[j] = to_slots[i];
126: nfrom_slots[j] = from_slots[i];
127: j++;
128: }
129: }
130: PetscInfo2(scatter,"Reduced %D to %D\n",n,n_nonmatching);
131: }
132: return(0);
133: }
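/*
   A minimal sketch (hypothetical helper, using only the fields set above) of how
   the compressed slot lists could be consumed: with INSERT_VALUES only the
   non-matching entries need to move, since the rest already hold the right values.
*/
PETSC_STATIC_INLINE void VecScatterLocalInsert_Example(VecScatter_Seq_General *to,VecScatter_Seq_General *from,const PetscScalar *x,PetscScalar *y)
{
  PetscBool      compressed = to->nonmatching_computed;
  PetscInt       i,n  = compressed ? to->n_nonmatching       : to->n;
  const PetscInt *ts  = compressed ? to->slots_nonmatching   : to->vslots;
  const PetscInt *fs  = compressed ? from->slots_nonmatching : from->vslots;
  for (i=0; i<n; i++) y[ts[i]] = x[fs[i]];
}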
135: /* --------------------------------------------------------------------------------------*/
140: PetscErrorCode VecScatterDestroy_PtoP(VecScatter ctx)
141: {
142: VecScatter_MPI_General *to = (VecScatter_MPI_General*)ctx->todata;
143: VecScatter_MPI_General *from = (VecScatter_MPI_General*)ctx->fromdata;
144: PetscErrorCode ierr;
145: PetscInt i;
148: if (to->use_readyreceiver) {
149: /*
150: Since we have already posted sends we must cancel them before freeing
151: the requests
152: */
153: for (i=0; i<from->n; i++) {
154: MPI_Cancel(from->requests+i);
155: }
156: for (i=0; i<to->n; i++) {
157: MPI_Cancel(to->rev_requests+i);
158: }
159: MPI_Waitall(from->n,from->requests,to->rstatus);
160: MPI_Waitall(to->n,to->rev_requests,to->rstatus);
161: }
163: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
164: if (to->use_alltoallw) {
165: PetscFree3(to->wcounts,to->wdispls,to->types);
166: PetscFree3(from->wcounts,from->wdispls,from->types);
167: }
168: #endif
170: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
171: if (to->use_window) {
172: MPI_Win_free(&from->window);
173: MPI_Win_free(&to->window);
174: PetscFree(from->winstarts);
175: PetscFree(to->winstarts);
176: }
177: #endif
179: if (to->use_alltoallv) {
180: PetscFree2(to->counts,to->displs);
181: PetscFree2(from->counts,from->displs);
182: }
184: /* release MPI resources obtained with MPI_Send_init() and MPI_Recv_init() */
185: /*
186: IBM's PE version of MPI has a bug where freeing these requests corrupts later
187: message passing.
188: */
189: #if !defined(PETSC_HAVE_BROKEN_REQUEST_FREE)
190: if (!to->use_alltoallv && !to->use_window) { /* currently the to->requests etc are ALWAYS allocated even if not used */
191: if (to->requests) {
192: for (i=0; i<to->n; i++) {
193: MPI_Request_free(to->requests + i);
194: }
195: }
196: if (to->rev_requests) {
197: for (i=0; i<to->n; i++) {
198: MPI_Request_free(to->rev_requests + i);
199: }
200: }
201: }
202: /*
203: MPICH could not properly cancel requests, so with ready-receiver mode we
204: cannot free the requests. This may be fixed now; if it is not, wrap the
205: following code in an if (!to->use_readyreceiver) { } block.
206: */
207: if (!to->use_alltoallv && !to->use_window) { /* currently the from->requests etc are ALWAYS allocated even if not used */
208: if (from->requests) {
209: for (i=0; i<from->n; i++) {
210: MPI_Request_free(from->requests + i);
211: }
212: }
214: if (from->rev_requests) {
215: for (i=0; i<from->n; i++) {
216: MPI_Request_free(from->rev_requests + i);
217: }
218: }
219: }
220: #endif
222: PetscFree(to->local.vslots);
223: PetscFree(from->local.vslots);
224: PetscFree2(to->counts,to->displs);
225: PetscFree2(from->counts,from->displs);
226: PetscFree(to->local.slots_nonmatching);
227: PetscFree(from->local.slots_nonmatching);
228: PetscFree(to->rev_requests);
229: PetscFree(from->rev_requests);
230: PetscFree(to->requests);
231: PetscFree(from->requests);
232: PetscFree4(to->values,to->indices,to->starts,to->procs);
233: PetscFree2(to->sstatus,to->rstatus);
234: PetscFree4(from->values,from->indices,from->starts,from->procs);
235: PetscFree(from);
236: PetscFree(to);
237: #if defined(PETSC_HAVE_CUSP)
238: PetscCUSPIndicesDestroy((PetscCUSPIndices*)&ctx->spptr);
239: #endif
240: return(0);
241: }
245: /* --------------------------------------------------------------------------------------*/
246: /*
247: Special optimization to detect when the local part of the scatter is actually
248: a contiguous copy; if so, the scatter routines call PetscMemcpy() instead.
250: */
253: PetscErrorCode VecScatterLocalOptimizeCopy_Private(VecScatter scatter,VecScatter_Seq_General *to,VecScatter_Seq_General *from,PetscInt bs)
254: {
255: PetscInt n = to->n,i,*to_slots = to->vslots,*from_slots = from->vslots;
256: PetscInt to_start,from_start;
260: to_start = to_slots[0];
261: from_start = from_slots[0];
263: for (i=1; i<n; i++) {
264: to_start += bs;
265: from_start += bs;
266: if (to_slots[i] != to_start) return(0);
267: if (from_slots[i] != from_start) return(0);
268: }
269: to->is_copy = PETSC_TRUE;
270: to->copy_start = to_slots[0];
271: to->copy_length = bs*sizeof(PetscScalar)*n;
272: from->is_copy = PETSC_TRUE;
273: from->copy_start = from_slots[0];
274: from->copy_length = bs*sizeof(PetscScalar)*n;
275: PetscInfo(scatter,"Local scatter is a copy, optimizing for it\n");
276: return(0);
277: }
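/*
   A minimal sketch (hypothetical helper) of how the is_copy flag set above is
   exploited: a single PetscMemcpy() replaces the indexed loop, which is why
   copy_length was stored in bytes.
*/
PETSC_STATIC_INLINE PetscErrorCode VecScatterLocalCopy_Example(VecScatter_Seq_General *to,VecScatter_Seq_General *from,const PetscScalar *x,PetscScalar *y)
{
  PetscErrorCode ierr;
  if (to->is_copy) {
    ierr = PetscMemcpy(y+to->copy_start,x+from->copy_start,to->copy_length);CHKERRQ(ierr);
  }
  return 0;
}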
279: /* --------------------------------------------------------------------------------------*/
283: PetscErrorCode VecScatterCopy_PtoP_X(VecScatter in,VecScatter out)
284: {
285: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
286: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
287: PetscErrorCode ierr;
288: PetscInt ny,bs = in_from->bs;
291: out->begin = in->begin;
292: out->end = in->end;
293: out->copy = in->copy;
294: out->destroy = in->destroy;
295: out->view = in->view;
297: /* allocate entire send scatter context */
298: PetscNewLog(out,VecScatter_MPI_General,&out_to);
299: PetscNewLog(out,VecScatter_MPI_General,&out_from);
301: ny = in_to->starts[in_to->n];
302: out_to->n = in_to->n;
303: out_to->type = in_to->type;
304: out_to->sendfirst = in_to->sendfirst;
306: PetscMalloc(out_to->n*sizeof(MPI_Request),&out_to->requests);
307: PetscMalloc4(bs*ny,PetscScalar,&out_to->values,ny,PetscInt,&out_to->indices,out_to->n+1,PetscInt,&out_to->starts,out_to->n,PetscMPIInt,&out_to->procs);
308: PetscMalloc2(PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->sstatus,PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->rstatus);
309: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
310: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
311: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
313: out->todata = (void*)out_to;
314: out_to->local.n = in_to->local.n;
315: out_to->local.nonmatching_computed = PETSC_FALSE;
316: out_to->local.n_nonmatching = 0;
317: out_to->local.slots_nonmatching = 0;
318: if (in_to->local.n) {
319: PetscMalloc(in_to->local.n*sizeof(PetscInt),&out_to->local.vslots);
320: PetscMalloc(in_from->local.n*sizeof(PetscInt),&out_from->local.vslots);
321: PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
322: PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
323: } else {
324: out_to->local.vslots = 0;
325: out_from->local.vslots = 0;
326: }
328: /* allocate entire receive context */
329: out_from->type = in_from->type;
330: ny = in_from->starts[in_from->n];
331: out_from->n = in_from->n;
332: out_from->sendfirst = in_from->sendfirst;
334: PetscMalloc(out_from->n*sizeof(MPI_Request),&out_from->requests);
335: PetscMalloc4(ny*bs,PetscScalar,&out_from->values,ny,PetscInt,&out_from->indices,out_from->n+1,PetscInt,&out_from->starts,out_from->n,PetscMPIInt,&out_from->procs);
336: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
337: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
338: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
340: out->fromdata = (void*)out_from;
341: out_from->local.n = in_from->local.n;
342: out_from->local.nonmatching_computed = PETSC_FALSE;
343: out_from->local.n_nonmatching = 0;
344: out_from->local.slots_nonmatching = 0;
346: /*
347: set up the request arrays for use with MPI_Send_init() and MPI_Recv_init()
348: */
349: {
350: PetscMPIInt tag;
351: MPI_Comm comm;
352: PetscInt *sstarts = out_to->starts, *rstarts = out_from->starts;
353: PetscMPIInt *sprocs = out_to->procs, *rprocs = out_from->procs;
354: PetscInt i;
355: PetscBool flg;
356: MPI_Request *swaits = out_to->requests,*rwaits = out_from->requests;
357: MPI_Request *rev_swaits,*rev_rwaits;
358: PetscScalar *Ssvalues = out_to->values, *Srvalues = out_from->values;
360: PetscMalloc(in_to->n*sizeof(MPI_Request),&out_to->rev_requests);
361: PetscMalloc(in_from->n*sizeof(MPI_Request),&out_from->rev_requests);
363: rev_rwaits = out_to->rev_requests;
364: rev_swaits = out_from->rev_requests;
366: out_from->bs = out_to->bs = bs;
367: tag = ((PetscObject)out)->tag;
368: PetscObjectGetComm((PetscObject)out,&comm);
370: /* Register the receives that you will use later (sends for scatter reverse) */
371: for (i=0; i<out_from->n; i++) {
372: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
373: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rev_swaits+i);
374: }
376: flg = PETSC_FALSE;
377: PetscOptionsGetBool(NULL,"-vecscatter_rsend",&flg,NULL);
378: if (flg) {
379: out_to->use_readyreceiver = PETSC_TRUE;
380: out_from->use_readyreceiver = PETSC_TRUE;
381: for (i=0; i<out_to->n; i++) {
382: MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
383: }
384: if (out_from->n) {MPI_Startall_irecv(out_from->starts[out_from->n]*out_from->bs,out_from->n,out_from->requests);}
385: MPI_Barrier(comm);
386: PetscInfo(in,"Using VecScatter ready receiver mode\n");
387: } else {
388: out_to->use_readyreceiver = PETSC_FALSE;
389: out_from->use_readyreceiver = PETSC_FALSE;
390: flg = PETSC_FALSE;
391: PetscOptionsGetBool(NULL,"-vecscatter_ssend",&flg,NULL);
392: if (flg) {
393: PetscInfo(in,"Using VecScatter Ssend mode\n");
394: }
395: for (i=0; i<out_to->n; i++) {
396: if (!flg) {
397: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
398: } else {
399: MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
400: }
401: }
402: }
403: /* Register receives for scatter reverse */
404: for (i=0; i<out_to->n; i++) {
405: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,rev_rwaits+i);
406: }
407: }
408: return(0);
409: }
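/*
   A standalone sketch of the persistent-request pattern set up above (buffer, peer,
   and tag are illustrative): the request is created once with MPI_Send_init() and
   restarted each iteration, which is what the scatter begin/end routines do.
*/
#if 0
MPI_Request req;
double      buf[8];
int         it;
MPI_Send_init(buf,8,MPI_DOUBLE,/* dest */ 1,/* tag */ 0,MPI_COMM_WORLD,&req);
for (it=0; it<100; it++) {
  /* ... fill buf with this iteration's data ... */
  MPI_Start(&req);
  MPI_Wait(&req,MPI_STATUS_IGNORE);
}
MPI_Request_free(&req);
#endif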
413: PetscErrorCode VecScatterCopy_PtoP_AllToAll(VecScatter in,VecScatter out)
414: {
415: VecScatter_MPI_General *in_to = (VecScatter_MPI_General*)in->todata;
416: VecScatter_MPI_General *in_from = (VecScatter_MPI_General*)in->fromdata,*out_to,*out_from;
417: PetscErrorCode ierr;
418: PetscInt ny,bs = in_from->bs;
419: PetscMPIInt size;
422: MPI_Comm_size(PetscObjectComm((PetscObject)in),&size);
424: out->begin = in->begin;
425: out->end = in->end;
426: out->copy = in->copy;
427: out->destroy = in->destroy;
428: out->view = in->view;
430: /* allocate entire send scatter context */
431: PetscNewLog(out,VecScatter_MPI_General,&out_to);
432: PetscNewLog(out,VecScatter_MPI_General,&out_from);
434: ny = in_to->starts[in_to->n];
435: out_to->n = in_to->n;
436: out_to->type = in_to->type;
437: out_to->sendfirst = in_to->sendfirst;
439: PetscMalloc(out_to->n*sizeof(MPI_Request),&out_to->requests);
440: PetscMalloc4(bs*ny,PetscScalar,&out_to->values,ny,PetscInt,&out_to->indices,out_to->n+1,PetscInt,&out_to->starts,out_to->n,PetscMPIInt,&out_to->procs);
441: PetscMalloc2(PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->sstatus,PetscMax(in_to->n,in_from->n),MPI_Status,&out_to->rstatus);
442: PetscMemcpy(out_to->indices,in_to->indices,ny*sizeof(PetscInt));
443: PetscMemcpy(out_to->starts,in_to->starts,(out_to->n+1)*sizeof(PetscInt));
444: PetscMemcpy(out_to->procs,in_to->procs,(out_to->n)*sizeof(PetscMPIInt));
446: out->todata = (void*)out_to;
447: out_to->local.n = in_to->local.n;
448: out_to->local.nonmatching_computed = PETSC_FALSE;
449: out_to->local.n_nonmatching = 0;
450: out_to->local.slots_nonmatching = 0;
451: if (in_to->local.n) {
452: PetscMalloc(in_to->local.n*sizeof(PetscInt),&out_to->local.vslots);
453: PetscMalloc(in_from->local.n*sizeof(PetscInt),&out_from->local.vslots);
454: PetscMemcpy(out_to->local.vslots,in_to->local.vslots,in_to->local.n*sizeof(PetscInt));
455: PetscMemcpy(out_from->local.vslots,in_from->local.vslots,in_from->local.n*sizeof(PetscInt));
456: } else {
457: out_to->local.vslots = 0;
458: out_from->local.vslots = 0;
459: }
461: /* allocate entire receive context */
462: out_from->type = in_from->type;
463: ny = in_from->starts[in_from->n];
464: out_from->n = in_from->n;
465: out_from->sendfirst = in_from->sendfirst;
467: PetscMalloc(out_from->n*sizeof(MPI_Request),&out_from->requests);
468: PetscMalloc4(ny*bs,PetscScalar,&out_from->values,ny,PetscInt,&out_from->indices,out_from->n+1,PetscInt,&out_from->starts,out_from->n,PetscMPIInt,&out_from->procs);
469: PetscMemcpy(out_from->indices,in_from->indices,ny*sizeof(PetscInt));
470: PetscMemcpy(out_from->starts,in_from->starts,(out_from->n+1)*sizeof(PetscInt));
471: PetscMemcpy(out_from->procs,in_from->procs,(out_from->n)*sizeof(PetscMPIInt));
473: out->fromdata = (void*)out_from;
474: out_from->local.n = in_from->local.n;
475: out_from->local.nonmatching_computed = PETSC_FALSE;
476: out_from->local.n_nonmatching = 0;
477: out_from->local.slots_nonmatching = 0;
479: out_to->use_alltoallv = out_from->use_alltoallv = PETSC_TRUE;
481: PetscMalloc2(size,PetscMPIInt,&out_to->counts,size,PetscMPIInt,&out_to->displs);
482: PetscMemcpy(out_to->counts,in_to->counts,size*sizeof(PetscMPIInt));
483: PetscMemcpy(out_to->displs,in_to->displs,size*sizeof(PetscMPIInt));
485: PetscMalloc2(size,PetscMPIInt,&out_from->counts,size,PetscMPIInt,&out_from->displs);
486: PetscMemcpy(out_from->counts,in_from->counts,size*sizeof(PetscMPIInt));
487: PetscMemcpy(out_from->displs,in_from->displs,size*sizeof(PetscMPIInt));
488: return(0);
489: }
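/*
   A usage note (option name taken from the PetscOptionsGetBool() query later in
   this file): the alltoall copy path above applies to scatters created under the
   runtime option
       -vecscatter_alltoall
   which selects MPI_Alltoallv()-based communication instead of point-to-point sends.
*/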
490: /* --------------------------------------------------------------------------------------------------
491: Packs message data into the send buffers and unpacks it from the receive buffers.
493: These routines could be generated automatically;
495: Fortran kernels etc. could also be used.
496: */
497: PETSC_STATIC_INLINE void Pack_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
498: {
499: PetscInt i;
500: for (i=0; i<n; i++) y[i] = x[indicesx[i]];
501: }
505: PETSC_STATIC_INLINE PetscErrorCode UnPack_1(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
506: {
507: PetscInt i;
510: switch (addv) {
511: case INSERT_VALUES:
512: case INSERT_ALL_VALUES:
513: for (i=0; i<n; i++) y[indicesy[i]] = x[i];
514: break;
515: case ADD_VALUES:
516: case ADD_ALL_VALUES:
517: for (i=0; i<n; i++) y[indicesy[i]] += x[i];
518: break;
519: #if !defined(PETSC_USE_COMPLEX)
520: case MAX_VALUES:
521: for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[i]);
522: #else
523: case MAX_VALUES:
524: #endif
525: case NOT_SET_VALUES:
526: break;
527: default:
528: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
529: }
530: return(0);
531: }
535: PETSC_STATIC_INLINE PetscErrorCode Scatter_1(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
536: {
537: PetscInt i;
540: switch (addv) {
541: case INSERT_VALUES:
542: case INSERT_ALL_VALUES:
543: for (i=0; i<n; i++) y[indicesy[i]] = x[indicesx[i]];
544: break;
545: case ADD_VALUES:
546: case ADD_ALL_VALUES:
547: for (i=0; i<n; i++) y[indicesy[i]] += x[indicesx[i]];
548: break;
549: #if !defined(PETSC_USE_COMPLEX)
550: case MAX_VALUES:
551: for (i=0; i<n; i++) y[indicesy[i]] = PetscMax(y[indicesy[i]],x[indicesx[i]]);
552: #else
553: case MAX_VALUES:
554: #endif
555: case NOT_SET_VALUES:
556: break;
557: default:
558: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
559: }
560: return(0);
561: }
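/*
   A short self-contained example (not part of the original file) of the bs = 1
   kernels above: Pack_1 gathers indexed entries of x into a contiguous buffer and
   UnPack_1 adds them into y, just as the send and receive paths do.
*/
PETSC_STATIC_INLINE PetscErrorCode PackUnpack1_Example(void)
{
  const PetscInt    idx[2] = {3,1},idy[2] = {0,2};
  const PetscScalar x[4]   = {10.,11.,12.,13.};
  PetscScalar       buf[2],y[4] = {0.,0.,0.,0.};
  PetscErrorCode    ierr;

  Pack_1(2,idx,x,buf);                                     /* buf = {13.,11.} */
  ierr = UnPack_1(2,buf,idy,y,ADD_VALUES);CHKERRQ(ierr);   /* y = {13.,0.,11.,0.} */
  return 0;
}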
563: /* ----------------------------------------------------------------------------------------------- */
564: PETSC_STATIC_INLINE void Pack_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
565: {
566: PetscInt i,idx;
568: for (i=0; i<n; i++) {
569: idx = *indicesx++;
570: y[0] = x[idx];
571: y[1] = x[idx+1];
572: y += 2;
573: }
574: }
578: PETSC_STATIC_INLINE PetscErrorCode UnPack_2(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
579: {
580: PetscInt i,idy;
583: switch (addv) {
584: case INSERT_VALUES:
585: case INSERT_ALL_VALUES:
586: for (i=0; i<n; i++) {
587: idy = *indicesy++;
588: y[idy] = x[0];
589: y[idy+1] = x[1];
590: x += 2;
591: }
592: break;
593: case ADD_VALUES:
594: case ADD_ALL_VALUES:
595: for (i=0; i<n; i++) {
596: idy = *indicesy++;
597: y[idy] += x[0];
598: y[idy+1] += x[1];
599: x += 2;
600: }
601: break;
602: #if !defined(PETSC_USE_COMPLEX)
603: case MAX_VALUES:
604: for (i=0; i<n; i++) {
605: idy = *indicesy++;
606: y[idy] = PetscMax(y[idy],x[0]);
607: y[idy+1] = PetscMax(y[idy+1],x[1]);
608: x += 2;
609: }
610: #else
611: case MAX_VALUES:
612: #endif
613: case NOT_SET_VALUES:
614: break;
615: default:
616: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
617: }
618: return(0);
619: }
623: PETSC_STATIC_INLINE PetscErrorCode Scatter_2(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
624: {
625: PetscInt i,idx,idy;
628: switch (addv) {
629: case INSERT_VALUES:
630: case INSERT_ALL_VALUES:
631: for (i=0; i<n; i++) {
632: idx = *indicesx++;
633: idy = *indicesy++;
634: y[idy] = x[idx];
635: y[idy+1] = x[idx+1];
636: }
637: break;
638: case ADD_VALUES:
639: case ADD_ALL_VALUES:
640: for (i=0; i<n; i++) {
641: idx = *indicesx++;
642: idy = *indicesy++;
643: y[idy] += x[idx];
644: y[idy+1] += x[idx+1];
645: }
646: break;
647: #if !defined(PETSC_USE_COMPLEX)
648: case MAX_VALUES:
649: for (i=0; i<n; i++) {
650: idx = *indicesx++;
651: idy = *indicesy++;
652: y[idy] = PetscMax(y[idy],x[idx]);
653: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
654: }
655: #else
656: case MAX_VALUES:
657: #endif
658: case NOT_SET_VALUES:
659: break;
660: default:
661: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
662: }
663: return(0);
664: }
665: /* ----------------------------------------------------------------------------------------------- */
666: PETSC_STATIC_INLINE void Pack_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
667: {
668: PetscInt i,idx;
670: for (i=0; i<n; i++) {
671: idx = *indicesx++;
672: y[0] = x[idx];
673: y[1] = x[idx+1];
674: y[2] = x[idx+2];
675: y += 3;
676: }
677: }
680: PETSC_STATIC_INLINE PetscErrorCode UnPack_3(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
681: {
682: PetscInt i,idy;
685: switch (addv) {
686: case INSERT_VALUES:
687: case INSERT_ALL_VALUES:
688: for (i=0; i<n; i++) {
689: idy = *indicesy++;
690: y[idy] = x[0];
691: y[idy+1] = x[1];
692: y[idy+2] = x[2];
693: x += 3;
694: }
695: break;
696: case ADD_VALUES:
697: case ADD_ALL_VALUES:
698: for (i=0; i<n; i++) {
699: idy = *indicesy++;
700: y[idy] += x[0];
701: y[idy+1] += x[1];
702: y[idy+2] += x[2];
703: x += 3;
704: }
705: break;
706: #if !defined(PETSC_USE_COMPLEX)
707: case MAX_VALUES:
708: for (i=0; i<n; i++) {
709: idy = *indicesy++;
710: y[idy] = PetscMax(y[idy],x[0]);
711: y[idy+1] = PetscMax(y[idy+1],x[1]);
712: y[idy+2] = PetscMax(y[idy+2],x[2]);
713: x += 3;
714: }
715: #else
716: case MAX_VALUES:
717: #endif
718: case NOT_SET_VALUES:
719: break;
720: default:
721: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
722: }
723: return(0);
724: }
728: PETSC_STATIC_INLINE PetscErrorCode Scatter_3(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
729: {
730: PetscInt i,idx,idy;
733: switch (addv) {
734: case INSERT_VALUES:
735: case INSERT_ALL_VALUES:
736: for (i=0; i<n; i++) {
737: idx = *indicesx++;
738: idy = *indicesy++;
739: y[idy] = x[idx];
740: y[idy+1] = x[idx+1];
741: y[idy+2] = x[idx+2];
742: }
743: break;
744: case ADD_VALUES:
745: case ADD_ALL_VALUES:
746: for (i=0; i<n; i++) {
747: idx = *indicesx++;
748: idy = *indicesy++;
749: y[idy] += x[idx];
750: y[idy+1] += x[idx+1];
751: y[idy+2] += x[idx+2];
752: }
753: break;
754: #if !defined(PETSC_USE_COMPLEX)
755: case MAX_VALUES:
756: for (i=0; i<n; i++) {
757: idx = *indicesx++;
758: idy = *indicesy++;
759: y[idy] = PetscMax(y[idy],x[idx]);
760: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
761: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
762: }
763: #else
764: case MAX_VALUES:
765: #endif
766: case NOT_SET_VALUES:
767: break;
768: default:
769: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
770: }
771: return(0);
772: }
773: /* ----------------------------------------------------------------------------------------------- */
774: PETSC_STATIC_INLINE void Pack_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
775: {
776: PetscInt i,idx;
778: for (i=0; i<n; i++) {
779: idx = *indicesx++;
780: y[0] = x[idx];
781: y[1] = x[idx+1];
782: y[2] = x[idx+2];
783: y[3] = x[idx+3];
784: y += 4;
785: }
786: }
789: PETSC_STATIC_INLINE PetscErrorCode UnPack_4(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
790: {
791: PetscInt i,idy;
794: switch (addv) {
795: case INSERT_VALUES:
796: case INSERT_ALL_VALUES:
797: for (i=0; i<n; i++) {
798: idy = *indicesy++;
799: y[idy] = x[0];
800: y[idy+1] = x[1];
801: y[idy+2] = x[2];
802: y[idy+3] = x[3];
803: x += 4;
804: }
805: break;
806: case ADD_VALUES:
807: case ADD_ALL_VALUES:
808: for (i=0; i<n; i++) {
809: idy = *indicesy++;
810: y[idy] += x[0];
811: y[idy+1] += x[1];
812: y[idy+2] += x[2];
813: y[idy+3] += x[3];
814: x += 4;
815: }
816: break;
817: #if !defined(PETSC_USE_COMPLEX)
818: case MAX_VALUES:
819: for (i=0; i<n; i++) {
820: idy = *indicesy++;
821: y[idy] = PetscMax(y[idy],x[0]);
822: y[idy+1] = PetscMax(y[idy+1],x[1]);
823: y[idy+2] = PetscMax(y[idy+2],x[2]);
824: y[idy+3] = PetscMax(y[idy+3],x[3]);
825: x += 4;
826: }
827: #else
828: case MAX_VALUES:
829: #endif
830: case NOT_SET_VALUES:
831: break;
832: default:
833: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
834: }
835: return(0);
836: }
840: PETSC_STATIC_INLINE PetscErrorCode Scatter_4(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
841: {
842: PetscInt i,idx,idy;
845: switch (addv) {
846: case INSERT_VALUES:
847: case INSERT_ALL_VALUES:
848: for (i=0; i<n; i++) {
849: idx = *indicesx++;
850: idy = *indicesy++;
851: y[idy] = x[idx];
852: y[idy+1] = x[idx+1];
853: y[idy+2] = x[idx+2];
854: y[idy+3] = x[idx+3];
855: }
856: break;
857: case ADD_VALUES:
858: case ADD_ALL_VALUES:
859: for (i=0; i<n; i++) {
860: idx = *indicesx++;
861: idy = *indicesy++;
862: y[idy] += x[idx];
863: y[idy+1] += x[idx+1];
864: y[idy+2] += x[idx+2];
865: y[idy+3] += x[idx+3];
866: }
867: break;
868: #if !defined(PETSC_USE_COMPLEX)
869: case MAX_VALUES:
870: for (i=0; i<n; i++) {
871: idx = *indicesx++;
872: idy = *indicesy++;
873: y[idy] = PetscMax(y[idy],x[idx]);
874: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
875: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
876: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
877: }
878: #else
879: case MAX_VALUES:
880: #endif
881: case NOT_SET_VALUES:
882: break;
883: default:
884: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
885: }
886: return(0);
887: }
888: /* ----------------------------------------------------------------------------------------------- */
889: PETSC_STATIC_INLINE void Pack_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
890: {
891: PetscInt i,idx;
893: for (i=0; i<n; i++) {
894: idx = *indicesx++;
895: y[0] = x[idx];
896: y[1] = x[idx+1];
897: y[2] = x[idx+2];
898: y[3] = x[idx+3];
899: y[4] = x[idx+4];
900: y += 5;
901: }
902: }
906: PETSC_STATIC_INLINE PetscErrorCode UnPack_5(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
907: {
908: PetscInt i,idy;
911: switch (addv) {
912: case INSERT_VALUES:
913: case INSERT_ALL_VALUES:
914: for (i=0; i<n; i++) {
915: idy = *indicesy++;
916: y[idy] = x[0];
917: y[idy+1] = x[1];
918: y[idy+2] = x[2];
919: y[idy+3] = x[3];
920: y[idy+4] = x[4];
921: x += 5;
922: }
923: break;
924: case ADD_VALUES:
925: case ADD_ALL_VALUES:
926: for (i=0; i<n; i++) {
927: idy = *indicesy++;
928: y[idy] += x[0];
929: y[idy+1] += x[1];
930: y[idy+2] += x[2];
931: y[idy+3] += x[3];
932: y[idy+4] += x[4];
933: x += 5;
934: }
935: break;
936: #if !defined(PETSC_USE_COMPLEX)
937: case MAX_VALUES:
938: for (i=0; i<n; i++) {
939: idy = *indicesy++;
940: y[idy] = PetscMax(y[idy],x[0]);
941: y[idy+1] = PetscMax(y[idy+1],x[1]);
942: y[idy+2] = PetscMax(y[idy+2],x[2]);
943: y[idy+3] = PetscMax(y[idy+3],x[3]);
944: y[idy+4] = PetscMax(y[idy+4],x[4]);
945: x += 5;
946: }
947: #else
948: case MAX_VALUES:
949: #endif
950: case NOT_SET_VALUES:
951: break;
952: default:
953: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
954: }
955: return(0);
956: }
960: PETSC_STATIC_INLINE PetscErrorCode Scatter_5(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
961: {
962: PetscInt i,idx,idy;
965: switch (addv) {
966: case INSERT_VALUES:
967: case INSERT_ALL_VALUES:
968: for (i=0; i<n; i++) {
969: idx = *indicesx++;
970: idy = *indicesy++;
971: y[idy] = x[idx];
972: y[idy+1] = x[idx+1];
973: y[idy+2] = x[idx+2];
974: y[idy+3] = x[idx+3];
975: y[idy+4] = x[idx+4];
976: }
977: break;
978: case ADD_VALUES:
979: case ADD_ALL_VALUES:
980: for (i=0; i<n; i++) {
981: idx = *indicesx++;
982: idy = *indicesy++;
983: y[idy] += x[idx];
984: y[idy+1] += x[idx+1];
985: y[idy+2] += x[idx+2];
986: y[idy+3] += x[idx+3];
987: y[idy+4] += x[idx+4];
988: }
989: break;
990: #if !defined(PETSC_USE_COMPLEX)
991: case MAX_VALUES:
992: for (i=0; i<n; i++) {
993: idx = *indicesx++;
994: idy = *indicesy++;
995: y[idy] = PetscMax(y[idy],x[idx]);
996: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
997: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
998: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
999: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1000: }
1001: #else
1002: case MAX_VALUES:
1003: #endif
1004: case NOT_SET_VALUES:
1005: break;
1006: default:
1007: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1008: }
1009: return(0);
1010: }
1011: /* ----------------------------------------------------------------------------------------------- */
1012: PETSC_STATIC_INLINE void Pack_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
1013: {
1014: PetscInt i,idx;
1016: for (i=0; i<n; i++) {
1017: idx = *indicesx++;
1018: y[0] = x[idx];
1019: y[1] = x[idx+1];
1020: y[2] = x[idx+2];
1021: y[3] = x[idx+3];
1022: y[4] = x[idx+4];
1023: y[5] = x[idx+5];
1024: y += 6;
1025: }
1026: }
1030: PETSC_STATIC_INLINE PetscErrorCode UnPack_6(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1031: {
1032: PetscInt i,idy;
1035: switch (addv) {
1036: case INSERT_VALUES:
1037: case INSERT_ALL_VALUES:
1038: for (i=0; i<n; i++) {
1039: idy = *indicesy++;
1040: y[idy] = x[0];
1041: y[idy+1] = x[1];
1042: y[idy+2] = x[2];
1043: y[idy+3] = x[3];
1044: y[idy+4] = x[4];
1045: y[idy+5] = x[5];
1046: x += 6;
1047: }
1048: break;
1049: case ADD_VALUES:
1050: case ADD_ALL_VALUES:
1051: for (i=0; i<n; i++) {
1052: idy = *indicesy++;
1053: y[idy] += x[0];
1054: y[idy+1] += x[1];
1055: y[idy+2] += x[2];
1056: y[idy+3] += x[3];
1057: y[idy+4] += x[4];
1058: y[idy+5] += x[5];
1059: x += 6;
1060: }
1061: break;
1062: #if !defined(PETSC_USE_COMPLEX)
1063: case MAX_VALUES:
1064: for (i=0; i<n; i++) {
1065: idy = *indicesy++;
1066: y[idy] = PetscMax(y[idy],x[0]);
1067: y[idy+1] = PetscMax(y[idy+1],x[1]);
1068: y[idy+2] = PetscMax(y[idy+2],x[2]);
1069: y[idy+3] = PetscMax(y[idy+3],x[3]);
1070: y[idy+4] = PetscMax(y[idy+4],x[4]);
1071: y[idy+5] = PetscMax(y[idy+5],x[5]);
1072: x += 6;
1073: }
1074: #else
1075: case MAX_VALUES:
1076: #endif
1077: case NOT_SET_VALUES:
1078: break;
1079: default:
1080: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1081: }
1082: return(0);
1083: }
1087: PETSC_STATIC_INLINE PetscErrorCode Scatter_6(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1088: {
1089: PetscInt i,idx,idy;
1092: switch (addv) {
1093: case INSERT_VALUES:
1094: case INSERT_ALL_VALUES:
1095: for (i=0; i<n; i++) {
1096: idx = *indicesx++;
1097: idy = *indicesy++;
1098: y[idy] = x[idx];
1099: y[idy+1] = x[idx+1];
1100: y[idy+2] = x[idx+2];
1101: y[idy+3] = x[idx+3];
1102: y[idy+4] = x[idx+4];
1103: y[idy+5] = x[idx+5];
1104: }
1105: break;
1106: case ADD_VALUES:
1107: case ADD_ALL_VALUES:
1108: for (i=0; i<n; i++) {
1109: idx = *indicesx++;
1110: idy = *indicesy++;
1111: y[idy] += x[idx];
1112: y[idy+1] += x[idx+1];
1113: y[idy+2] += x[idx+2];
1114: y[idy+3] += x[idx+3];
1115: y[idy+4] += x[idx+4];
1116: y[idy+5] += x[idx+5];
1117: }
1118: break;
1119: #if !defined(PETSC_USE_COMPLEX)
1120: case MAX_VALUES:
1121: for (i=0; i<n; i++) {
1122: idx = *indicesx++;
1123: idy = *indicesy++;
1124: y[idy] = PetscMax(y[idy],x[idx]);
1125: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1126: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1127: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1128: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1129: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1130: }
1131: #else
1132: case MAX_VALUES:
1133: #endif
1134: case NOT_SET_VALUES:
1135: break;
1136: default:
1137: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1138: }
1139: return(0);
1140: }
1141: /* ----------------------------------------------------------------------------------------------- */
1142: PETSC_STATIC_INLINE void Pack_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
1143: {
1144: PetscInt i,idx;
1146: for (i=0; i<n; i++) {
1147: idx = *indicesx++;
1148: y[0] = x[idx];
1149: y[1] = x[idx+1];
1150: y[2] = x[idx+2];
1151: y[3] = x[idx+3];
1152: y[4] = x[idx+4];
1153: y[5] = x[idx+5];
1154: y[6] = x[idx+6];
1155: y += 7;
1156: }
1157: }
1161: PETSC_STATIC_INLINE PetscErrorCode UnPack_7(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1162: {
1163: PetscInt i,idy;
1166: switch (addv) {
1167: case INSERT_VALUES:
1168: case INSERT_ALL_VALUES:
1169: for (i=0; i<n; i++) {
1170: idy = *indicesy++;
1171: y[idy] = x[0];
1172: y[idy+1] = x[1];
1173: y[idy+2] = x[2];
1174: y[idy+3] = x[3];
1175: y[idy+4] = x[4];
1176: y[idy+5] = x[5];
1177: y[idy+6] = x[6];
1178: x += 7;
1179: }
1180: break;
1181: case ADD_VALUES:
1182: case ADD_ALL_VALUES:
1183: for (i=0; i<n; i++) {
1184: idy = *indicesy++;
1185: y[idy] += x[0];
1186: y[idy+1] += x[1];
1187: y[idy+2] += x[2];
1188: y[idy+3] += x[3];
1189: y[idy+4] += x[4];
1190: y[idy+5] += x[5];
1191: y[idy+6] += x[6];
1192: x += 7;
1193: }
1194: break;
1195: #if !defined(PETSC_USE_COMPLEX)
1196: case MAX_VALUES:
1197: for (i=0; i<n; i++) {
1198: idy = *indicesy++;
1199: y[idy] = PetscMax(y[idy],x[0]);
1200: y[idy+1] = PetscMax(y[idy+1],x[1]);
1201: y[idy+2] = PetscMax(y[idy+2],x[2]);
1202: y[idy+3] = PetscMax(y[idy+3],x[3]);
1203: y[idy+4] = PetscMax(y[idy+4],x[4]);
1204: y[idy+5] = PetscMax(y[idy+5],x[5]);
1205: y[idy+6] = PetscMax(y[idy+6],x[6]);
1206: x += 7;
1207: }
1208: #else
1209: case MAX_VALUES:
1210: #endif
1211: case NOT_SET_VALUES:
1212: break;
1213: default:
1214: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1215: }
1216: return(0);
1217: }
1221: PETSC_STATIC_INLINE PetscErrorCode Scatter_7(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1222: {
1223: PetscInt i,idx,idy;
1226: switch (addv) {
1227: case INSERT_VALUES:
1228: case INSERT_ALL_VALUES:
1229: for (i=0; i<n; i++) {
1230: idx = *indicesx++;
1231: idy = *indicesy++;
1232: y[idy] = x[idx];
1233: y[idy+1] = x[idx+1];
1234: y[idy+2] = x[idx+2];
1235: y[idy+3] = x[idx+3];
1236: y[idy+4] = x[idx+4];
1237: y[idy+5] = x[idx+5];
1238: y[idy+6] = x[idx+6];
1239: }
1240: break;
1241: case ADD_VALUES:
1242: case ADD_ALL_VALUES:
1243: for (i=0; i<n; i++) {
1244: idx = *indicesx++;
1245: idy = *indicesy++;
1246: y[idy] += x[idx];
1247: y[idy+1] += x[idx+1];
1248: y[idy+2] += x[idx+2];
1249: y[idy+3] += x[idx+3];
1250: y[idy+4] += x[idx+4];
1251: y[idy+5] += x[idx+5];
1252: y[idy+6] += x[idx+6];
1253: }
1254: break;
1255: #if !defined(PETSC_USE_COMPLEX)
1256: case MAX_VALUES:
1257: for (i=0; i<n; i++) {
1258: idx = *indicesx++;
1259: idy = *indicesy++;
1260: y[idy] = PetscMax(y[idy],x[idx]);
1261: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1262: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1263: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1264: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1265: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1266: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1267: }
1268: #else
1269: case MAX_VALUES:
1270: #endif
1271: case NOT_SET_VALUES:
1272: break;
1273: default:
1274: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1275: }
1276: return(0);
1277: }
1278: /* ----------------------------------------------------------------------------------------------- */
1279: PETSC_STATIC_INLINE void Pack_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
1280: {
1281: PetscInt i,idx;
1283: for (i=0; i<n; i++) {
1284: idx = *indicesx++;
1285: y[0] = x[idx];
1286: y[1] = x[idx+1];
1287: y[2] = x[idx+2];
1288: y[3] = x[idx+3];
1289: y[4] = x[idx+4];
1290: y[5] = x[idx+5];
1291: y[6] = x[idx+6];
1292: y[7] = x[idx+7];
1293: y += 8;
1294: }
1295: }
1299: PETSC_STATIC_INLINE PetscErrorCode UnPack_8(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1300: {
1301: PetscInt i,idy;
1304: switch (addv) {
1305: case INSERT_VALUES:
1306: case INSERT_ALL_VALUES:
1307: for (i=0; i<n; i++) {
1308: idy = *indicesy++;
1309: y[idy] = x[0];
1310: y[idy+1] = x[1];
1311: y[idy+2] = x[2];
1312: y[idy+3] = x[3];
1313: y[idy+4] = x[4];
1314: y[idy+5] = x[5];
1315: y[idy+6] = x[6];
1316: y[idy+7] = x[7];
1317: x += 8;
1318: }
1319: break;
1320: case ADD_VALUES:
1321: case ADD_ALL_VALUES:
1322: for (i=0; i<n; i++) {
1323: idy = *indicesy++;
1324: y[idy] += x[0];
1325: y[idy+1] += x[1];
1326: y[idy+2] += x[2];
1327: y[idy+3] += x[3];
1328: y[idy+4] += x[4];
1329: y[idy+5] += x[5];
1330: y[idy+6] += x[6];
1331: y[idy+7] += x[7];
1332: x += 8;
1333: }
1334: break;
1335: #if !defined(PETSC_USE_COMPLEX)
1336: case MAX_VALUES:
1337: for (i=0; i<n; i++) {
1338: idy = *indicesy++;
1339: y[idy] = PetscMax(y[idy],x[0]);
1340: y[idy+1] = PetscMax(y[idy+1],x[1]);
1341: y[idy+2] = PetscMax(y[idy+2],x[2]);
1342: y[idy+3] = PetscMax(y[idy+3],x[3]);
1343: y[idy+4] = PetscMax(y[idy+4],x[4]);
1344: y[idy+5] = PetscMax(y[idy+5],x[5]);
1345: y[idy+6] = PetscMax(y[idy+6],x[6]);
1346: y[idy+7] = PetscMax(y[idy+7],x[7]);
1347: x += 8;
1348: }
1349: #else
1350: case MAX_VALUES:
1351: #endif
1352: case NOT_SET_VALUES:
1353: break;
1354: default:
1355: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1356: }
1357: return(0);
1358: }
1362: PETSC_STATIC_INLINE PetscErrorCode Scatter_8(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1363: {
1364: PetscInt i,idx,idy;
1367: switch (addv) {
1368: case INSERT_VALUES:
1369: case INSERT_ALL_VALUES:
1370: for (i=0; i<n; i++) {
1371: idx = *indicesx++;
1372: idy = *indicesy++;
1373: y[idy] = x[idx];
1374: y[idy+1] = x[idx+1];
1375: y[idy+2] = x[idx+2];
1376: y[idy+3] = x[idx+3];
1377: y[idy+4] = x[idx+4];
1378: y[idy+5] = x[idx+5];
1379: y[idy+6] = x[idx+6];
1380: y[idy+7] = x[idx+7];
1381: }
1382: break;
1383: case ADD_VALUES:
1384: case ADD_ALL_VALUES:
1385: for (i=0; i<n; i++) {
1386: idx = *indicesx++;
1387: idy = *indicesy++;
1388: y[idy] += x[idx];
1389: y[idy+1] += x[idx+1];
1390: y[idy+2] += x[idx+2];
1391: y[idy+3] += x[idx+3];
1392: y[idy+4] += x[idx+4];
1393: y[idy+5] += x[idx+5];
1394: y[idy+6] += x[idx+6];
1395: y[idy+7] += x[idx+7];
1396: }
1397: break;
1398: #if !defined(PETSC_USE_COMPLEX)
1399: case MAX_VALUES:
1400: for (i=0; i<n; i++) {
1401: idx = *indicesx++;
1402: idy = *indicesy++;
1403: y[idy] = PetscMax(y[idy],x[idx]);
1404: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1405: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1406: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1407: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1408: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1409: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1410: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1411: }
1412: #else
1413: case MAX_VALUES:
1414: #endif
1415: case NOT_SET_VALUES:
1416: break;
1417: default:
1418: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1419: }
1420: return(0);
1421: }
1423: /* ----------------------------------------------------------------------------------------------- */
1424: PETSC_STATIC_INLINE void Pack_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,PetscScalar *y)
1425: {
1426: PetscInt i,idx;
1428: for (i=0; i<n; i++) {
1429: idx = *indicesx++;
1430: y[0] = x[idx];
1431: y[1] = x[idx+1];
1432: y[2] = x[idx+2];
1433: y[3] = x[idx+3];
1434: y[4] = x[idx+4];
1435: y[5] = x[idx+5];
1436: y[6] = x[idx+6];
1437: y[7] = x[idx+7];
1438: y[8] = x[idx+8];
1439: y[9] = x[idx+9];
1440: y[10] = x[idx+10];
1441: y[11] = x[idx+11];
1442: y += 12;
1443: }
1444: }
1448: PETSC_STATIC_INLINE PetscErrorCode UnPack_12(PetscInt n,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1449: {
1450: PetscInt i,idy;
1453: switch (addv) {
1454: case INSERT_VALUES:
1455: case INSERT_ALL_VALUES:
1456: for (i=0; i<n; i++) {
1457: idy = *indicesy++;
1458: y[idy] = x[0];
1459: y[idy+1] = x[1];
1460: y[idy+2] = x[2];
1461: y[idy+3] = x[3];
1462: y[idy+4] = x[4];
1463: y[idy+5] = x[5];
1464: y[idy+6] = x[6];
1465: y[idy+7] = x[7];
1466: y[idy+8] = x[8];
1467: y[idy+9] = x[9];
1468: y[idy+10] = x[10];
1469: y[idy+11] = x[11];
1470: x += 12;
1471: }
1472: break;
1473: case ADD_VALUES:
1474: case ADD_ALL_VALUES:
1475: for (i=0; i<n; i++) {
1476: idy = *indicesy++;
1477: y[idy] += x[0];
1478: y[idy+1] += x[1];
1479: y[idy+2] += x[2];
1480: y[idy+3] += x[3];
1481: y[idy+4] += x[4];
1482: y[idy+5] += x[5];
1483: y[idy+6] += x[6];
1484: y[idy+7] += x[7];
1485: y[idy+8] += x[8];
1486: y[idy+9] += x[9];
1487: y[idy+10] += x[10];
1488: y[idy+11] += x[11];
1489: x += 12;
1490: }
1491: break;
1492: #if !defined(PETSC_USE_COMPLEX)
1493: case MAX_VALUES:
1494: for (i=0; i<n; i++) {
1495: idy = *indicesy++;
1496: y[idy] = PetscMax(y[idy],x[0]);
1497: y[idy+1] = PetscMax(y[idy+1],x[1]);
1498: y[idy+2] = PetscMax(y[idy+2],x[2]);
1499: y[idy+3] = PetscMax(y[idy+3],x[3]);
1500: y[idy+4] = PetscMax(y[idy+4],x[4]);
1501: y[idy+5] = PetscMax(y[idy+5],x[5]);
1502: y[idy+6] = PetscMax(y[idy+6],x[6]);
1503: y[idy+7] = PetscMax(y[idy+7],x[7]);
1504: y[idy+8] = PetscMax(y[idy+8],x[8]);
1505: y[idy+9] = PetscMax(y[idy+9],x[9]);
1506: y[idy+10] = PetscMax(y[idy+10],x[10]);
1507: y[idy+11] = PetscMax(y[idy+11],x[11]);
1508: x += 12;
1509: }
1510: #else
1511: case MAX_VALUES:
1512: #endif
1513: case NOT_SET_VALUES:
1514: break;
1515: default:
1516: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1517: }
1518: return(0);
1519: }
1523: PETSC_STATIC_INLINE PetscErrorCode Scatter_12(PetscInt n,const PetscInt *indicesx,const PetscScalar *x,const PetscInt *indicesy,PetscScalar *y,InsertMode addv)
1524: {
1525: PetscInt i,idx,idy;
1528: switch (addv) {
1529: case INSERT_VALUES:
1530: case INSERT_ALL_VALUES:
1531: for (i=0; i<n; i++) {
1532: idx = *indicesx++;
1533: idy = *indicesy++;
1534: y[idy] = x[idx];
1535: y[idy+1] = x[idx+1];
1536: y[idy+2] = x[idx+2];
1537: y[idy+3] = x[idx+3];
1538: y[idy+4] = x[idx+4];
1539: y[idy+5] = x[idx+5];
1540: y[idy+6] = x[idx+6];
1541: y[idy+7] = x[idx+7];
1542: y[idy+8] = x[idx+8];
1543: y[idy+9] = x[idx+9];
1544: y[idy+10] = x[idx+10];
1545: y[idy+11] = x[idx+11];
1546: }
1547: break;
1548: case ADD_VALUES:
1549: case ADD_ALL_VALUES:
1550: for (i=0; i<n; i++) {
1551: idx = *indicesx++;
1552: idy = *indicesy++;
1553: y[idy] += x[idx];
1554: y[idy+1] += x[idx+1];
1555: y[idy+2] += x[idx+2];
1556: y[idy+3] += x[idx+3];
1557: y[idy+4] += x[idx+4];
1558: y[idy+5] += x[idx+5];
1559: y[idy+6] += x[idx+6];
1560: y[idy+7] += x[idx+7];
1561: y[idy+8] += x[idx+8];
1562: y[idy+9] += x[idx+9];
1563: y[idy+10] += x[idx+10];
1564: y[idy+11] += x[idx+11];
1565: }
1566: break;
1567: #if !defined(PETSC_USE_COMPLEX)
1568: case MAX_VALUES:
1569: for (i=0; i<n; i++) {
1570: idx = *indicesx++;
1571: idy = *indicesy++;
1572: y[idy] = PetscMax(y[idy],x[idx]);
1573: y[idy+1] = PetscMax(y[idy+1],x[idx+1]);
1574: y[idy+2] = PetscMax(y[idy+2],x[idx+2]);
1575: y[idy+3] = PetscMax(y[idy+3],x[idx+3]);
1576: y[idy+4] = PetscMax(y[idy+4],x[idx+4]);
1577: y[idy+5] = PetscMax(y[idy+5],x[idx+5]);
1578: y[idy+6] = PetscMax(y[idy+6],x[idx+6]);
1579: y[idy+7] = PetscMax(y[idy+7],x[idx+7]);
1580: y[idy+8] = PetscMax(y[idy+8],x[idx+8]);
1581: y[idy+9] = PetscMax(y[idy+9],x[idx+9]);
1582: y[idy+10] = PetscMax(y[idy+10],x[idx+10]);
1583: y[idy+11] = PetscMax(y[idy+11],x[idx+11]);
1584: }
1585: #else
1586: case MAX_VALUES:
1587: #endif
1588: case NOT_SET_VALUES:
1589: break;
1590: default:
1591: SETERRQ1(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Cannot handle insert mode %d", addv);
1592: }
1593: return(0);
1594: }
1596: /* Create the VecScatterBegin/End_P for our chosen block sizes */
1597: #define BS 1
1598: #include <../src/vec/vec/utils/vpscat.h>
1599: #define BS 2
1600: #include <../src/vec/vec/utils/vpscat.h>
1601: #define BS 3
1602: #include <../src/vec/vec/utils/vpscat.h>
1603: #define BS 4
1604: #include <../src/vec/vec/utils/vpscat.h>
1605: #define BS 5
1606: #include <../src/vec/vec/utils/vpscat.h>
1607: #define BS 6
1608: #include <../src/vec/vec/utils/vpscat.h>
1609: #define BS 7
1610: #include <../src/vec/vec/utils/vpscat.h>
1611: #define BS 8
1612: #include <../src/vec/vec/utils/vpscat.h>
1613: #define BS 12
1614: #include <../src/vec/vec/utils/vpscat.h>
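/*
   A minimal sketch, assuming vpscat.h follows the usual PETSc token-pasting
   pattern: each include above instantiates block-size-specific begin/end routines
   by gluing BS onto generic names and then undefining BS. The macro and routine
   names below are illustrative, not the literal contents of vpscat.h.
*/
#if 0
#define PETSCMAP1_a(a,b) a ## _ ## b
#define PETSCMAP1_b(a,b) PETSCMAP1_a(a,b)
#define PETSCMAP1(a)     PETSCMAP1_b(a,BS)

PetscErrorCode PETSCMAP1(VecScatterBegin)(VecScatter ctx,Vec xin,Vec yin,InsertMode addv,ScatterMode mode)
{
  /* ... calls PETSCMAP1(Pack), e.g. Pack_2 when BS == 2 ... */
  return 0;
}
#undef BS
#endif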
1616: /* ==========================================================================================*/
1618: /* create parallel to sequential scatter context */
1620: PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General*,VecScatter_MPI_General*,VecScatter);
1624: /*@
1625: VecScatterCreateLocal - Creates a VecScatter from a list of messages it must send and receive.
1627: Collective on VecScatter
1629: Input Parameters:
1630: + ctx - the scatter context, obtained with VecScatterCreateEmpty()
1631: . nsends - number of messages this process will send
1632: . sendSizes - number of entries in each message sent
1633: . sendProcs - ranks of the processes the messages are sent to
1634: . sendIdx - indices from which the sent entries are obtained (in local, on-process numbering); this is one long array of size \sum_{i=0,i<nsends} sendSizes[i]
1635: . nrecvs - number of receives to expect
1636: . recvSizes - number of entries in each message received
1637: . recvProcs - ranks of the processes sending to this one
1638: . recvIdx - indices where received entries are to be placed (in local, on-process numbering); this is one long array of size \sum_{i=0,i<nrecvs} recvSizes[i]
1639: - bs - size of each block
1641: Notes: sendSizes[] and recvSizes[] cannot have any 0 entries. If you want to support having 0 entries you need
1642: to change the code below to "compress out" the sendProcs[] and recvProcs[] entries that have 0 entries.
1644: Probably does not handle sends to self properly. It should remove those from the counts that are used
1645: to allocate space inside the from struct.
1647: Level: intermediate
1649: @*/
1650: PetscErrorCode VecScatterCreateLocal(VecScatter ctx,PetscInt nsends,const PetscInt sendSizes[],const PetscInt sendProcs[],const PetscInt sendIdx[],PetscInt nrecvs,const PetscInt recvSizes[],const PetscInt recvProcs[],const PetscInt recvIdx[],PetscInt bs)
1651: {
1652: VecScatter_MPI_General *from, *to;
1653: PetscInt sendSize, recvSize;
1654: PetscInt n, i;
1655: PetscErrorCode ierr;
1657: /* allocate entire send scatter context */
1658: PetscNewLog(ctx,VecScatter_MPI_General,&to);
1659: to->n = nsends;
1660: for (n = 0, sendSize = 0; n < to->n; n++) sendSize += sendSizes[n];
1662: PetscMalloc(to->n*sizeof(MPI_Request),&to->requests);
1663: PetscMalloc4(bs*sendSize,PetscScalar,&to->values,sendSize,PetscInt,&to->indices,to->n+1,PetscInt,&to->starts,to->n,PetscMPIInt,&to->procs);
1664: PetscMalloc2(PetscMax(to->n,nrecvs),MPI_Status,&to->sstatus,PetscMax(to->n,nrecvs),MPI_Status,&to->rstatus);
1666: to->starts[0] = 0;
1667: for (n = 0; n < to->n; n++) {
1668: if (sendSizes[n] <=0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"sendSizes[n=%D] = %D cannot be less than 1",n,sendSizes[n]);
1669: to->starts[n+1] = to->starts[n] + sendSizes[n];
1670: to->procs[n] = sendProcs[n];
1671: for (i = to->starts[n]; i < to->starts[n]+sendSizes[n]; i++) to->indices[i] = sendIdx[i];
1672: }
1673: ctx->todata = (void*) to;
1675: /* allocate entire receive scatter context */
1676: PetscNewLog(ctx,VecScatter_MPI_General,&from);
1677: from->n = nrecvs;
1678: for (n = 0, recvSize = 0; n < from->n; n++) recvSize += recvSizes[n];
1680: PetscMalloc(from->n*sizeof(MPI_Request),&from->requests);
1681: PetscMalloc4(bs*recvSize,PetscScalar,&from->values,recvSize,PetscInt,&from->indices,from->n+1,PetscInt,&from->starts,from->n,PetscMPIInt,&from->procs);
1683: from->starts[0] = 0;
1684: for (n = 0; n < from->n; n++) {
1685: if (recvSizes[n] <=0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"recvSizes[n=%D] = %D cannot be less than 1",n,recvSizes[n]);
1686: from->starts[n+1] = from->starts[n] + recvSizes[n];
1687: from->procs[n] = recvProcs[n];
1688: for (i = from->starts[n]; i < from->starts[n]+recvSizes[n]; i++) from->indices[i] = recvIdx[i];
1689: }
1690: ctx->fromdata = (void*)from;
1692: /* No local scatter optimization */
1693: from->local.n = 0;
1694: from->local.vslots = 0;
1695: to->local.n = 0;
1696: to->local.vslots = 0;
1697: from->local.nonmatching_computed = PETSC_FALSE;
1698: from->local.n_nonmatching = 0;
1699: from->local.slots_nonmatching = 0;
1700: to->local.nonmatching_computed = PETSC_FALSE;
1701: to->local.n_nonmatching = 0;
1702: to->local.slots_nonmatching = 0;
1704: from->type = VEC_SCATTER_MPI_GENERAL;
1705: to->type = VEC_SCATTER_MPI_GENERAL;
1706: from->bs = bs;
1707: to->bs = bs;
1708: VecScatterCreateCommon_PtoS(from, to, ctx);
1710: /* mark lengths as negative so that the scatter will not check local vector lengths */
1711: ctx->from_n = ctx->to_n = -1;
1712: return(0);
1713: }
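/*
   A usage sketch with hypothetical values: this rank sends two entries to rank 1
   and expects one entry back, with block size 1. As the manual page above notes,
   ctx must have been obtained with VecScatterCreateEmpty().
*/
#if 0
PetscInt sendSizes[1] = {2},sendProcs[1] = {1},sendIdx[2] = {0,3};
PetscInt recvSizes[1] = {1},recvProcs[1] = {1},recvIdx[1] = {5};
ierr = VecScatterCreateLocal(ctx,1,sendSizes,sendProcs,sendIdx,1,recvSizes,recvProcs,recvIdx,1);CHKERRQ(ierr);
#endif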
1715: /*
1716: bs indicates how many elements there are in each block. Normally this would be 1.
1718: Contains a check that PetscMPIInt can handle the sizes needed.
1719: */
1722: PetscErrorCode VecScatterCreate_PtoS(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
1723: {
1724: VecScatter_MPI_General *from,*to;
1725: PetscMPIInt size,rank,imdex,tag,n;
1726: PetscInt *source = NULL,*owners = NULL;
1727: PetscInt *lowner = NULL,*start = NULL,lengthy,lengthx;
1728: PetscMPIInt *nprocs = NULL,nrecvs;
1729: PetscInt i,j,idx,nsends;
1730: PetscInt *owner = NULL,*starts = NULL,count,slen;
1731: PetscInt *rvalues,*svalues,base,*values,nprocslocal,recvtotal,*rsvalues;
1732: PetscMPIInt *onodes1,*olengths1;
1733: MPI_Comm comm;
1734: MPI_Request *send_waits = NULL,*recv_waits = NULL;
1735: MPI_Status recv_status,*send_status;
1736: PetscErrorCode ierr;
1739: PetscObjectGetNewTag((PetscObject)ctx,&tag);
1740: PetscObjectGetComm((PetscObject)xin,&comm);
1741: MPI_Comm_rank(comm,&rank);
1742: MPI_Comm_size(comm,&size);
1743: owners = xin->map->range;
1744: VecGetSize(yin,&lengthy);
1745: VecGetSize(xin,&lengthx);
1747: /* first count number of contributors to each processor */
1748: PetscMalloc2(size,PetscMPIInt,&nprocs,nx,PetscInt,&owner);
1749: PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
1751: j = 0;
1752: nsends = 0;
1753: for (i=0; i<nx; i++) {
1754: idx = bs*inidx[i];
1755: if (idx < owners[j]) j = 0;
1756: for (; j<size; j++) {
1757: if (idx < owners[j+1]) {
1758: if (!nprocs[j]++) nsends++;
1759: owner[i] = j;
1760: break;
1761: }
1762: }
1763: if (j == size) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"ith %D block entry %D not owned by any process, upper bound %D",i,idx,owners[size]);
1764: }
1766: nprocslocal = nprocs[rank];
1767: nprocs[rank] = 0;
1768: if (nprocslocal) nsends--;
1769: /* inform other processors of number of messages and max length*/
1770: PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
1771: PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
1772: PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
1773: recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
1775: /* post receives: */
1776: PetscMalloc3(recvtotal,PetscInt,&rvalues,nrecvs,PetscInt,&source,nrecvs,MPI_Request,&recv_waits);
1777: count = 0;
1778: for (i=0; i<nrecvs; i++) {
1779: MPI_Irecv((rvalues+count),olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
1780: count += olengths1[i];
1781: }
1783: /* do sends:
1784: 1) starts[i] gives the starting index in svalues for entries going to
1785: the ith processor
1786: */
1787: PetscMalloc3(nx,PetscInt,&svalues,nsends,MPI_Request,&send_waits,size+1,PetscInt,&starts);
1789: starts[0] = 0;
1790: for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
1791: for (i=0; i<nx; i++) {
1792: if (owner[i] != rank) svalues[starts[owner[i]]++] = bs*inidx[i];
1793: }
1794: starts[0] = 0;
1795: for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
1796: count = 0;
1797: for (i=0; i<size; i++) {
1798: if (nprocs[i]) {
1799: MPI_Isend(svalues+starts[i],nprocs[i],MPIU_INT,i,tag,comm,send_waits+count++);
1800: }
1801: }
1803: /* wait on receives */
1804: count = nrecvs;
1805: slen = 0;
1806: while (count) {
1807: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
1808: /* unpack receives into our local space */
1809: MPI_Get_count(&recv_status,MPIU_INT,&n);
1810: slen += n;
1811: count--;
1812: }
1814: if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not expected %D",slen,recvtotal);
1816: /* allocate entire send scatter context */
1817: PetscNewLog(ctx,VecScatter_MPI_General,&to);
1818: to->n = nrecvs;
1820: PetscMalloc(nrecvs*sizeof(MPI_Request),&to->requests);
1821: PetscMalloc4(bs*slen,PetscScalar,&to->values,slen,PetscInt,&to->indices,nrecvs+1,PetscInt,&to->starts,nrecvs,PetscMPIInt,&to->procs);
1822: PetscMalloc2(PetscMax(to->n,nsends),MPI_Status,&to->sstatus,PetscMax(to->n,nsends),MPI_Status,&to->rstatus);
1824: ctx->todata = (void*)to;
1825: to->starts[0] = 0;
1827: if (nrecvs) {
1828: /* move the data into the send scatter */
1829: base = owners[rank];
1830: rsvalues = rvalues;
1831: for (i=0; i<nrecvs; i++) {
1832: to->starts[i+1] = to->starts[i] + olengths1[i];
1833: to->procs[i] = onodes1[i];
1834: values = rsvalues;
1835: rsvalues += olengths1[i];
1836: for (j=0; j<olengths1[i]; j++) to->indices[to->starts[i] + j] = values[j] - base;
1837: }
1838: }
1839: PetscFree(olengths1);
1840: PetscFree(onodes1);
1841: PetscFree3(rvalues,source,recv_waits);
1843: /* allocate entire receive scatter context */
1844: PetscNewLog(ctx,VecScatter_MPI_General,&from);
1845: from->n = nsends;
1847: PetscMalloc(nsends*sizeof(MPI_Request),&from->requests);
1848: PetscMalloc4((ny-nprocslocal)*bs,PetscScalar,&from->values,ny-nprocslocal,PetscInt,&from->indices,nsends+1,PetscInt,&from->starts,from->n,PetscMPIInt,&from->procs);
1849: ctx->fromdata = (void*)from;
1851: /* move data into receive scatter */
1852: PetscMalloc2(size,PetscInt,&lowner,nsends+1,PetscInt,&start);
1853: count = 0; from->starts[0] = start[0] = 0;
1854: for (i=0; i<size; i++) {
1855: if (nprocs[i]) {
1856: lowner[i] = count;
1857: from->procs[count++] = i;
1858: from->starts[count] = start[count] = start[count-1] + nprocs[i];
1859: }
1860: }
1862: for (i=0; i<nx; i++) {
1863: if (owner[i] != rank) {
1864: from->indices[start[lowner[owner[i]]]++] = bs*inidy[i];
1865: if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
1866: }
1867: }
1868: PetscFree2(lowner,start);
1869: PetscFree2(nprocs,owner);
1871: /* wait on sends */
1872: if (nsends) {
1873: PetscMalloc(nsends*sizeof(MPI_Status),&send_status);
1874: MPI_Waitall(nsends,send_waits,send_status);
1875: PetscFree(send_status);
1876: }
1877: PetscFree3(svalues,send_waits,starts);
1879: if (nprocslocal) {
1880: PetscInt nt = from->local.n = to->local.n = nprocslocal;
1881: /* we have a scatter to ourselves */
1882: PetscMalloc(nt*sizeof(PetscInt),&to->local.vslots);
1883: PetscMalloc(nt*sizeof(PetscInt),&from->local.vslots);
1884: nt = 0;
1885: for (i=0; i<nx; i++) {
1886: idx = bs*inidx[i];
1887: if (idx >= owners[rank] && idx < owners[rank+1]) {
1888: to->local.vslots[nt] = idx - owners[rank];
1889: from->local.vslots[nt++] = bs*inidy[i];
1890: if (bs*inidy[i] >= lengthy) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Scattering past end of TO vector");
1891: }
1892: }
1893: } else {
1894: from->local.n = 0;
1895: from->local.vslots = 0;
1896: to->local.n = 0;
1897: to->local.vslots = 0;
1898: }
1900: from->local.nonmatching_computed = PETSC_FALSE;
1901: from->local.n_nonmatching = 0;
1902: from->local.slots_nonmatching = 0;
1903: to->local.nonmatching_computed = PETSC_FALSE;
1904: to->local.n_nonmatching = 0;
1905: to->local.slots_nonmatching = 0;
1907: from->type = VEC_SCATTER_MPI_GENERAL;
1908: to->type = VEC_SCATTER_MPI_GENERAL;
1909: from->bs = bs;
1910: to->bs = bs;
1912: VecScatterCreateCommon_PtoS(from,to,ctx);
1913: return(0);
1914: }
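/*
   For context, a hedged user-level example of the parallel-to-sequential path
   built above: every process gathers the first four entries of an MPI vector
   into a local sequential vector (error checking omitted; a sketch, not a
   PETSc example program):

       #include <petscvec.h>
       int main(int argc,char **argv)
       {
         Vec        x,y;
         IS         ix,iy;
         VecScatter sc;
         PetscInitialize(&argc,&argv,NULL,NULL);
         VecCreateMPI(PETSC_COMM_WORLD,PETSC_DECIDE,16,&x);
         VecSet(x,1.0);
         VecCreateSeq(PETSC_COMM_SELF,4,&y);
         ISCreateStride(PETSC_COMM_SELF,4,0,1,&ix);   // global indices 0..3 of x
         ISCreateStride(PETSC_COMM_SELF,4,0,1,&iy);   // local slots 0..3 of y
         VecScatterCreate(x,ix,y,iy,&sc);             // MPI x, Seq y: the PtoS path
         VecScatterBegin(sc,x,y,INSERT_VALUES,SCATTER_FORWARD);
         VecScatterEnd(sc,x,y,INSERT_VALUES,SCATTER_FORWARD);
         VecScatterDestroy(&sc);
         ISDestroy(&ix); ISDestroy(&iy);
         VecDestroy(&x); VecDestroy(&y);
         PetscFinalize();
         return 0;
       }
*/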
1916: /*
1917: bs indicates how many elements there are in each block. Normally this would be 1.
1918: */
1921: PetscErrorCode VecScatterCreateCommon_PtoS(VecScatter_MPI_General *from,VecScatter_MPI_General *to,VecScatter ctx)
1922: {
1923: MPI_Comm comm;
1924: PetscMPIInt tag = ((PetscObject)ctx)->tag, tagr;
1925: PetscInt bs = to->bs;
1926: PetscMPIInt size;
1927: PetscInt i, n;
1931: PetscObjectGetComm((PetscObject)ctx,&comm);
1932: PetscObjectGetNewTag((PetscObject)ctx,&tagr);
1933: ctx->destroy = VecScatterDestroy_PtoP;
1935: ctx->reproduce = PETSC_FALSE;
1936: to->sendfirst = PETSC_FALSE;
1937: PetscOptionsGetBool(NULL,"-vecscatter_reproduce",&ctx->reproduce,NULL);
1938: PetscOptionsGetBool(NULL,"-vecscatter_sendfirst",&to->sendfirst,NULL);
1939: from->sendfirst = to->sendfirst;
1941: MPI_Comm_size(comm,&size);
1942: /* check if the receives are ALL going into contiguous locations; if so, we can skip indexing */
1943: to->contiq = PETSC_FALSE;
1944: n = from->starts[from->n];
1945: from->contiq = PETSC_TRUE;
1946: for (i=1; i<n; i++) {
1947: if (from->indices[i] != from->indices[i-1] + bs) {
1948: from->contiq = PETSC_FALSE;
1949: break;
1950: }
1951: }
1953: to->use_alltoallv = PETSC_FALSE;
1954: PetscOptionsGetBool(NULL,"-vecscatter_alltoall",&to->use_alltoallv,NULL);
1955: from->use_alltoallv = to->use_alltoallv;
1956: if (from->use_alltoallv) PetscInfo(ctx,"Using MPI_Alltoallv() for scatter\n");
1957: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
1958: if (to->use_alltoallv) {
1959: to->use_alltoallw = PETSC_FALSE;
1960: PetscOptionsGetBool(NULL,"-vecscatter_nopack",&to->use_alltoallw,NULL);
1961: }
1962: from->use_alltoallw = to->use_alltoallw;
1963: if (from->use_alltoallw) PetscInfo(ctx,"Using MPI_Alltoallw() for scatter\n");
1964: #endif
1966: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
1967: to->use_window = PETSC_FALSE;
1968: PetscOptionsGetBool(NULL,"-vecscatter_window",&to->use_window,NULL);
1969: from->use_window = to->use_window;
1970: #endif
1972: if (to->use_alltoallv) {
1974: PetscMalloc2(size,PetscMPIInt,&to->counts,size,PetscMPIInt,&to->displs);
1975: PetscMemzero(to->counts,size*sizeof(PetscMPIInt));
1976: for (i=0; i<to->n; i++) to->counts[to->procs[i]] = bs*(to->starts[i+1] - to->starts[i]);
1978: to->displs[0] = 0;
1979: for (i=1; i<size; i++) to->displs[i] = to->displs[i-1] + to->counts[i-1];
1981: PetscMalloc2(size,PetscMPIInt,&from->counts,size,PetscMPIInt,&from->displs);
1982: PetscMemzero(from->counts,size*sizeof(PetscMPIInt));
1983: for (i=0; i<from->n; i++) from->counts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
1984: from->displs[0] = 0;
1985: for (i=1; i<size; i++) from->displs[i] = from->displs[i-1] + from->counts[i-1];
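/*
   For illustration only: a hedged sketch of how the counts/displs arrays just
   built would be consumed by MPI_Alltoallv() when the scatter executes
   (sendbuf/recvbuf are hypothetical packed buffers; the real call sites are
   in the VecScatterBegin/End kernels):

       MPI_Alltoallv(sendbuf,to->counts,to->displs,MPIU_SCALAR,
                     recvbuf,from->counts,from->displs,MPIU_SCALAR,comm);

   Ranks this process does not communicate with simply have a zero entry in
   counts[], so no data moves for them.
*/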
1987: #if defined(PETSC_HAVE_MPI_ALLTOALLW) && !defined(PETSC_USE_64BIT_INDICES)
1988: if (to->use_alltoallw) {
1989: PetscMPIInt mpibs, mpilen;
1991: ctx->packtogether = PETSC_FALSE;
1992: PetscMPIIntCast(bs,&mpibs);
1993: PetscMalloc3(size,PetscMPIInt,&to->wcounts,size,PetscMPIInt,&to->wdispls,size,MPI_Datatype,&to->types);
1994: PetscMemzero(to->wcounts,size*sizeof(PetscMPIInt));
1995: PetscMemzero(to->wdispls,size*sizeof(PetscMPIInt));
1996: for (i=0; i<size; i++) to->types[i] = MPIU_SCALAR;
1998: for (i=0; i<to->n; i++) {
1999: to->wcounts[to->procs[i]] = 1;
2000: PetscMPIIntCast(to->starts[i+1]-to->starts[i],&mpilen);
2001: MPI_Type_create_indexed_block(mpilen,mpibs,to->indices+to->starts[i],MPIU_SCALAR,to->types+to->procs[i]);
2002: MPI_Type_commit(to->types+to->procs[i]);
2003: }
2004: PetscMalloc3(size,PetscMPIInt,&from->wcounts,size,PetscMPIInt,&from->wdispls,size,MPI_Datatype,&from->types);
2005: PetscMemzero(from->wcounts,size*sizeof(PetscMPIInt));
2006: PetscMemzero(from->wdispls,size*sizeof(PetscMPIInt));
2007: for (i=0; i<size; i++) from->types[i] = MPIU_SCALAR;
2009: if (from->contiq) {
2010: PetscInfo(ctx,"Scattered vector entries are stored contiquously, taking advantage of this with -vecscatter_alltoall\n");
2011: for (i=0; i<from->n; i++) from->wcounts[from->procs[i]] = bs*(from->starts[i+1] - from->starts[i]);
2013: if (from->n) from->wdispls[from->procs[0]] = sizeof(PetscScalar)*from->indices[0];
2014: for (i=1; i<from->n; i++) from->wdispls[from->procs[i]] = from->wdispls[from->procs[i-1]] + sizeof(PetscScalar)*from->wcounts[from->procs[i-1]];
2016: } else {
2017: for (i=0; i<from->n; i++) {
2018: from->wcounts[from->procs[i]] = 1;
2019: PetscMPIIntCast(from->starts[i+1]-from->starts[i],&mpilen);
2020: MPI_Type_create_indexed_block(mpilen,mpibs,from->indices+from->starts[i],MPIU_SCALAR,from->types+from->procs[i]);
2021: MPI_Type_commit(from->types+from->procs[i]);
2022: }
2023: }
2024: } else ctx->copy = VecScatterCopy_PtoP_AllToAll;
2026: #else
2027: to->use_alltoallw = PETSC_FALSE;
2028: from->use_alltoallw = PETSC_FALSE;
2029: ctx->copy = VecScatterCopy_PtoP_AllToAll;
2030: #endif
2031: #if defined(PETSC_HAVE_MPI_WIN_CREATE)
2032: } else if (to->use_window) {
2033: PetscMPIInt temptag,winsize;
2034: MPI_Request *request;
2035: MPI_Status *status;
2037: PetscObjectGetNewTag((PetscObject)ctx,&temptag);
2038: winsize = (to->n ? to->starts[to->n] : 0)*bs*sizeof(PetscScalar);
2039: MPI_Win_create(to->values ? to->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&to->window);
2040: PetscMalloc(to->n*sizeof(PetscInt),&to->winstarts);
2041: PetscMalloc2(to->n,MPI_Request,&request,to->n,MPI_Status,&status);
2042: for (i=0; i<to->n; i++) {
2043: MPI_Irecv(to->winstarts+i,1,MPIU_INT,to->procs[i],temptag,comm,request+i);
2044: }
2045: for (i=0; i<from->n; i++) {
2046: MPI_Send(from->starts+i,1,MPIU_INT,from->procs[i],temptag,comm);
2047: }
2048: MPI_Waitall(to->n,request,status);
2049: PetscFree2(request,status);
2051: winsize = (from->n ? from->starts[from->n] : 0)*bs*sizeof(PetscScalar);
2052: MPI_Win_create(from->values ? from->values : MPI_BOTTOM,winsize,sizeof(PetscScalar),MPI_INFO_NULL,comm,&from->window);
2053: PetscMalloc(from->n*sizeof(PetscInt),&from->winstarts);
2054: PetscMalloc2(from->n,MPI_Request,&request,from->n,MPI_Status,&status);
2055: for (i=0; i<from->n; i++) {
2056: MPI_Irecv(from->winstarts+i,1,MPIU_INT,from->procs[i],temptag,comm,request+i);
2057: }
2058: for (i=0; i<to->n; i++) {
2059: MPI_Send(to->starts+i,1,MPIU_INT,to->procs[i],temptag,comm);
2060: }
2061: MPI_Waitall(from->n,request,status);
2062: PetscFree2(request,status);
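/*
   For illustration only: with the windows and winstarts exchanged above, the
   forward scatter can proceed one-sided. A hedged sketch using fence
   synchronization (the actual kernels live in the VecScatterBegin/End
   routines and may synchronize differently):

       MPI_Win_fence(0,from->window);
       for (i=0; i<to->n; i++) {
         MPI_Put(to->values+bs*to->starts[i],                    // packed send data
                 bs*(to->starts[i+1]-to->starts[i]),MPIU_SCALAR,
                 to->procs[i],                                   // target rank
                 bs*to->winstarts[i],                            // its receive offset
                 bs*(to->starts[i+1]-to->starts[i]),MPIU_SCALAR,
                 from->window);
       }
       MPI_Win_fence(0,from->window);
*/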
2063: #endif
2064: } else {
2065: PetscBool use_rsend = PETSC_FALSE, use_ssend = PETSC_FALSE;
2066: PetscInt *sstarts = to->starts, *rstarts = from->starts;
2067: PetscMPIInt *sprocs = to->procs, *rprocs = from->procs;
2068: MPI_Request *swaits = to->requests,*rwaits = from->requests;
2069: MPI_Request *rev_swaits,*rev_rwaits;
2070: PetscScalar *Ssvalues = to->values, *Srvalues = from->values;
2072: /* allocate additional wait variables for the "reverse" scatter */
2073: PetscMalloc(to->n*sizeof(MPI_Request),&rev_rwaits);
2074: PetscMalloc(from->n*sizeof(MPI_Request),&rev_swaits);
2075: to->rev_requests = rev_rwaits;
2076: from->rev_requests = rev_swaits;
2078: /* Register the receives that you will use later (sends for scatter reverse) */
2079: PetscOptionsGetBool(NULL,"-vecscatter_rsend",&use_rsend,NULL);
2080: PetscOptionsGetBool(NULL,"-vecscatter_ssend",&use_ssend,NULL);
2081: if (use_rsend) {
2082: PetscInfo(ctx,"Using VecScatter ready receiver mode\n");
2083: to->use_readyreceiver = PETSC_TRUE;
2084: from->use_readyreceiver = PETSC_TRUE;
2085: } else {
2086: to->use_readyreceiver = PETSC_FALSE;
2087: from->use_readyreceiver = PETSC_FALSE;
2088: }
2089: if (use_ssend) {
2090: PetscInfo(ctx,"Using VecScatter Ssend mode\n");
2091: }
2093: for (i=0; i<from->n; i++) {
2094: if (use_rsend) {
2095: MPI_Rsend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2096: } else if (use_ssend) {
2097: MPI_Ssend_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2098: } else {
2099: MPI_Send_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tagr,comm,rev_swaits+i);
2100: }
2101: }
2103: for (i=0; i<to->n; i++) {
2104: if (use_rsend) {
2105: MPI_Rsend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2106: } else if (use_ssend) {
2107: MPI_Ssend_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2108: } else {
2109: MPI_Send_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tag,comm,swaits+i);
2110: }
2111: }
2112: /* Register receives for scatter and reverse */
2113: for (i=0; i<from->n; i++) {
2114: MPI_Recv_init(Srvalues+bs*rstarts[i],bs*rstarts[i+1]-bs*rstarts[i],MPIU_SCALAR,rprocs[i],tag,comm,rwaits+i);
2115: }
2116: for (i=0; i<to->n; i++) {
2117: MPI_Recv_init(Ssvalues+bs*sstarts[i],bs*sstarts[i+1]-bs*sstarts[i],MPIU_SCALAR,sprocs[i],tagr,comm,rev_rwaits+i);
2118: }
2119: if (use_rsend) {
2120: if (to->n) {MPI_Startall_irecv(to->starts[to->n]*to->bs,to->n,to->rev_requests);}
2121: if (from->n) {MPI_Startall_irecv(from->starts[from->n]*from->bs,from->n,from->requests);}
2122: MPI_Barrier(comm);
2123: }
2125: ctx->copy = VecScatterCopy_PtoP_X;
2126: }
2127: PetscInfo1(ctx,"Using blocksize %D scatter\n",bs);
2129: #if defined(PETSC_USE_DEBUG)
2130: MPI_Allreduce(&bs,&i,1,MPIU_INT,MPI_MIN,PetscObjectComm((PetscObject)ctx));
2131: MPI_Allreduce(&bs,&n,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)ctx));
2132: if (bs!=i || bs!=n) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Block size %D != %D or %D",bs,i,n);
2133: #endif
2135: switch (bs) {
2136: case 12:
2137: ctx->begin = VecScatterBegin_12;
2138: ctx->end = VecScatterEnd_12;
2139: break;
2140: case 8:
2141: ctx->begin = VecScatterBegin_8;
2142: ctx->end = VecScatterEnd_8;
2143: break;
2144: case 7:
2145: ctx->begin = VecScatterBegin_7;
2146: ctx->end = VecScatterEnd_7;
2147: break;
2148: case 6:
2149: ctx->begin = VecScatterBegin_6;
2150: ctx->end = VecScatterEnd_6;
2151: break;
2152: case 5:
2153: ctx->begin = VecScatterBegin_5;
2154: ctx->end = VecScatterEnd_5;
2155: break;
2156: case 4:
2157: ctx->begin = VecScatterBegin_4;
2158: ctx->end = VecScatterEnd_4;
2159: break;
2160: case 3:
2161: ctx->begin = VecScatterBegin_3;
2162: ctx->end = VecScatterEnd_3;
2163: break;
2164: case 2:
2165: ctx->begin = VecScatterBegin_2;
2166: ctx->end = VecScatterEnd_2;
2167: break;
2168: case 1:
2169: ctx->begin = VecScatterBegin_1;
2170: ctx->end = VecScatterEnd_1;
2171: break;
2172: default:
2173: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Blocksize not supported");
2174: }
2175: ctx->view = VecScatterView_MPI;
2176: /* Check if the local scatter is actually a copy; important special case */
2177: if (to->local.n) {
2178: VecScatterLocalOptimizeCopy_Private(ctx,&to->local,&from->local,bs);
2179: }
2180: return(0);
2181: }
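/*
   For illustration only: the default path above builds persistent MPI requests
   so that each scatter merely starts and waits on pre-built requests. A
   self-contained sketch of the pattern (assumes <mpi.h>; ExchangePersistent is
   a hypothetical helper, not PETSc code):

       static void ExchangePersistent(double *sbuf,double *rbuf,int n,int peer,
                                      int nreps,MPI_Comm comm)
       {
         int         it;
         MPI_Request req[2];
         MPI_Send_init(sbuf,n,MPI_DOUBLE,peer,0,comm,&req[0]);  // set up once
         MPI_Recv_init(rbuf,n,MPI_DOUBLE,peer,0,comm,&req[1]);
         for (it=0; it<nreps; it++) {                           // reuse many times
           MPI_Startall(2,req);
           MPI_Waitall(2,req,MPI_STATUSES_IGNORE);
         }
         MPI_Request_free(&req[0]);
         MPI_Request_free(&req[1]);
       }

   Freezing the (buffer,count,datatype,peer,tag) tuple once and merely starting
   the requests per scatter is what makes repeated scatters cheap.
*/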
2185: /* ------------------------------------------------------------------------------------*/
2186: /*
2187: Scatter from local Seq vectors to a parallel vector.
2188: Reverses the order of the arguments, calls VecScatterCreate_PtoS(),
2189: and then reverses the result.
2190: */
2193: PetscErrorCode VecScatterCreate_StoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2194: {
2195: PetscErrorCode ierr;
2196: MPI_Request *waits;
2197: VecScatter_MPI_General *to,*from;
2200: VecScatterCreate_PtoS(ny,inidy,nx,inidx,yin,xin,bs,ctx);
2201: to = (VecScatter_MPI_General*)ctx->fromdata;
2202: from = (VecScatter_MPI_General*)ctx->todata;
2203: ctx->todata = (void*)to;
2204: ctx->fromdata = (void*)from;
2205: /* these two are special: they are ALWAYS stored in the 'to' struct */
2206: to->sstatus = from->sstatus;
2207: to->rstatus = from->rstatus;
2209: from->sstatus = 0;
2210: from->rstatus = 0;
2212: waits = from->rev_requests;
2213: from->rev_requests = from->requests;
2214: from->requests = waits;
2215: waits = to->rev_requests;
2216: to->rev_requests = to->requests;
2217: to->requests = waits;
2218: return(0);
2219: }
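/*
   For context, a hedged usage sketch: after the swap above a single context
   serves both directions, so a context created for x -> y can also accumulate
   y back into x (x parallel, y sequential; ctx from VecScatterCreate()):

       VecScatterBegin(ctx,y,x,ADD_VALUES,SCATTER_REVERSE);   // seq -> parallel
       VecScatterEnd(ctx,y,x,ADD_VALUES,SCATTER_REVERSE);

   SCATTER_REVERSE runs the communication pattern backwards using the
   rev_requests swapped into place here.
*/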
2221: /* ---------------------------------------------------------------------------------*/
2224: PetscErrorCode VecScatterCreate_PtoP(PetscInt nx,const PetscInt *inidx,PetscInt ny,const PetscInt *inidy,Vec xin,Vec yin,PetscInt bs,VecScatter ctx)
2225: {
2227: PetscMPIInt size,rank,tag,imdex,n;
2228: PetscInt *owners = xin->map->range;
2229: PetscMPIInt *nprocs = NULL;
2230: PetscInt i,j,idx,nsends,*local_inidx = NULL,*local_inidy = NULL;
2231: PetscInt *owner = NULL,*starts = NULL,count,slen;
2232: PetscInt *rvalues = NULL,*svalues = NULL,base,*values = NULL,*rsvalues,recvtotal,lastidx;
2233: PetscMPIInt *onodes1,*olengths1,nrecvs;
2234: MPI_Comm comm;
2235: MPI_Request *send_waits = NULL,*recv_waits = NULL;
2236: MPI_Status recv_status,*send_status = NULL;
2237: PetscBool duplicate = PETSC_FALSE;
2238: #if defined(PETSC_USE_DEBUG)
2239: PetscBool found = PETSC_FALSE;
2240: #endif
2243: PetscObjectGetNewTag((PetscObject)ctx,&tag);
2244: PetscObjectGetComm((PetscObject)xin,&comm);
2245: MPI_Comm_size(comm,&size);
2246: MPI_Comm_rank(comm,&rank);
2247: if (size == 1) {
2248: VecScatterCreate_StoP(nx,inidx,ny,inidy,xin,yin,bs,ctx);
2249: return(0);
2250: }
2252: /*
2253: Each processor ships off its inidx[j] and inidy[j] to the owning processor;
2254: the gathered indices are then passed to VecScatterCreate_StoP()
2255: */
2256: /* first count number of contributors to each processor */
2257: PetscMalloc3(size,PetscMPIInt,&nprocs,nx,PetscInt,&owner,(size+1),PetscInt,&starts);
2258: PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
2260: lastidx = -1;
2261: j = 0;
2262: for (i=0; i<nx; i++) {
2263: /* if indices are NOT locally sorted, need to start search at the beginning */
2264: if (lastidx > (idx = bs*inidx[i])) j = 0;
2265: lastidx = idx;
2266: for (; j<size; j++) {
2267: if (idx >= owners[j] && idx < owners[j+1]) {
2268: nprocs[j]++;
2269: owner[i] = j;
2270: #if defined(PETSC_USE_DEBUG)
2271: found = PETSC_TRUE;
2272: #endif
2273: break;
2274: }
2275: }
2276: #if defined(PETSC_USE_DEBUG)
2277: if (!found) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Index %D out of range",idx);
2278: found = PETSC_FALSE;
2279: #endif
2280: }
2281: nsends = 0;
2282: for (i=0; i<size; i++) nsends += (nprocs[i] > 0);
2284: /* inform the other processes how many messages to expect and their lengths */
2285: PetscGatherNumberOfMessages(comm,NULL,nprocs,&nrecvs);
2286: PetscGatherMessageLengths(comm,nsends,nrecvs,nprocs,&onodes1,&olengths1);
2287: PetscSortMPIIntWithArray(nrecvs,onodes1,olengths1);
2288: recvtotal = 0; for (i=0; i<nrecvs; i++) recvtotal += olengths1[i];
2290: /* post receives: */
2291: PetscMalloc5(2*recvtotal,PetscInt,&rvalues,2*nx,PetscInt,&svalues,nrecvs,MPI_Request,&recv_waits,nsends,MPI_Request,&send_waits,nsends,MPI_Status,&send_status);
2293: count = 0;
2294: for (i=0; i<nrecvs; i++) {
2295: MPI_Irecv((rvalues+2*count),2*olengths1[i],MPIU_INT,onodes1[i],tag,comm,recv_waits+i);
2296: count += olengths1[i];
2297: }
2298: PetscFree(onodes1);
2300: /* do sends:
2301: 1) starts[i] gives the starting index in svalues of the (inidx,inidy) pairs
2302: destined for the ith processor
2303: */
2304: starts[0]= 0;
2305: for (i=1; i<size; i++) starts[i] = starts[i-1] + nprocs[i-1];
2306: for (i=0; i<nx; i++) {
2307: svalues[2*starts[owner[i]]] = bs*inidx[i];
2308: svalues[1 + 2*starts[owner[i]]++] = bs*inidy[i];
2309: }
2311: starts[0] = 0;
2312: for (i=1; i<size+1; i++) starts[i] = starts[i-1] + nprocs[i-1];
2313: count = 0;
2314: for (i=0; i<size; i++) {
2315: if (nprocs[i]) {
2316: MPI_Isend(svalues+2*starts[i],2*nprocs[i],MPIU_INT,i,tag,comm,send_waits+count);
2317: count++;
2318: }
2319: }
2320: PetscFree3(nprocs,owner,starts);
2322: /* wait on receives */
2323: count = nrecvs;
2324: slen = 0;
2325: while (count) {
2326: MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);
2328: /* tally the received message lengths; the index pairs are unpacked below */
2328: MPI_Get_count(&recv_status,MPIU_INT,&n);
2329: slen += n/2;
2330: count--;
2331: }
2332: if (slen != recvtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message length %D not the expected %D",slen,recvtotal);
2334: PetscMalloc2(slen,PetscInt,&local_inidx,slen,PetscInt,&local_inidy);
2335: base = owners[rank];
2336: count = 0;
2337: rsvalues = rvalues;
2338: for (i=0; i<nrecvs; i++) {
2339: values = rsvalues;
2340: rsvalues += 2*olengths1[i];
2341: for (j=0; j<olengths1[i]; j++) {
2342: local_inidx[count] = values[2*j] - base;
2343: local_inidy[count++] = values[2*j+1];
2344: }
2345: }
2346: PetscFree(olengths1);
2348: /* wait on sends */
2349: if (nsends) {MPI_Waitall(nsends,send_waits,send_status);}
2350: PetscFree5(rvalues,svalues,recv_waits,send_waits,send_status);
2352: /*
2353: we should sort and remove duplicates from local_inidx and local_inidy here (a sketch follows this function)
2354: */
2356: #if defined(do_it_slow)
2357: /* sort on the from index */
2358: PetscSortIntWithArray(slen,local_inidx,local_inidy);
2359: start = 0;
2360: while (start < slen) {
2361: count = start+1;
2362: last = local_inidx[start];
2363: while (count < slen && last == local_inidx[count]) count++;
2364: if (count > start + 1) { /* found 2 or more same local_inidx[] in a row */
2365: /* sort on to index */
2366: PetscSortInt(count-start,local_inidy+start);
2367: }
2368: /* remove duplicates; not the most efficient way, but probably good enough */
2369: i = start;
2370: while (i < count-1) {
2371: if (local_inidy[i] != local_inidy[i+1]) i++;
2372: else { /* found a duplicate */
2373: duplicate = PETSC_TRUE;
2374: for (j=i; j<slen-1; j++) {
2375: local_inidx[j] = local_inidx[j+1];
2376: local_inidy[j] = local_inidy[j+1];
2377: }
2378: slen--;
2379: count--;
2380: }
2381: }
2382: start = count;
2383: }
2384: #endif
2385: if (duplicate) {
2386: PetscInfo(ctx,"Duplicate from to indices passed in VecScatterCreate(), they are ignored\n");
2387: }
2388: VecScatterCreate_StoP(slen,local_inidx,slen,local_inidy,xin,yin,bs,ctx);
2389: PetscFree2(local_inidx,local_inidy);
2390: return(0);
2391: }
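/*
   For illustration only: a hedged sketch of the sort-and-deduplicate pass the
   disabled do_it_slow block above describes, using qsort() on (from,to) pairs
   (assumes <stdlib.h>; Pair/DedupPairs are hypothetical helpers, not PETSc
   code):

       typedef struct { int from,to; } Pair;

       static int ComparePairs(const void *a,const void *b)
       {
         const Pair *p = (const Pair*)a,*q = (const Pair*)b;
         if (p->from != q->from) return (p->from < q->from) ? -1 : 1;
         if (p->to   != q->to)   return (p->to   < q->to)   ? -1 : 1;
         return 0;
       }

       static int DedupPairs(Pair *pairs,int n)    // returns the deduplicated length
       {
         int i,m = 0;
         qsort(pairs,(size_t)n,sizeof(Pair),ComparePairs);
         for (i=0; i<n; i++) {
           if (!m || ComparePairs(&pairs[m-1],&pairs[i])) pairs[m++] = pairs[i];
         }
         return m;
       }
*/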