#include <petsc/private/matimpl.h>

#define DEFAULT_STASH_SIZE 10000

static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*);
static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
static PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*);
static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
static PetscErrorCode MatStashScatterEnd_BTS(MatStash*);
static PetscErrorCode MatStashScatterDestroy_BTS(MatStash*);
/*
  MatStashCreate_Private - Creates a stash, currently used for all the parallel
  matrix implementations. The stash is where elements of a matrix destined
  to be stored on other processors are kept until matrix assembly is done.

  This is a simple-minded stash that simply appends entries to the end of the stash.

  Input Parameters:
  comm - communicator, required for scatters.
  bs   - stash block size, used when stashing blocks of values

  Output Parameter:
  stash - the newly created stash
*/
PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
{
  PetscInt  max,*opt,nopt,i;
  PetscBool flg;

  /* Require 2 tags, get the second using PetscCommGetNewTag() */
  stash->comm = comm;

  PetscCommGetNewTag(stash->comm,&stash->tag1);
  PetscCommGetNewTag(stash->comm,&stash->tag2);
  MPI_Comm_size(stash->comm,&stash->size);
  MPI_Comm_rank(stash->comm,&stash->rank);
  PetscMalloc1(2*stash->size,&stash->flg_v);
  for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
  nopt = stash->size;
  PetscMalloc1(nopt,&opt);
  PetscOptionsGetIntArray(NULL,NULL,"-matstash_initial_size",opt,&nopt,&flg);
  if (flg) {
    if (nopt == 1)                max = opt[0];
    else if (nopt == stash->size) max = opt[stash->rank];
    else if (stash->rank < nopt)  max = opt[stash->rank];
    else                          max = 0; /* Use default */
    stash->umax = max;
  } else {
    stash->umax = 0;
  }
  PetscFree(opt);

  if (bs <= 0) bs = 1;

  stash->bs         = bs;
  stash->nmax       = 0;
  stash->oldnmax    = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->space_head = 0;
  stash->space      = 0;

  stash->send_waits  = 0;
  stash->recv_waits  = 0;
  stash->send_status = 0;
  stash->nsends      = 0;
  stash->nrecvs      = 0;
  stash->svalues     = 0;
  stash->rvalues     = 0;
  stash->rindices    = 0;
  stash->nprocessed  = 0;
  stash->reproduce   = PETSC_FALSE;
  stash->blocktype   = MPI_DATATYPE_NULL;

  PetscOptionsGetBool(NULL,NULL,"-matstash_reproduce",&stash->reproduce,NULL);
  PetscOptionsGetBool(NULL,NULL,"-matstash_bts",&flg,NULL);
  if (flg) {
    stash->ScatterBegin   = MatStashScatterBegin_BTS;
    stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
    stash->ScatterEnd     = MatStashScatterEnd_BTS;
    stash->ScatterDestroy = MatStashScatterDestroy_BTS;
  } else {
    stash->ScatterBegin   = MatStashScatterBegin_Ref;
    stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
    stash->ScatterEnd     = MatStashScatterEnd_Ref;
    stash->ScatterDestroy = NULL;
  }
  return(0);
}
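
/*
   Example (sketch): the initial stash size can be tuned at runtime through the
   -matstash_initial_size option read above. Assuming a hypothetical executable
   ./app run on 4 ranks, either one value for all ranks or one value per rank
   may be given:

       ./app -matstash_initial_size 10000
       ./app -matstash_initial_size 5000,5000,20000,20000

   With fewer values than ranks, rank r uses opt[r] when r < nopt and falls
   back to the default otherwise, matching the branches above.
*/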
/*
   MatStashDestroy_Private - Destroy the stash
*/
PetscErrorCode MatStashDestroy_Private(MatStash *stash)
{
  PetscMatStashSpaceDestroy(&stash->space_head);
  if (stash->ScatterDestroy) {(*stash->ScatterDestroy)(stash);}

  stash->space = 0;

  PetscFree(stash->flg_v);
  return(0);
}
/*
   MatStashScatterEnd_Private - This is called as the final stage of the
   scatter. The final stages of message passing are done here, and
   all the memory used for message passing is cleaned up. This
   routine also resets the stash, and deallocates the memory used
   for the stash. It also keeps track of the current memory usage
   so that the same value can be used the next time through.
*/
PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
{
  (*stash->ScatterEnd)(stash);
  return(0);
}
static PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
{
  PetscInt   nsends=stash->nsends,bs2,oldnmax,i;
  MPI_Status *send_status;

  for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
  /* wait on sends */
  if (nsends) {
    PetscMalloc1(2*nsends,&send_status);
    MPI_Waitall(2*nsends,stash->send_waits,send_status);
    PetscFree(send_status);
  }
  /* Now update oldnmax to be approximately 10% more than the maximum n used,
     so that the wasted space is reduced the next time this stash is used.
     Only update oldnmax if it increases. */
  if (stash->n) {
    bs2     = stash->bs*stash->bs;
    oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }
  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->nprocessed = 0;

  PetscMatStashSpaceDestroy(&stash->space_head);

  stash->space = 0;

  PetscFree(stash->send_waits);
  PetscFree(stash->recv_waits);
  PetscFree2(stash->svalues,stash->sindices);
  PetscFree(stash->rvalues[0]);
  PetscFree(stash->rvalues);
  PetscFree(stash->rindices[0]);
  PetscFree(stash->rindices);
  return(0);
}
/*
   MatStashGetInfo_Private - Gets the relevant statistics of the stash

   Input Parameter:
   stash    - the stash

   Output Parameters:
   nstash   - the size of the stash, i.e. the number of values stored
   reallocs - the number of additional mallocs incurred
*/
PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
{
  PetscInt bs2 = stash->bs*stash->bs;

  if (nstash) *nstash = stash->n*bs2;
  if (reallocs) {
    if (stash->reallocs < 0) *reallocs = 0;
    else                     *reallocs = stash->reallocs;
  }
  return(0);
}
/*
   MatStashSetInitialSize_Private - Sets the initial size of the stash

   Input Parameters:
   stash - the stash
   max   - the value that is used as the max size of the stash.
           This value is used while allocating memory.
*/
PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
{
  stash->umax = max;
  return(0);
}
/* MatStashExpand_Private - Expand the stash. This function is called
   when the space in the stash is not sufficient to add the new values
   being inserted into the stash.

   Input Parameters:
   stash - the stash
   incr  - the minimum increase requested

   Notes:
   This routine doubles the currently used memory.
*/
static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
{
  PetscInt newnmax,bs2 = stash->bs*stash->bs;

  /* allocate a larger stash */
  if (!stash->oldnmax && !stash->nmax) { /* new stash */
    if (stash->umax) newnmax = stash->umax/bs2;
    else             newnmax = DEFAULT_STASH_SIZE/bs2;
  } else if (!stash->nmax) { /* reusing stash */
    if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
    else                              newnmax = stash->oldnmax/bs2;
  } else newnmax = stash->nmax*2;
  if (newnmax < (stash->nmax + incr)) newnmax += 2*incr;

  /* Get a MatStashSpace and attach it to stash */
  PetscMatStashSpaceGet(bs2,newnmax,&stash->space);
  if (!stash->space_head) { /* new stash or reusing stash->oldnmax */
    stash->space_head = stash->space;
  }

  stash->reallocs++;
  stash->nmax = newnmax;
  return(0);
}
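
/*
   Worked example of the growth policy above (illustrative): with bs = 2
   (so bs2 = 4), no user-provided size (umax = 0), and a fresh stash, the first
   expansion gives newnmax = DEFAULT_STASH_SIZE/bs2 = 2500 blocks; subsequent
   expansions double that to 5000, 10000, and so on. If a single insertion
   needs more than the doubled size, newnmax is bumped by an additional 2*incr.
*/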
/*
  MatStashValuesRow_Private - inserts values into the stash. This function
  expects the values to be row-oriented. Multiple columns belonging to the same
  row can be inserted with a single call to this function.

  Input Parameters:
  stash  - the stash
  row    - the global row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global column indices corresponding to each of the values.
  values - the values inserted
*/
PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)
{
  PetscErrorCode     ierr;
  PetscInt           i,k,cnt = 0;
  PetscMatStashSpace space=stash->space;

  /* Check and see if we have sufficient memory */
  if (!space || space->local_remaining < n) {
    MatStashExpand_Private(stash,n);
  }
  space = stash->space;
  k     = space->local_used;
  for (i=0; i<n; i++) {
    if (ignorezeroentries && (values[i] == 0.0)) continue;
    space->idx[k] = row;
    space->idy[k] = idxn[i];
    space->val[k] = values[i];
    k++;
    cnt++;
  }
  stash->n               += cnt;
  space->local_used      += cnt;
  space->local_remaining -= cnt;
  return(0);
}
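
/*
   Usage sketch (hypothetical caller, for illustration only): a parallel matrix
   implementation typically stashes rows it does not own from inside its
   MatSetValues implementation, e.g.

       if (row < rstart || row >= rend) {
         MatStashValuesRow_Private(&mat->stash,row,n,cols,vals,PETSC_FALSE);
       }

   where rstart/rend denote the assumed local ownership range of rows.
*/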
/*
  MatStashValuesCol_Private - inserts values into the stash. This function
  expects the values to be column-oriented. Multiple columns belonging to the
  same row can be inserted with a single call to this function.

  Input Parameters:
  stash   - the stash
  row     - the global row corresponding to the values
  n       - the number of elements inserted. All elements belong to the above row.
  idxn    - the global column indices corresponding to each of the values.
  values  - the values inserted
  stepval - consecutive values are separated by a distance of stepval.
            This happens because the input is column-oriented.
*/
PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)
{
  PetscErrorCode     ierr;
  PetscInt           i,k,cnt = 0;
  PetscMatStashSpace space=stash->space;

  /* Check and see if we have sufficient memory */
  if (!space || space->local_remaining < n) {
    MatStashExpand_Private(stash,n);
  }
  space = stash->space;
  k     = space->local_used;
  for (i=0; i<n; i++) {
    if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
    space->idx[k] = row;
    space->idy[k] = idxn[i];
    space->val[k] = values[i*stepval];
    k++;
    cnt++;
  }
  stash->n               += cnt;
  space->local_used      += cnt;
  space->local_remaining -= cnt;
  return(0);
}
/*
  MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
  This function expects the values to be row-oriented. Multiple columns belonging
  to the same block-row can be inserted with a single call to this function.
  This function extracts the sub-block of values based on the dimensions of
  the original input block, and the row,col values corresponding to the blocks.

  Input Parameters:
  stash  - the stash
  row    - the global block-row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global block-column indices corresponding to each of the blocks of
           values. Each block is of size bs*bs.
  values - the values inserted
  rmax   - the number of block-rows in the original block.
  cmax   - the number of block-columns in the original block.
  idx    - the index of the current block-row in the original block.
*/
PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscErrorCode     ierr;
  PetscInt           i,j,k,bs2,bs=stash->bs,l;
  const PetscScalar  *vals;
  PetscScalar        *array;
  PetscMatStashSpace space=stash->space;

  if (!space || space->local_remaining < n) {
    MatStashExpand_Private(stash,n);
  }
  space = stash->space;
  l     = space->local_used;
  bs2   = bs*bs;
  for (i=0; i<n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column-oriented.
       This enables inserting multiple blocks belonging to a row with a single
       function call */
    array = space->val + bs2*l;
    vals  = values + idx*bs2*n + bs*i;
    for (j=0; j<bs; j++) {
      for (k=0; k<bs; k++) array[k*bs] = vals[k];
      array++;
      vals += cmax*bs;
    }
    l++;
  }
  stash->n               += n;
  space->local_used      += n;
  space->local_remaining -= n;
  return(0);
}
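
/*
   Illustrative example of the copy above, assuming bs = 2: for block-column i
   the input (row-oriented) sub-block

       a00 a01
       a10 a11

   is read with its rows cmax*bs scalars apart and stored contiguously in the
   stash as the column-major sequence a00, a10, a01, a11, which is what
   array[k*bs] together with the array++ and vals += cmax*bs updates accomplish.
*/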
/*
  MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
  This function expects the values to be column-oriented. Multiple columns belonging
  to the same block-row can be inserted with a single call to this function.
  This function extracts the sub-block of values based on the dimensions of
  the original input block, and the row,col values corresponding to the blocks.

  Input Parameters:
  stash  - the stash
  row    - the global block-row corresponding to the values
  n      - the number of elements inserted. All elements belong to the above row.
  idxn   - the global block-column indices corresponding to each of the blocks of
           values. Each block is of size bs*bs.
  values - the values inserted
  rmax   - the number of block-rows in the original block.
  cmax   - the number of block-columns in the original block.
  idx    - the index of the current block-row in the original block.
*/
PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
{
  PetscErrorCode     ierr;
  PetscInt           i,j,k,bs2,bs=stash->bs,l;
  const PetscScalar  *vals;
  PetscScalar        *array;
  PetscMatStashSpace space=stash->space;

  if (!space || space->local_remaining < n) {
    MatStashExpand_Private(stash,n);
  }
  space = stash->space;
  l     = space->local_used;
  bs2   = bs*bs;
  for (i=0; i<n; i++) {
    space->idx[l] = row;
    space->idy[l] = idxn[i];
    /* Now copy over the block of values. Store the values column-oriented.
       This enables inserting multiple blocks belonging to a row with a single
       function call */
    array = space->val + bs2*l;
    vals  = values + idx*bs2*n + bs*i;
    for (j=0; j<bs; j++) {
      for (k=0; k<bs; k++) array[k] = vals[k];
      array += bs;
      vals  += rmax*bs;
    }
    l++;
  }
  stash->n               += n;
  space->local_used      += n;
  space->local_remaining -= n;
  return(0);
}
/*
  MatStashScatterBegin_Private - Initiates the transfer of values to the
  correct owners. This function goes through the stash, checks the
  owners of each stashed value, and sends the values off to the owner
  processors.

  Input Parameters:
  stash  - the stash
  owners - an array of size 'no-of-procs' which gives the ownership range
           for each node.

  Notes:
  For the blocked stash the 'owners' array specifies the ranges in blocked
  global indices, and for the regular stash in regular global indices.
*/
PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
{
  (*stash->ScatterBegin)(mat,stash,owners);
  return(0);
}

static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners)
{
  PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
  PetscInt           size=stash->size,nsends;
  PetscErrorCode     ierr;
  PetscInt           count,*sindices,**rindices,i,j,idx,lastidx,l;
  PetscScalar        **rvalues,*svalues;
  MPI_Comm           comm = stash->comm;
  MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
  PetscMPIInt        *sizes,*nlengths,nreceives;
  PetscInt           *sp_idx,*sp_idy;
  PetscScalar        *sp_val;
  PetscMatStashSpace space,space_next;

  { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));
    if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
    mat->insertmode = addv; /* in case this processor had no cache */
  }
  bs2 = stash->bs*stash->bs;

  /* first count number of contributors to each processor */
  PetscCalloc1(size,&sizes);
  PetscCalloc1(size,&nlengths);
  PetscMalloc1(stash->n+1,&owner);

  i       = j = 0;
  lastidx = -1;
  space   = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    for (l=0; l<space->local_used; l++) {
      /* if indices are NOT locally sorted, need to start search at the beginning */
      if (lastidx > (idx = sp_idx[l])) j = 0;
      lastidx = idx;
      for (; j<size; j++) {
        if (idx >= owners[j] && idx < owners[j+1]) {
          nlengths[j]++; owner[i] = j; break;
        }
      }
      i++;
    }
    space = space_next;
  }
  /* Now check what procs get messages - and compute nsends. */
  for (i=0, nsends=0; i<size; i++) {
    if (nlengths[i]) {
      sizes[i] = 1; nsends++;
    }
  }
  {PetscMPIInt *onodes,*olengths;
   /* Determine the number of messages to expect, and their lengths, from the from-ids */
   PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);
   PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);
   /* since we are clubbing row and col indices, the lengths are multiplied by 2 */
   for (i=0; i<nreceives; i++) olengths[i] *= 2;
   PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);
   /* values are 'bs2' scalars per entry (so remove the earlier factor of 2) */
   for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
   PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);
   PetscFree(onodes);
   PetscFree(olengths);}

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);
  PetscMalloc1(2*nsends,&send_waits);
  PetscMalloc2(size,&startv,size,&starti);
  /* use 2 sends the first with all_a, the next with all_i and all_j */
  startv[0] = 0; starti[0] = 0;
  for (i=1; i<size; i++) {
    startv[i] = startv[i-1] + nlengths[i-1];
    starti[i] = starti[i-1] + 2*nlengths[i-1];
  }

  i     = 0;
  space = stash->space_head;
  while (space) {
    space_next = space->next;
    sp_idx     = space->idx;
    sp_idy     = space->idy;
    sp_val     = space->val;
    for (l=0; l<space->local_used; l++) {
      j = owner[i];
      if (bs2 == 1) {
        svalues[startv[j]] = sp_val[l];
      } else {
        PetscInt    k;
        PetscScalar *buf1,*buf2;
        buf1 = svalues+bs2*startv[j];
        buf2 = space->val + bs2*l;
        for (k=0; k<bs2; k++) buf1[k] = buf2[k];
      }
      sindices[starti[j]]             = sp_idx[l];
      sindices[starti[j]+nlengths[j]] = sp_idy[l];
      startv[j]++;
      starti[j]++;
      i++;
    }
    space = space_next;
  }

  startv[0] = 0;
  for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

  for (i=0,count=0; i<size; i++) {
    if (sizes[i]) {
      MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);
      MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);
    }
  }
#if defined(PETSC_USE_INFO)
  PetscInfo1(NULL,"No of messages: %d \n",nsends);
  for (i=0; i<size; i++) {
    if (sizes[i]) {
      PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));
    }
  }
#endif
  PetscFree(nlengths);
  PetscFree(owner);
  PetscFree2(startv,starti);
  PetscFree(sizes);

  /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
  PetscMalloc1(2*nreceives,&recv_waits);

  for (i=0; i<nreceives; i++) {
    recv_waits[2*i]   = recv_waits1[i];
    recv_waits[2*i+1] = recv_waits2[i];
  }
  stash->recv_waits = recv_waits;

  PetscFree(recv_waits1);
  PetscFree(recv_waits2);

  stash->svalues         = svalues;
  stash->sindices        = sindices;
  stash->rvalues         = rvalues;
  stash->rindices        = rindices;
  stash->send_waits      = send_waits;
  stash->nsends          = nsends;
  stash->nrecvs          = nreceives;
  stash->reproduce_count = 0;
  return(0);
}
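
/*
   Protocol sketch for the reference scatter above: every sender posts two
   messages per destination rank j, one with 2*nlengths[j] integers (all row
   indices followed by all column indices) under tag1, and one with
   bs2*nlengths[j] scalars under tag2. The receiver pairs the two messages
   from each source again in MatStashScatterGetMesg_Ref() using flg_v[].
*/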
/*
   MatStashScatterGetMesg_Private - This function waits on the receives posted
   in the function MatStashScatterBegin_Private() and returns one message at
   a time to the calling function. If no messages are left, it indicates this
   by setting flg = 0, else it sets flg = 1.

   Input Parameter:
   stash - the stash

   Output Parameters:
   nvals - the number of entries in the current message.
   rows  - an array of row indices (or blocked indices) corresponding to the values
   cols  - an array of column indices (or blocked indices) corresponding to the values
   vals  - the values
   flg   - 0 indicates no more messages left, and the current call has no values associated.
           1 indicates that the current call successfully received a message, and the
           other output parameters nvals,rows,cols,vals are set appropriately.
*/
PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
{
  (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);
  return(0);
}

static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
{
  PetscMPIInt i,*flg_v = stash->flg_v,i1,i2;
  PetscInt    bs2;
  MPI_Status  recv_status;
  PetscBool   match_found = PETSC_FALSE;

  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) return(0);

  bs2 = stash->bs*stash->bs;
  /* If a matching pair of receives is found, process them, and return the data to
     the calling function. Until then keep receiving messages */
  while (!match_found) {
    if (stash->reproduce) {
      i = stash->reproduce_count++;
      MPI_Wait(stash->recv_waits+i,&recv_status);
    } else {
      MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);
    }
    if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");

    /* Now pack the received message into a structure which is usable by others */
    if (i % 2) {
      MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);

      flg_v[2*recv_status.MPI_SOURCE] = i/2;

      *nvals = *nvals/bs2;
    } else {
      MPI_Get_count(&recv_status,MPIU_INT,nvals);

      flg_v[2*recv_status.MPI_SOURCE+1] = i/2;

      *nvals = *nvals/2; /* This message has both row indices and col indices */
    }

    /* Check if we have both messages from this proc */
    i1 = flg_v[2*recv_status.MPI_SOURCE];
    i2 = flg_v[2*recv_status.MPI_SOURCE+1];
    if (i1 != -1 && i2 != -1) {
      *rows = stash->rindices[i2];
      *cols = *rows + *nvals;
      *vals = stash->rvalues[i1];
      *flg  = 1;
      stash->nprocessed++;
      match_found = PETSC_TRUE;
    }
  }
  return(0);
}
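
/*
   Note on the index arithmetic above: recv_waits[] interleaves the two receives
   posted per sending rank, so an odd request index i corresponds to the scalar
   (values) message and an even index to the integer (row/col) message, both
   belonging to receive slot i/2. flg_v[2*source] and flg_v[2*source+1] record
   which of the two slots have arrived so a completed pair can be returned together.
*/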
typedef struct {
  PetscInt    row;
  PetscInt    col;
  PetscScalar vals[1];          /* Actually an array of length bs2 */
} MatStashBlock;

static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode)
{
  PetscMatStashSpace space;
  PetscInt           n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i;
  PetscScalar        **valptr;

  PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);
  for (space=stash->space_head,cnt=0; space; space=space->next) {
    for (i=0; i<space->local_used; i++) {
      row[cnt]    = space->idx[i];
      col[cnt]    = space->idy[i];
      valptr[cnt] = &space->val[i*bs2];
      perm[cnt]   = cnt; /* Will tell us where to find valptr after sorting row[] and col[] */
      cnt++;
    }
  }
  if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt);
  PetscSortIntWithArrayPair(n,row,col,perm);
  /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
  for (rowstart=0,cnt=0,i=1; i<=n; i++) {
    if (i == n || row[i] != row[rowstart]) { /* Sort the last row. */
      PetscInt colstart;
      PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);
      for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */
        PetscInt      j,l;
        MatStashBlock *block;
        PetscSegBufferGet(stash->segsendblocks,1,&block);
        block->row = row[rowstart];
        block->col = col[colstart];
        PetscMemcpy(block->vals,valptr[perm[colstart]],bs2*sizeof(block->vals[0]));
        for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
          if (insertmode == ADD_VALUES) {
            for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l];
          } else {
            PetscMemcpy(block->vals,valptr[perm[j]],bs2*sizeof(block->vals[0]));
          }
        }
        colstart = j;
      }
      rowstart = i;
    }
  }
  PetscFree4(row,col,valptr,perm);
  return(0);
}
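
/*
   Example of the compression above (illustrative): stashed entries
   (row,col,val) = (5,3,1.0), (5,3,2.0), (5,7,4.0) become two send blocks,
   namely (5,3,3.0) and (5,7,4.0) with ADD_VALUES, or (5,3,2.0) and (5,7,4.0)
   with INSERT_VALUES, where the last insertion wins.
*/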
static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
{
  if (stash->blocktype == MPI_DATATYPE_NULL) {
    PetscInt     bs2 = PetscSqr(stash->bs);
    PetscMPIInt  blocklens[2];
    MPI_Aint     displs[2];
    MPI_Datatype types[2],stype;
    /* C++ std::complex is not my favorite datatype. Since it is not POD, we cannot use offsetof to find the offset of
     * vals. But the layout is actually guaranteed by the standard, so we do a little dance here with struct
     * DummyBlock, substituting PetscReal for PetscComplex so that we can determine the offset.
     */
    struct DummyBlock {PetscInt row,col; PetscReal vals;};

    stash->blocktype_size = offsetof(struct DummyBlock,vals) + bs2*sizeof(PetscScalar);
    if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
      stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
    }
    PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);
    PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);
    PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);
    blocklens[0] = 2;
    blocklens[1] = bs2;
    displs[0]    = offsetof(struct DummyBlock,row);
    displs[1]    = offsetof(struct DummyBlock,vals);
    types[0]     = MPIU_INT;
    types[1]     = MPIU_SCALAR;
    MPI_Type_create_struct(2,blocklens,displs,types,&stype);
    MPI_Type_commit(&stype);
    MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype); /* MPI-2 */
    MPI_Type_commit(&stash->blocktype);
    MPI_Type_free(&stype);
  }
  return(0);
}
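
/*
   Layout sketch of the datatype built above, assuming 64-bit PetscInt, real
   double-precision PetscScalar, and bs = 1 (bs2 = 1):

       offset  0 : PetscInt    row
       offset  8 : PetscInt    col
       offset 16 : PetscScalar vals[1]
       extent 24 : blocktype_size (already a multiple of sizeof(PetscInt))

   MPI_Type_create_resized() sets the extent so that consecutive blocks in the
   send and receive buffers are blocktype_size bytes apart.
*/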
/* Callback invoked after the target rank has initiated its receive of the rendezvous message.
 * Here we post the main sends.
 */
static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx)
{
  MatStash       *stash = (MatStash*)ctx;
  MatStashHeader *hdr   = (MatStashHeader*)sdata;

  if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]);
  MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);
  stash->sendframes[rankid].count   = hdr->count;
  stash->sendframes[rankid].pending = 1;
  return(0);
}
/* Callback invoked by target after receiving rendezvous message.
 * Here we post the main recvs.
 */
static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx)
{
  MatStash       *stash = (MatStash*)ctx;
  MatStashHeader *hdr   = (MatStashHeader*)rdata;
  MatStashFrame  *frame;

  PetscSegBufferGet(stash->segrecvframe,1,&frame);
  PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);
  MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);
  frame->count   = hdr->count;
  frame->pending = 1;
  return(0);
}
/*
 * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
 */
static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[])
{
  size_t nblocks;
  char   *sendblocks;

#if defined(PETSC_USE_DEBUG)
  { /* make sure all processors are either in INSERTMODE or ADDMODE */
    InsertMode addv;
    MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));
    if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
#endif

  if (stash->subset_off_proc && !mat->subsetoffprocentries) { /* We won't use the old scatter context. */
    MatStashScatterDestroy_BTS(stash);
  }

  MatStashBlockTypeSetUp(stash);
  MatStashSortCompress_Private(stash,mat->insertmode);
  PetscSegBufferGetSize(stash->segsendblocks,&nblocks);
  PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);
  if (stash->subset_off_proc && mat->subsetoffprocentries) { /* Set up sendhdrs and sendframes for each rank that we sent to before */
    PetscInt i;
    size_t   b;
    for (i=0,b=0; i<stash->nsendranks; i++) {
      stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size];
      /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
      stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
      for ( ; b<nblocks; b++) {
        MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size];
        if (PetscUnlikely(sendblock_b->row < owners[stash->sendranks[i]])) SETERRQ2(stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %D owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]);
        if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break;
        stash->sendhdr[i].count++;
      }
    }
  } else { /* Dynamically count and pack (first time) */
    PetscInt sendno;
    size_t   i,rowstart;

    /* Count number of send ranks and allocate for sends */
    stash->nsendranks = 0;
    for (rowstart=0; rowstart<nblocks; ) {
      PetscInt      owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
      PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);
      if (owner < 0) owner = -(owner+2);
      for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
        if (sendblock_i->row >= owners[owner+1]) break;
      }
      stash->nsendranks++;
      rowstart = i;
    }
    PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);

    /* Set up sendhdrs and sendframes */
    sendno = 0;
    for (rowstart=0; rowstart<nblocks; ) {
      PetscInt      owner;
      MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
      PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);
      if (owner < 0) owner = -(owner+2);
      stash->sendranks[sendno] = owner;
      for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
        MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
        if (sendblock_i->row >= owners[owner+1]) break;
      }
      stash->sendframes[sendno].buffer  = sendblock_rowstart;
      stash->sendframes[sendno].pending = 0;
      stash->sendhdr[sendno].count      = i - rowstart;
      sendno++;
      rowstart = i;
    }
    if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno);
  }
  /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
   * message or a dummy entry of some sort. */
  if (mat->insertmode == INSERT_VALUES) {
    size_t i;
    for (i=0; i<nblocks; i++) {
      MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
      sendblock_i->row = -(sendblock_i->row+1);
    }
  }

  if (stash->subset_off_proc && mat->subsetoffprocentries) {
    PetscMPIInt i,tag;
    PetscCommGetNewTag(stash->comm,&tag);
    for (i=0; i<stash->nrecvranks; i++) {
      MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash);
    }
    for (i=0; i<stash->nsendranks; i++) {
      MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash);
    }
    stash->use_status = PETSC_TRUE; /* Use count from message status. */
  } else {
    PetscCommBuildTwoSidedFReq(stash->comm,1,MPIU_INT,stash->nsendranks,stash->sendranks,(PetscInt*)stash->sendhdr,
                               &stash->nrecvranks,&stash->recvranks,(PetscInt*)&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs,
                               MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);
    PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);
    stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
  }

  PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);
  stash->recvframe_active = NULL;
  stash->recvframe_i      = 0;
  stash->some_i           = 0;
  stash->some_count       = 0;
  stash->recvcount        = 0;
  stash->subset_off_proc  = mat->subsetoffprocentries;
  stash->insertmode       = &mat->insertmode;
  return(0);
}
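
/*
   Flow sketch for the BTS (build-two-sided) path above: on the first assembly
   PetscCommBuildTwoSidedFReq() discovers the receiving ranks from the send
   ranks and calls MatStashBTSSend_Private()/MatStashBTSRecv_Private() to post
   the block messages; on later assemblies with MAT_SUBSET_OFF_PROC_ENTRIES set
   the cached sendranks/recvranks are reused, only the per-rank counts are
   refreshed, and the actual received counts are taken from the MPI status.
*/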
static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg)
{
  MatStashBlock *block;

  *flg = 0;
  while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
    if (stash->some_i == stash->some_count) {
      if (stash->recvcount == stash->nrecvranks) return(0); /* Done */
      MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);
      stash->some_i = 0;
    }
    stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
    stash->recvframe_count  = stash->recvframe_active->count; /* From header; maximum count */
    if (stash->use_status) { /* Count what was actually sent */
      MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);
    }
    if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
      block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[0];
      if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
      if (PetscUnlikely(*stash->insertmode == INSERT_VALUES && block->row >= 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling INSERT_VALUES, but rank %d requested ADD_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
      if (PetscUnlikely(*stash->insertmode == ADD_VALUES && block->row < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling ADD_VALUES, but rank %d requested INSERT_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
    }
    stash->some_i++;
    stash->recvcount++;
    stash->recvframe_i = 0;
  }
  *n    = 1;
  block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size];
  if (block->row < 0) block->row = -(block->row + 1);
  *row  = &block->row;
  *col  = &block->col;
  *val  = block->vals;
  stash->recvframe_i++;
  *flg  = 1;
  return(0);
}
static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
{
  MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);
  if (stash->subset_off_proc) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks */
    void *dummy;
    PetscSegBufferExtractInPlace(stash->segrecvblocks,&dummy);
  } else { /* No reuse, so collect everything. */
    MatStashScatterDestroy_BTS(stash);
  }
  /* Now update oldnmax to be approximately 10% more than the maximum n used,
     so that the wasted space is reduced the next time this stash is used.
     Only update oldnmax if it increases. */
  if (stash->n) {
    PetscInt bs2     = stash->bs*stash->bs;
    PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }
  stash->nmax       = 0;
  stash->n          = 0;
  stash->reallocs   = -1;
  stash->nprocessed = 0;

  PetscMatStashSpaceDestroy(&stash->space_head);

  stash->space = 0;

  return(0);
}
static PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
{
  PetscSegBufferDestroy(&stash->segsendblocks);
  PetscSegBufferDestroy(&stash->segrecvframe);
  stash->recvframes = NULL;
  PetscSegBufferDestroy(&stash->segrecvblocks);
  if (stash->blocktype != MPI_DATATYPE_NULL) {
    MPI_Type_free(&stash->blocktype);
  }
  stash->nsendranks = 0;
  stash->nrecvranks = 0;
  PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);
  PetscFree(stash->sendreqs);
  PetscFree(stash->recvreqs);
  PetscFree(stash->recvranks);
  PetscFree(stash->recvhdr);
  PetscFree2(stash->some_indices,stash->some_statuses);
  return(0);
}