2: #include <petsc-private/matimpl.h>
4: #define DEFAULT_STASH_SIZE 10000 6: /*
7: MatStashCreate_Private - Creates a stash,currently used for all the parallel
8: matrix implementations. The stash is where elements of a matrix destined
9: to be stored on other processors are kept until matrix assembly is done.
11: This is a simple minded stash. Simply adds entries to end of stash.
13: Input Parameters:
14: comm - communicator, required for scatters.
15: bs - stash block size. used when stashing blocks of values
17: Output Parameters:
18: stash - the newly created stash
19: */
22: PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash) 23: {
25: PetscInt max,*opt,nopt,i;
26: PetscBool flg;
29: /* Require 2 tags,get the second using PetscCommGetNewTag() */
30: stash->comm = comm;
32: PetscCommGetNewTag(stash->comm,&stash->tag1);
33: PetscCommGetNewTag(stash->comm,&stash->tag2);
34: MPI_Comm_size(stash->comm,&stash->size);
35: MPI_Comm_rank(stash->comm,&stash->rank);
36: PetscMalloc(2*stash->size*sizeof(PetscMPIInt),&stash->flg_v);
37: for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
40: nopt = stash->size;
41: PetscMalloc(nopt*sizeof(PetscInt),&opt);
42: PetscOptionsGetIntArray(NULL,"-matstash_initial_size",opt,&nopt,&flg);
43: if (flg) {
44: if (nopt == 1) max = opt[0];
45: else if (nopt == stash->size) max = opt[stash->rank];
46: else if (stash->rank < nopt) max = opt[stash->rank];
47: else max = 0; /* Use default */
48: stash->umax = max;
49: } else {
50: stash->umax = 0;
51: }
52: PetscFree(opt);
53: if (bs <= 0) bs = 1;
55: stash->bs = bs;
56: stash->nmax = 0;
57: stash->oldnmax = 0;
58: stash->n = 0;
59: stash->reallocs = -1;
60: stash->space_head = 0;
61: stash->space = 0;
63: stash->send_waits = 0;
64: stash->recv_waits = 0;
65: stash->send_status = 0;
66: stash->nsends = 0;
67: stash->nrecvs = 0;
68: stash->svalues = 0;
69: stash->rvalues = 0;
70: stash->rindices = 0;
71: stash->nprocessed = 0;
72: stash->reproduce = PETSC_FALSE;
74: PetscOptionsGetBool(NULL,"-matstash_reproduce",&stash->reproduce,NULL);
75: return(0);
76: }
78: /*
79: MatStashDestroy_Private - Destroy the stash
80: */
83: PetscErrorCode MatStashDestroy_Private(MatStash *stash) 84: {
88: PetscMatStashSpaceDestroy(&stash->space_head);
90: stash->space = 0;
92: PetscFree(stash->flg_v);
93: return(0);
94: }
96: /*
97: MatStashScatterEnd_Private - This is called as the final stage of
98: scatter. The final stages of message passing is done here, and
99: all the memory used for message passing is cleaned up. This
100: routine also resets the stash, and deallocates the memory used
101: for the stash. It also keeps track of the current memory usage
102: so that the same value can be used the next time through.
103: */
106: PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)107: {
109: PetscInt nsends=stash->nsends,bs2,oldnmax,i;
110: MPI_Status *send_status;
113: for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
114: /* wait on sends */
115: if (nsends) {
116: PetscMalloc(2*nsends*sizeof(MPI_Status),&send_status);
117: MPI_Waitall(2*nsends,stash->send_waits,send_status);
118: PetscFree(send_status);
119: }
121: /* Now update nmaxold to be app 10% more than max n used, this way the
122: wastage of space is reduced the next time this stash is used.
123: Also update the oldmax, only if it increases */
124: if (stash->n) {
125: bs2 = stash->bs*stash->bs;
126: oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
127: if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
128: }
130: stash->nmax = 0;
131: stash->n = 0;
132: stash->reallocs = -1;
133: stash->nprocessed = 0;
135: PetscMatStashSpaceDestroy(&stash->space_head);
137: stash->space = 0;
139: PetscFree(stash->send_waits);
140: PetscFree(stash->recv_waits);
141: PetscFree2(stash->svalues,stash->sindices);
142: PetscFree(stash->rvalues[0]);
143: PetscFree(stash->rvalues);
144: PetscFree(stash->rindices[0]);
145: PetscFree(stash->rindices);
146: return(0);
147: }
149: /*
150: MatStashGetInfo_Private - Gets the relavant statistics of the stash
152: Input Parameters:
153: stash - the stash
154: nstash - the size of the stash. Indicates the number of values stored.
155: reallocs - the number of additional mallocs incurred.
157: */
160: PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)161: {
162: PetscInt bs2 = stash->bs*stash->bs;
165: if (nstash) *nstash = stash->n*bs2;
166: if (reallocs) {
167: if (stash->reallocs < 0) *reallocs = 0;
168: else *reallocs = stash->reallocs;
169: }
170: return(0);
171: }
173: /*
174: MatStashSetInitialSize_Private - Sets the initial size of the stash
176: Input Parameters:
177: stash - the stash
178: max - the value that is used as the max size of the stash.
179: this value is used while allocating memory.
180: */
183: PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)184: {
186: stash->umax = max;
187: return(0);
188: }
190: /* MatStashExpand_Private - Expand the stash. This function is called
191: when the space in the stash is not sufficient to add the new values
192: being inserted into the stash.
194: Input Parameters:
195: stash - the stash
196: incr - the minimum increase requested
198: Notes:
199: This routine doubles the currently used memory.
200: */
203: static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)204: {
206: PetscInt newnmax,bs2= stash->bs*stash->bs;
209: /* allocate a larger stash */
210: if (!stash->oldnmax && !stash->nmax) { /* new stash */
211: if (stash->umax) newnmax = stash->umax/bs2;
212: else newnmax = DEFAULT_STASH_SIZE/bs2;
213: } else if (!stash->nmax) { /* resuing stash */
214: if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
215: else newnmax = stash->oldnmax/bs2;
216: } else newnmax = stash->nmax*2;
217: if (newnmax < (stash->nmax + incr)) newnmax += 2*incr;
219: /* Get a MatStashSpace and attach it to stash */
220: PetscMatStashSpaceGet(bs2,newnmax,&stash->space);
221: if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
222: stash->space_head = stash->space;
223: }
225: stash->reallocs++;
226: stash->nmax = newnmax;
227: return(0);
228: }
229: /*
230: MatStashValuesRow_Private - inserts values into the stash. This function
231: expects the values to be roworiented. Multiple columns belong to the same row
232: can be inserted with a single call to this function.
234: Input Parameters:
235: stash - the stash
236: row - the global row correspoiding to the values
237: n - the number of elements inserted. All elements belong to the above row.
238: idxn - the global column indices corresponding to each of the values.
239: values - the values inserted
240: */
243: PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)244: {
245: PetscErrorCode ierr;
246: PetscInt i,k,cnt = 0;
247: PetscMatStashSpace space=stash->space;
250: /* Check and see if we have sufficient memory */
251: if (!space || space->local_remaining < n) {
252: MatStashExpand_Private(stash,n);
253: }
254: space = stash->space;
255: k = space->local_used;
256: for (i=0; i<n; i++) {
257: if (ignorezeroentries && (values[i] == 0.0)) continue;
258: space->idx[k] = row;
259: space->idy[k] = idxn[i];
260: space->val[k] = values[i];
261: k++;
262: cnt++;
263: }
264: stash->n += cnt;
265: space->local_used += cnt;
266: space->local_remaining -= cnt;
267: return(0);
268: }
270: /*
271: MatStashValuesCol_Private - inserts values into the stash. This function
272: expects the values to be columnoriented. Multiple columns belong to the same row
273: can be inserted with a single call to this function.
275: Input Parameters:
276: stash - the stash
277: row - the global row correspoiding to the values
278: n - the number of elements inserted. All elements belong to the above row.
279: idxn - the global column indices corresponding to each of the values.
280: values - the values inserted
281: stepval - the consecutive values are sepated by a distance of stepval.
282: this happens because the input is columnoriented.
283: */
286: PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)287: {
288: PetscErrorCode ierr;
289: PetscInt i,k,cnt = 0;
290: PetscMatStashSpace space=stash->space;
293: /* Check and see if we have sufficient memory */
294: if (!space || space->local_remaining < n) {
295: MatStashExpand_Private(stash,n);
296: }
297: space = stash->space;
298: k = space->local_used;
299: for (i=0; i<n; i++) {
300: if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
301: space->idx[k] = row;
302: space->idy[k] = idxn[i];
303: space->val[k] = values[i*stepval];
304: k++;
305: cnt++;
306: }
307: stash->n += cnt;
308: space->local_used += cnt;
309: space->local_remaining -= cnt;
310: return(0);
311: }
313: /*
314: MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
315: This function expects the values to be roworiented. Multiple columns belong
316: to the same block-row can be inserted with a single call to this function.
317: This function extracts the sub-block of values based on the dimensions of
318: the original input block, and the row,col values corresponding to the blocks.
320: Input Parameters:
321: stash - the stash
322: row - the global block-row correspoiding to the values
323: n - the number of elements inserted. All elements belong to the above row.
324: idxn - the global block-column indices corresponding to each of the blocks of
325: values. Each block is of size bs*bs.
326: values - the values inserted
327: rmax - the number of block-rows in the original block.
328: cmax - the number of block-columsn on the original block.
329: idx - the index of the current block-row in the original block.
330: */
333: PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)334: {
335: PetscErrorCode ierr;
336: PetscInt i,j,k,bs2,bs=stash->bs,l;
337: const PetscScalar *vals;
338: PetscScalar *array;
339: PetscMatStashSpace space=stash->space;
342: if (!space || space->local_remaining < n) {
343: MatStashExpand_Private(stash,n);
344: }
345: space = stash->space;
346: l = space->local_used;
347: bs2 = bs*bs;
348: for (i=0; i<n; i++) {
349: space->idx[l] = row;
350: space->idy[l] = idxn[i];
351: /* Now copy over the block of values. Store the values column oriented.
352: This enables inserting multiple blocks belonging to a row with a single
353: funtion call */
354: array = space->val + bs2*l;
355: vals = values + idx*bs2*n + bs*i;
356: for (j=0; j<bs; j++) {
357: for (k=0; k<bs; k++) array[k*bs] = vals[k];
358: array++;
359: vals += cmax*bs;
360: }
361: l++;
362: }
363: stash->n += n;
364: space->local_used += n;
365: space->local_remaining -= n;
366: return(0);
367: }
369: /*
370: MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
371: This function expects the values to be roworiented. Multiple columns belong
372: to the same block-row can be inserted with a single call to this function.
373: This function extracts the sub-block of values based on the dimensions of
374: the original input block, and the row,col values corresponding to the blocks.
376: Input Parameters:
377: stash - the stash
378: row - the global block-row correspoiding to the values
379: n - the number of elements inserted. All elements belong to the above row.
380: idxn - the global block-column indices corresponding to each of the blocks of
381: values. Each block is of size bs*bs.
382: values - the values inserted
383: rmax - the number of block-rows in the original block.
384: cmax - the number of block-columsn on the original block.
385: idx - the index of the current block-row in the original block.
386: */
389: PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)390: {
391: PetscErrorCode ierr;
392: PetscInt i,j,k,bs2,bs=stash->bs,l;
393: const PetscScalar *vals;
394: PetscScalar *array;
395: PetscMatStashSpace space=stash->space;
398: if (!space || space->local_remaining < n) {
399: MatStashExpand_Private(stash,n);
400: }
401: space = stash->space;
402: l = space->local_used;
403: bs2 = bs*bs;
404: for (i=0; i<n; i++) {
405: space->idx[l] = row;
406: space->idy[l] = idxn[i];
407: /* Now copy over the block of values. Store the values column oriented.
408: This enables inserting multiple blocks belonging to a row with a single
409: funtion call */
410: array = space->val + bs2*l;
411: vals = values + idx*bs2*n + bs*i;
412: for (j=0; j<bs; j++) {
413: for (k=0; k<bs; k++) array[k] = vals[k];
414: array += bs;
415: vals += rmax*bs;
416: }
417: l++;
418: }
419: stash->n += n;
420: space->local_used += n;
421: space->local_remaining -= n;
422: return(0);
423: }
424: /*
425: MatStashScatterBegin_Private - Initiates the transfer of values to the
426: correct owners. This function goes through the stash, and check the
427: owners of each stashed value, and sends the values off to the owner
428: processors.
430: Input Parameters:
431: stash - the stash
432: owners - an array of size 'no-of-procs' which gives the ownership range
433: for each node.
435: Notes: The 'owners' array in the cased of the blocked-stash has the
436: ranges specified blocked global indices, and for the regular stash in
437: the proper global indices.
438: */
441: PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)442: {
443: PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
444: PetscInt size=stash->size,nsends;
445: PetscErrorCode ierr;
446: PetscInt count,*sindices,**rindices,i,j,idx,lastidx,l;
447: PetscScalar **rvalues,*svalues;
448: MPI_Comm comm = stash->comm;
449: MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
450: PetscMPIInt *nprocs,*nlengths,nreceives;
451: PetscInt *sp_idx,*sp_idy;
452: PetscScalar *sp_val;
453: PetscMatStashSpace space,space_next;
456: bs2 = stash->bs*stash->bs;
458: /* first count number of contributors to each processor */
459: PetscMalloc(size*sizeof(PetscMPIInt),&nprocs);
460: PetscMemzero(nprocs,size*sizeof(PetscMPIInt));
461: PetscMalloc(size*sizeof(PetscMPIInt),&nlengths);
462: PetscMemzero(nlengths,size*sizeof(PetscMPIInt));
463: PetscMalloc((stash->n+1)*sizeof(PetscInt),&owner);
465: i = j = 0;
466: lastidx = -1;
467: space = stash->space_head;
468: while (space != NULL) {
469: space_next = space->next;
470: sp_idx = space->idx;
471: for (l=0; l<space->local_used; l++) {
472: /* if indices are NOT locally sorted, need to start search at the beginning */
473: if (lastidx > (idx = sp_idx[l])) j = 0;
474: lastidx = idx;
475: for (; j<size; j++) {
476: if (idx >= owners[j] && idx < owners[j+1]) {
477: nlengths[j]++; owner[i] = j; break;
478: }
479: }
480: i++;
481: }
482: space = space_next;
483: }
484: /* Now check what procs get messages - and compute nsends. */
485: for (i=0, nsends=0; i<size; i++) {
486: if (nlengths[i]) {
487: nprocs[i] = 1; nsends++;
488: }
489: }
491: {PetscMPIInt *onodes,*olengths;
492: /* Determine the number of messages to expect, their lengths, from from-ids */
493: PetscGatherNumberOfMessages(comm,nprocs,nlengths,&nreceives);
494: PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);
495: /* since clubbing row,col - lengths are multiplied by 2 */
496: for (i=0; i<nreceives; i++) olengths[i] *=2;
497: PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);
498: /* values are size 'bs2' lengths (and remove earlier factor 2 */
499: for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
500: PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);
501: PetscFree(onodes);
502: PetscFree(olengths);}
504: /* do sends:
505: 1) starts[i] gives the starting index in svalues for stuff going to
506: the ith processor
507: */
508: PetscMalloc2(bs2*stash->n,PetscScalar,&svalues,2*(stash->n+1),PetscInt,&sindices);
509: PetscMalloc(2*nsends*sizeof(MPI_Request),&send_waits);
510: PetscMalloc2(size,PetscInt,&startv,size,PetscInt,&starti);
511: /* use 2 sends the first with all_a, the next with all_i and all_j */
512: startv[0] = 0; starti[0] = 0;
513: for (i=1; i<size; i++) {
514: startv[i] = startv[i-1] + nlengths[i-1];
515: starti[i] = starti[i-1] + 2*nlengths[i-1];
516: }
518: i = 0;
519: space = stash->space_head;
520: while (space != NULL) {
521: space_next = space->next;
522: sp_idx = space->idx;
523: sp_idy = space->idy;
524: sp_val = space->val;
525: for (l=0; l<space->local_used; l++) {
526: j = owner[i];
527: if (bs2 == 1) {
528: svalues[startv[j]] = sp_val[l];
529: } else {
530: PetscInt k;
531: PetscScalar *buf1,*buf2;
532: buf1 = svalues+bs2*startv[j];
533: buf2 = space->val + bs2*l;
534: for (k=0; k<bs2; k++) buf1[k] = buf2[k];
535: }
536: sindices[starti[j]] = sp_idx[l];
537: sindices[starti[j]+nlengths[j]] = sp_idy[l];
538: startv[j]++;
539: starti[j]++;
540: i++;
541: }
542: space = space_next;
543: }
544: startv[0] = 0;
545: for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
547: for (i=0,count=0; i<size; i++) {
548: if (nprocs[i]) {
549: MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);
550: MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);
551: }
552: }
553: #if defined(PETSC_USE_INFO)
554: PetscInfo1(NULL,"No of messages: %d \n",nsends);
555: for (i=0; i<size; i++) {
556: if (nprocs[i]) {
557: PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));
558: }
559: }
560: #endif
561: PetscFree(nlengths);
562: PetscFree(owner);
563: PetscFree2(startv,starti);
564: PetscFree(nprocs);
566: /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
567: PetscMalloc(2*nreceives*sizeof(MPI_Request),&recv_waits);
569: for (i=0; i<nreceives; i++) {
570: recv_waits[2*i] = recv_waits1[i];
571: recv_waits[2*i+1] = recv_waits2[i];
572: }
573: stash->recv_waits = recv_waits;
575: PetscFree(recv_waits1);
576: PetscFree(recv_waits2);
578: stash->svalues = svalues;
579: stash->sindices = sindices;
580: stash->rvalues = rvalues;
581: stash->rindices = rindices;
582: stash->send_waits = send_waits;
583: stash->nsends = nsends;
584: stash->nrecvs = nreceives;
585: stash->reproduce_count = 0;
586: return(0);
587: }
589: /*
590: MatStashScatterGetMesg_Private - This function waits on the receives posted
591: in the function MatStashScatterBegin_Private() and returns one message at
592: a time to the calling function. If no messages are left, it indicates this
593: by setting flg = 0, else it sets flg = 1.
595: Input Parameters:
596: stash - the stash
598: Output Parameters:
599: nvals - the number of entries in the current message.
600: rows - an array of row indices (or blocked indices) corresponding to the values
601: cols - an array of columnindices (or blocked indices) corresponding to the values
602: vals - the values
603: flg - 0 indicates no more message left, and the current call has no values associated.
604: 1 indicates that the current call successfully received a message, and the
605: other output parameters nvals,rows,cols,vals are set appropriately.
606: */
609: PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)610: {
612: PetscMPIInt i,*flg_v = stash->flg_v,i1,i2;
613: PetscInt bs2;
614: MPI_Status recv_status;
615: PetscBool match_found = PETSC_FALSE;
618: *flg = 0; /* When a message is discovered this is reset to 1 */
619: /* Return if no more messages to process */
620: if (stash->nprocessed == stash->nrecvs) return(0);
622: bs2 = stash->bs*stash->bs;
623: /* If a matching pair of receives are found, process them, and return the data to
624: the calling function. Until then keep receiving messages */
625: while (!match_found) {
626: if (stash->reproduce) {
627: i = stash->reproduce_count++;
628: MPI_Wait(stash->recv_waits+i,&recv_status);
629: } else {
630: MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);
631: }
632: if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");
634: /* Now pack the received message into a structure which is usable by others */
635: if (i % 2) {
636: MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);
638: flg_v[2*recv_status.MPI_SOURCE] = i/2;
640: *nvals = *nvals/bs2;
641: } else {
642: MPI_Get_count(&recv_status,MPIU_INT,nvals);
644: flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
646: *nvals = *nvals/2; /* This message has both row indices and col indices */
647: }
649: /* Check if we have both messages from this proc */
650: i1 = flg_v[2*recv_status.MPI_SOURCE];
651: i2 = flg_v[2*recv_status.MPI_SOURCE+1];
652: if (i1 != -1 && i2 != -1) {
653: *rows = stash->rindices[i2];
654: *cols = *rows + *nvals;
655: *vals = stash->rvalues[i1];
656: *flg = 1;
657: stash->nprocessed++;
658: match_found = PETSC_TRUE;
659: }
660: }
661: return(0);
662: }