Actual source code: matstash.c

petsc-3.7.3 2016-08-01
Report Typos and Errors
  2: #include <petsc/private/matimpl.h>

  4: #define DEFAULT_STASH_SIZE   10000

  6: static PetscErrorCode MatStashScatterBegin_Ref(Mat,MatStash*,PetscInt*);
  7: static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
  8: static PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
  9: static PetscErrorCode MatStashScatterBegin_BTS(Mat,MatStash*,PetscInt*);
 10: static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
 11: static PetscErrorCode MatStashScatterEnd_BTS(MatStash*);
 12: static PetscErrorCode MatStashScatterDestroy_BTS(MatStash*);

 14: /*
 15:   MatStashCreate_Private - Creates a stash,currently used for all the parallel
 16:   matrix implementations. The stash is where elements of a matrix destined
 17:   to be stored on other processors are kept until matrix assembly is done.

 19:   This is a simple minded stash. Simply adds entries to end of stash.

 21:   Input Parameters:
 22:   comm - communicator, required for scatters.
 23:   bs   - stash block size. used when stashing blocks of values

 25:   Output Parameters:
 26:   stash    - the newly created stash
 27: */
 30: PetscErrorCode MatStashCreate_Private(MPI_Comm comm,PetscInt bs,MatStash *stash)
 31: {
 33:   PetscInt       max,*opt,nopt,i;
 34:   PetscBool      flg;

 37:   /* Require 2 tags,get the second using PetscCommGetNewTag() */
 38:   stash->comm = comm;

 40:   PetscCommGetNewTag(stash->comm,&stash->tag1);
 41:   PetscCommGetNewTag(stash->comm,&stash->tag2);
 42:   MPI_Comm_size(stash->comm,&stash->size);
 43:   MPI_Comm_rank(stash->comm,&stash->rank);
 44:   PetscMalloc1(2*stash->size,&stash->flg_v);
 45:   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;


 48:   nopt = stash->size;
 49:   PetscMalloc1(nopt,&opt);
 50:   PetscOptionsGetIntArray(NULL,NULL,"-matstash_initial_size",opt,&nopt,&flg);
 51:   if (flg) {
 52:     if (nopt == 1)                max = opt[0];
 53:     else if (nopt == stash->size) max = opt[stash->rank];
 54:     else if (stash->rank < nopt)  max = opt[stash->rank];
 55:     else                          max = 0; /* Use default */
 56:     stash->umax = max;
 57:   } else {
 58:     stash->umax = 0;
 59:   }
 60:   PetscFree(opt);
 61:   if (bs <= 0) bs = 1;

 63:   stash->bs         = bs;
 64:   stash->nmax       = 0;
 65:   stash->oldnmax    = 0;
 66:   stash->n          = 0;
 67:   stash->reallocs   = -1;
 68:   stash->space_head = 0;
 69:   stash->space      = 0;

 71:   stash->send_waits  = 0;
 72:   stash->recv_waits  = 0;
 73:   stash->send_status = 0;
 74:   stash->nsends      = 0;
 75:   stash->nrecvs      = 0;
 76:   stash->svalues     = 0;
 77:   stash->rvalues     = 0;
 78:   stash->rindices    = 0;
 79:   stash->nprocessed  = 0;
 80:   stash->reproduce   = PETSC_FALSE;
 81:   stash->blocktype   = MPI_DATATYPE_NULL;

 83:   PetscOptionsGetBool(NULL,NULL,"-matstash_reproduce",&stash->reproduce,NULL);
 84:   PetscOptionsGetBool(NULL,NULL,"-matstash_bts",&flg,NULL);
 85:   if (flg) {
 86:     stash->ScatterBegin   = MatStashScatterBegin_BTS;
 87:     stash->ScatterGetMesg = MatStashScatterGetMesg_BTS;
 88:     stash->ScatterEnd     = MatStashScatterEnd_BTS;
 89:     stash->ScatterDestroy = MatStashScatterDestroy_BTS;
 90:   } else {
 91:     stash->ScatterBegin   = MatStashScatterBegin_Ref;
 92:     stash->ScatterGetMesg = MatStashScatterGetMesg_Ref;
 93:     stash->ScatterEnd     = MatStashScatterEnd_Ref;
 94:     stash->ScatterDestroy = NULL;
 95:   }
 96:   return(0);
 97: }

 99: /*
100:    MatStashDestroy_Private - Destroy the stash
101: */
104: PetscErrorCode MatStashDestroy_Private(MatStash *stash)
105: {

109:   PetscMatStashSpaceDestroy(&stash->space_head);
110:   if (stash->ScatterDestroy) {(*stash->ScatterDestroy)(stash);}

112:   stash->space = 0;

114:   PetscFree(stash->flg_v);
115:   return(0);
116: }

118: /*
119:    MatStashScatterEnd_Private - This is called as the final stage of
120:    scatter. The final stages of message passing is done here, and
121:    all the memory used for message passing is cleaned up. This
122:    routine also resets the stash, and deallocates the memory used
123:    for the stash. It also keeps track of the current memory usage
124:    so that the same value can be used the next time through.
125: */
128: PetscErrorCode MatStashScatterEnd_Private(MatStash *stash)
129: {

133:   (*stash->ScatterEnd)(stash);
134:   return(0);
135: }

139: static PetscErrorCode MatStashScatterEnd_Ref(MatStash *stash)
140: {
142:   PetscInt       nsends=stash->nsends,bs2,oldnmax,i;
143:   MPI_Status     *send_status;

146:   for (i=0; i<2*stash->size; i++) stash->flg_v[i] = -1;
147:   /* wait on sends */
148:   if (nsends) {
149:     PetscMalloc1(2*nsends,&send_status);
150:     MPI_Waitall(2*nsends,stash->send_waits,send_status);
151:     PetscFree(send_status);
152:   }

154:   /* Now update nmaxold to be app 10% more than max n used, this way the
155:      wastage of space is reduced the next time this stash is used.
156:      Also update the oldmax, only if it increases */
157:   if (stash->n) {
158:     bs2     = stash->bs*stash->bs;
159:     oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
160:     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
161:   }

163:   stash->nmax       = 0;
164:   stash->n          = 0;
165:   stash->reallocs   = -1;
166:   stash->nprocessed = 0;

168:   PetscMatStashSpaceDestroy(&stash->space_head);

170:   stash->space = 0;

172:   PetscFree(stash->send_waits);
173:   PetscFree(stash->recv_waits);
174:   PetscFree2(stash->svalues,stash->sindices);
175:   PetscFree(stash->rvalues[0]);
176:   PetscFree(stash->rvalues);
177:   PetscFree(stash->rindices[0]);
178:   PetscFree(stash->rindices);
179:   return(0);
180: }

182: /*
183:    MatStashGetInfo_Private - Gets the relavant statistics of the stash

185:    Input Parameters:
186:    stash    - the stash
187:    nstash   - the size of the stash. Indicates the number of values stored.
188:    reallocs - the number of additional mallocs incurred.

190: */
193: PetscErrorCode MatStashGetInfo_Private(MatStash *stash,PetscInt *nstash,PetscInt *reallocs)
194: {
195:   PetscInt bs2 = stash->bs*stash->bs;

198:   if (nstash) *nstash = stash->n*bs2;
199:   if (reallocs) {
200:     if (stash->reallocs < 0) *reallocs = 0;
201:     else                     *reallocs = stash->reallocs;
202:   }
203:   return(0);
204: }

206: /*
207:    MatStashSetInitialSize_Private - Sets the initial size of the stash

209:    Input Parameters:
210:    stash  - the stash
211:    max    - the value that is used as the max size of the stash.
212:             this value is used while allocating memory.
213: */
216: PetscErrorCode MatStashSetInitialSize_Private(MatStash *stash,PetscInt max)
217: {
219:   stash->umax = max;
220:   return(0);
221: }

223: /* MatStashExpand_Private - Expand the stash. This function is called
224:    when the space in the stash is not sufficient to add the new values
225:    being inserted into the stash.

227:    Input Parameters:
228:    stash - the stash
229:    incr  - the minimum increase requested

231:    Notes:
232:    This routine doubles the currently used memory.
233:  */
236: static PetscErrorCode MatStashExpand_Private(MatStash *stash,PetscInt incr)
237: {
239:   PetscInt       newnmax,bs2= stash->bs*stash->bs;

242:   /* allocate a larger stash */
243:   if (!stash->oldnmax && !stash->nmax) { /* new stash */
244:     if (stash->umax)                  newnmax = stash->umax/bs2;
245:     else                              newnmax = DEFAULT_STASH_SIZE/bs2;
246:   } else if (!stash->nmax) { /* resuing stash */
247:     if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs2;
248:     else                              newnmax = stash->oldnmax/bs2;
249:   } else                              newnmax = stash->nmax*2;
250:   if (newnmax  < (stash->nmax + incr)) newnmax += 2*incr;

252:   /* Get a MatStashSpace and attach it to stash */
253:   PetscMatStashSpaceGet(bs2,newnmax,&stash->space);
254:   if (!stash->space_head) { /* new stash or resuing stash->oldnmax */
255:     stash->space_head = stash->space;
256:   }

258:   stash->reallocs++;
259:   stash->nmax = newnmax;
260:   return(0);
261: }
262: /*
263:   MatStashValuesRow_Private - inserts values into the stash. This function
264:   expects the values to be roworiented. Multiple columns belong to the same row
265:   can be inserted with a single call to this function.

267:   Input Parameters:
268:   stash  - the stash
269:   row    - the global row correspoiding to the values
270:   n      - the number of elements inserted. All elements belong to the above row.
271:   idxn   - the global column indices corresponding to each of the values.
272:   values - the values inserted
273: */
276: PetscErrorCode MatStashValuesRow_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscBool ignorezeroentries)
277: {
278:   PetscErrorCode     ierr;
279:   PetscInt           i,k,cnt = 0;
280:   PetscMatStashSpace space=stash->space;

283:   /* Check and see if we have sufficient memory */
284:   if (!space || space->local_remaining < n) {
285:     MatStashExpand_Private(stash,n);
286:   }
287:   space = stash->space;
288:   k     = space->local_used;
289:   for (i=0; i<n; i++) {
290:     if (ignorezeroentries && (values[i] == 0.0)) continue;
291:     space->idx[k] = row;
292:     space->idy[k] = idxn[i];
293:     space->val[k] = values[i];
294:     k++;
295:     cnt++;
296:   }
297:   stash->n               += cnt;
298:   space->local_used      += cnt;
299:   space->local_remaining -= cnt;
300:   return(0);
301: }

303: /*
304:   MatStashValuesCol_Private - inserts values into the stash. This function
305:   expects the values to be columnoriented. Multiple columns belong to the same row
306:   can be inserted with a single call to this function.

308:   Input Parameters:
309:   stash   - the stash
310:   row     - the global row correspoiding to the values
311:   n       - the number of elements inserted. All elements belong to the above row.
312:   idxn    - the global column indices corresponding to each of the values.
313:   values  - the values inserted
314:   stepval - the consecutive values are sepated by a distance of stepval.
315:             this happens because the input is columnoriented.
316: */
319: PetscErrorCode MatStashValuesCol_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt stepval,PetscBool ignorezeroentries)
320: {
321:   PetscErrorCode     ierr;
322:   PetscInt           i,k,cnt = 0;
323:   PetscMatStashSpace space=stash->space;

326:   /* Check and see if we have sufficient memory */
327:   if (!space || space->local_remaining < n) {
328:     MatStashExpand_Private(stash,n);
329:   }
330:   space = stash->space;
331:   k     = space->local_used;
332:   for (i=0; i<n; i++) {
333:     if (ignorezeroentries && (values[i*stepval] == 0.0)) continue;
334:     space->idx[k] = row;
335:     space->idy[k] = idxn[i];
336:     space->val[k] = values[i*stepval];
337:     k++;
338:     cnt++;
339:   }
340:   stash->n               += cnt;
341:   space->local_used      += cnt;
342:   space->local_remaining -= cnt;
343:   return(0);
344: }

346: /*
347:   MatStashValuesRowBlocked_Private - inserts blocks of values into the stash.
348:   This function expects the values to be roworiented. Multiple columns belong
349:   to the same block-row can be inserted with a single call to this function.
350:   This function extracts the sub-block of values based on the dimensions of
351:   the original input block, and the row,col values corresponding to the blocks.

353:   Input Parameters:
354:   stash  - the stash
355:   row    - the global block-row correspoiding to the values
356:   n      - the number of elements inserted. All elements belong to the above row.
357:   idxn   - the global block-column indices corresponding to each of the blocks of
358:            values. Each block is of size bs*bs.
359:   values - the values inserted
360:   rmax   - the number of block-rows in the original block.
361:   cmax   - the number of block-columsn on the original block.
362:   idx    - the index of the current block-row in the original block.
363: */
366: PetscErrorCode MatStashValuesRowBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
367: {
368:   PetscErrorCode     ierr;
369:   PetscInt           i,j,k,bs2,bs=stash->bs,l;
370:   const PetscScalar  *vals;
371:   PetscScalar        *array;
372:   PetscMatStashSpace space=stash->space;

375:   if (!space || space->local_remaining < n) {
376:     MatStashExpand_Private(stash,n);
377:   }
378:   space = stash->space;
379:   l     = space->local_used;
380:   bs2   = bs*bs;
381:   for (i=0; i<n; i++) {
382:     space->idx[l] = row;
383:     space->idy[l] = idxn[i];
384:     /* Now copy over the block of values. Store the values column oriented.
385:        This enables inserting multiple blocks belonging to a row with a single
386:        funtion call */
387:     array = space->val + bs2*l;
388:     vals  = values + idx*bs2*n + bs*i;
389:     for (j=0; j<bs; j++) {
390:       for (k=0; k<bs; k++) array[k*bs] = vals[k];
391:       array++;
392:       vals += cmax*bs;
393:     }
394:     l++;
395:   }
396:   stash->n               += n;
397:   space->local_used      += n;
398:   space->local_remaining -= n;
399:   return(0);
400: }

402: /*
403:   MatStashValuesColBlocked_Private - inserts blocks of values into the stash.
404:   This function expects the values to be roworiented. Multiple columns belong
405:   to the same block-row can be inserted with a single call to this function.
406:   This function extracts the sub-block of values based on the dimensions of
407:   the original input block, and the row,col values corresponding to the blocks.

409:   Input Parameters:
410:   stash  - the stash
411:   row    - the global block-row correspoiding to the values
412:   n      - the number of elements inserted. All elements belong to the above row.
413:   idxn   - the global block-column indices corresponding to each of the blocks of
414:            values. Each block is of size bs*bs.
415:   values - the values inserted
416:   rmax   - the number of block-rows in the original block.
417:   cmax   - the number of block-columsn on the original block.
418:   idx    - the index of the current block-row in the original block.
419: */
422: PetscErrorCode MatStashValuesColBlocked_Private(MatStash *stash,PetscInt row,PetscInt n,const PetscInt idxn[],const PetscScalar values[],PetscInt rmax,PetscInt cmax,PetscInt idx)
423: {
424:   PetscErrorCode     ierr;
425:   PetscInt           i,j,k,bs2,bs=stash->bs,l;
426:   const PetscScalar  *vals;
427:   PetscScalar        *array;
428:   PetscMatStashSpace space=stash->space;

431:   if (!space || space->local_remaining < n) {
432:     MatStashExpand_Private(stash,n);
433:   }
434:   space = stash->space;
435:   l     = space->local_used;
436:   bs2   = bs*bs;
437:   for (i=0; i<n; i++) {
438:     space->idx[l] = row;
439:     space->idy[l] = idxn[i];
440:     /* Now copy over the block of values. Store the values column oriented.
441:      This enables inserting multiple blocks belonging to a row with a single
442:      funtion call */
443:     array = space->val + bs2*l;
444:     vals  = values + idx*bs2*n + bs*i;
445:     for (j=0; j<bs; j++) {
446:       for (k=0; k<bs; k++) array[k] = vals[k];
447:       array += bs;
448:       vals  += rmax*bs;
449:     }
450:     l++;
451:   }
452:   stash->n               += n;
453:   space->local_used      += n;
454:   space->local_remaining -= n;
455:   return(0);
456: }
457: /*
458:   MatStashScatterBegin_Private - Initiates the transfer of values to the
459:   correct owners. This function goes through the stash, and check the
460:   owners of each stashed value, and sends the values off to the owner
461:   processors.

463:   Input Parameters:
464:   stash  - the stash
465:   owners - an array of size 'no-of-procs' which gives the ownership range
466:            for each node.

468:   Notes: The 'owners' array in the cased of the blocked-stash has the
469:   ranges specified blocked global indices, and for the regular stash in
470:   the proper global indices.
471: */
474: PetscErrorCode MatStashScatterBegin_Private(Mat mat,MatStash *stash,PetscInt *owners)
475: {

479:   (*stash->ScatterBegin)(mat,stash,owners);
480:   return(0);
481: }

485: static PetscErrorCode MatStashScatterBegin_Ref(Mat mat,MatStash *stash,PetscInt *owners)
486: {
487:   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
488:   PetscInt           size=stash->size,nsends;
489:   PetscErrorCode     ierr;
490:   PetscInt           count,*sindices,**rindices,i,j,idx,lastidx,l;
491:   PetscScalar        **rvalues,*svalues;
492:   MPI_Comm           comm = stash->comm;
493:   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
494:   PetscMPIInt        *sizes,*nlengths,nreceives;
495:   PetscInt           *sp_idx,*sp_idy;
496:   PetscScalar        *sp_val;
497:   PetscMatStashSpace space,space_next;

500:   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
501:     InsertMode addv;
502:     MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));
503:     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
504:     mat->insertmode = addv; /* in case this processor had no cache */
505:   }

507:   bs2 = stash->bs*stash->bs;

509:   /*  first count number of contributors to each processor */
510:   PetscCalloc1(size,&sizes);
511:   PetscCalloc1(size,&nlengths);
512:   PetscMalloc1(stash->n+1,&owner);

514:   i       = j    = 0;
515:   lastidx = -1;
516:   space   = stash->space_head;
517:   while (space) {
518:     space_next = space->next;
519:     sp_idx     = space->idx;
520:     for (l=0; l<space->local_used; l++) {
521:       /* if indices are NOT locally sorted, need to start search at the beginning */
522:       if (lastidx > (idx = sp_idx[l])) j = 0;
523:       lastidx = idx;
524:       for (; j<size; j++) {
525:         if (idx >= owners[j] && idx < owners[j+1]) {
526:           nlengths[j]++; owner[i] = j; break;
527:         }
528:       }
529:       i++;
530:     }
531:     space = space_next;
532:   }
533:   /* Now check what procs get messages - and compute nsends. */
534:   for (i=0, nsends=0; i<size; i++) {
535:     if (nlengths[i]) {
536:       sizes[i] = 1; nsends++;
537:     }
538:   }

540:   {PetscMPIInt *onodes,*olengths;
541:    /* Determine the number of messages to expect, their lengths, from from-ids */
542:    PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);
543:    PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);
544:    /* since clubbing row,col - lengths are multiplied by 2 */
545:    for (i=0; i<nreceives; i++) olengths[i] *=2;
546:    PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);
547:    /* values are size 'bs2' lengths (and remove earlier factor 2 */
548:    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
549:    PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);
550:    PetscFree(onodes);
551:    PetscFree(olengths);}

553:   /* do sends:
554:       1) starts[i] gives the starting index in svalues for stuff going to
555:          the ith processor
556:   */
557:   PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);
558:   PetscMalloc1(2*nsends,&send_waits);
559:   PetscMalloc2(size,&startv,size,&starti);
560:   /* use 2 sends the first with all_a, the next with all_i and all_j */
561:   startv[0] = 0; starti[0] = 0;
562:   for (i=1; i<size; i++) {
563:     startv[i] = startv[i-1] + nlengths[i-1];
564:     starti[i] = starti[i-1] + 2*nlengths[i-1];
565:   }

567:   i     = 0;
568:   space = stash->space_head;
569:   while (space) {
570:     space_next = space->next;
571:     sp_idx     = space->idx;
572:     sp_idy     = space->idy;
573:     sp_val     = space->val;
574:     for (l=0; l<space->local_used; l++) {
575:       j = owner[i];
576:       if (bs2 == 1) {
577:         svalues[startv[j]] = sp_val[l];
578:       } else {
579:         PetscInt    k;
580:         PetscScalar *buf1,*buf2;
581:         buf1 = svalues+bs2*startv[j];
582:         buf2 = space->val + bs2*l;
583:         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
584:       }
585:       sindices[starti[j]]             = sp_idx[l];
586:       sindices[starti[j]+nlengths[j]] = sp_idy[l];
587:       startv[j]++;
588:       starti[j]++;
589:       i++;
590:     }
591:     space = space_next;
592:   }
593:   startv[0] = 0;
594:   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

596:   for (i=0,count=0; i<size; i++) {
597:     if (sizes[i]) {
598:       MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);
599:       MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);
600:     }
601:   }
602: #if defined(PETSC_USE_INFO)
603:   PetscInfo1(NULL,"No of messages: %d \n",nsends);
604:   for (i=0; i<size; i++) {
605:     if (sizes[i]) {
606:       PetscInfo2(NULL,"Mesg_to: %d: size: %d bytes\n",i,nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt)));
607:     }
608:   }
609: #endif
610:   PetscFree(nlengths);
611:   PetscFree(owner);
612:   PetscFree2(startv,starti);
613:   PetscFree(sizes);

615:   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
616:   PetscMalloc1(2*nreceives,&recv_waits);

618:   for (i=0; i<nreceives; i++) {
619:     recv_waits[2*i]   = recv_waits1[i];
620:     recv_waits[2*i+1] = recv_waits2[i];
621:   }
622:   stash->recv_waits = recv_waits;

624:   PetscFree(recv_waits1);
625:   PetscFree(recv_waits2);

627:   stash->svalues         = svalues;
628:   stash->sindices        = sindices;
629:   stash->rvalues         = rvalues;
630:   stash->rindices        = rindices;
631:   stash->send_waits      = send_waits;
632:   stash->nsends          = nsends;
633:   stash->nrecvs          = nreceives;
634:   stash->reproduce_count = 0;
635:   return(0);
636: }

638: /*
639:    MatStashScatterGetMesg_Private - This function waits on the receives posted
640:    in the function MatStashScatterBegin_Private() and returns one message at
641:    a time to the calling function. If no messages are left, it indicates this
642:    by setting flg = 0, else it sets flg = 1.

644:    Input Parameters:
645:    stash - the stash

647:    Output Parameters:
648:    nvals - the number of entries in the current message.
649:    rows  - an array of row indices (or blocked indices) corresponding to the values
650:    cols  - an array of columnindices (or blocked indices) corresponding to the values
651:    vals  - the values
652:    flg   - 0 indicates no more message left, and the current call has no values associated.
653:            1 indicates that the current call successfully received a message, and the
654:              other output parameters nvals,rows,cols,vals are set appropriately.
655: */
658: PetscErrorCode MatStashScatterGetMesg_Private(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
659: {

663:   (*stash->ScatterGetMesg)(stash,nvals,rows,cols,vals,flg);
664:   return(0);
665: }

669: static PetscErrorCode MatStashScatterGetMesg_Ref(MatStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscInt **cols,PetscScalar **vals,PetscInt *flg)
670: {
672:   PetscMPIInt    i,*flg_v = stash->flg_v,i1,i2;
673:   PetscInt       bs2;
674:   MPI_Status     recv_status;
675:   PetscBool      match_found = PETSC_FALSE;

678:   *flg = 0; /* When a message is discovered this is reset to 1 */
679:   /* Return if no more messages to process */
680:   if (stash->nprocessed == stash->nrecvs) return(0);

682:   bs2 = stash->bs*stash->bs;
683:   /* If a matching pair of receives are found, process them, and return the data to
684:      the calling function. Until then keep receiving messages */
685:   while (!match_found) {
686:     if (stash->reproduce) {
687:       i    = stash->reproduce_count++;
688:       MPI_Wait(stash->recv_waits+i,&recv_status);
689:     } else {
690:       MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);
691:     }
692:     if (recv_status.MPI_SOURCE < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Negative MPI source!");

694:     /* Now pack the received message into a structure which is usable by others */
695:     if (i % 2) {
696:       MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);

698:       flg_v[2*recv_status.MPI_SOURCE] = i/2;

700:       *nvals = *nvals/bs2;
701:     } else {
702:       MPI_Get_count(&recv_status,MPIU_INT,nvals);

704:       flg_v[2*recv_status.MPI_SOURCE+1] = i/2;

706:       *nvals = *nvals/2; /* This message has both row indices and col indices */
707:     }

709:     /* Check if we have both messages from this proc */
710:     i1 = flg_v[2*recv_status.MPI_SOURCE];
711:     i2 = flg_v[2*recv_status.MPI_SOURCE+1];
712:     if (i1 != -1 && i2 != -1) {
713:       *rows = stash->rindices[i2];
714:       *cols = *rows + *nvals;
715:       *vals = stash->rvalues[i1];
716:       *flg  = 1;
717:       stash->nprocessed++;
718:       match_found = PETSC_TRUE;
719:     }
720:   }
721:   return(0);
722: }

724: typedef struct {
725:   PetscInt row;
726:   PetscInt col;
727:   PetscScalar vals[1];          /* Actually an array of length bs2 */
728: } MatStashBlock;

732: static PetscErrorCode MatStashSortCompress_Private(MatStash *stash,InsertMode insertmode)
733: {
735:   PetscMatStashSpace space;
736:   PetscInt n = stash->n,bs = stash->bs,bs2 = bs*bs,cnt,*row,*col,*perm,rowstart,i;
737:   PetscScalar **valptr;

740:   PetscMalloc4(n,&row,n,&col,n,&valptr,n,&perm);
741:   for (space=stash->space_head,cnt=0; space; space=space->next) {
742:     for (i=0; i<space->local_used; i++) {
743:       row[cnt] = space->idx[i];
744:       col[cnt] = space->idy[i];
745:       valptr[cnt] = &space->val[i*bs2];
746:       perm[cnt] = cnt;          /* Will tell us where to find valptr after sorting row[] and col[] */
747:       cnt++;
748:     }
749:   }
750:   if (cnt != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"MatStash n %D, but counted %D entries",n,cnt);
751:   PetscSortIntWithArrayPair(n,row,col,perm);
752:   /* Scan through the rows, sorting each one, combining duplicates, and packing send buffers */
753:   for (rowstart=0,cnt=0,i=1; i<=n; i++) {
754:     if (i == n || row[i] != row[rowstart]) {         /* Sort the last row. */
755:       PetscInt colstart;
756:       PetscSortIntWithArray(i-rowstart,&col[rowstart],&perm[rowstart]);
757:       for (colstart=rowstart; colstart<i; ) { /* Compress multiple insertions to the same location */
758:         PetscInt j,l;
759:         MatStashBlock *block;
760:         PetscSegBufferGet(stash->segsendblocks,1,&block);
761:         block->row = row[rowstart];
762:         block->col = col[colstart];
763:         PetscMemcpy(block->vals,valptr[perm[colstart]],bs2*sizeof(block->vals[0]));
764:         for (j=colstart+1; j<i && col[j] == col[colstart]; j++) { /* Add any extra stashed blocks at the same (row,col) */
765:           if (insertmode == ADD_VALUES) {
766:             for (l=0; l<bs2; l++) block->vals[l] += valptr[perm[j]][l];
767:           } else {
768:             PetscMemcpy(block->vals,valptr[perm[j]],bs2*sizeof(block->vals[0]));
769:           }
770:         }
771:         colstart = j;
772:       }
773:       rowstart = i;
774:     }
775:   }
776:   PetscFree4(row,col,valptr,perm);
777:   return(0);
778: }

782: static PetscErrorCode MatStashBlockTypeSetUp(MatStash *stash)
783: {

787:   if (stash->blocktype == MPI_DATATYPE_NULL) {
788:     PetscInt     bs2 = PetscSqr(stash->bs);
789:     PetscMPIInt  blocklens[2];
790:     MPI_Aint     displs[2];
791:     MPI_Datatype types[2],stype;
792:     /* C++ std::complex is not my favorite datatype.  Since it is not POD, we cannot use offsetof to find the offset of
793:      * vals.  But the layout is actually guaranteed by the standard, so we do a little dance here with struct
794:      * DummyBlock, substituting PetscReal for PetscComplex so that we can determine the offset.
795:      */
796:     struct DummyBlock {PetscInt row,col; PetscReal vals;};

798:     stash->blocktype_size = offsetof(struct DummyBlock,vals) + bs2*sizeof(PetscScalar);
799:     if (stash->blocktype_size % sizeof(PetscInt)) { /* Implies that PetscInt is larger and does not satisfy alignment without padding */
800:       stash->blocktype_size += sizeof(PetscInt) - stash->blocktype_size % sizeof(PetscInt);
801:     }
802:     PetscSegBufferCreate(stash->blocktype_size,1,&stash->segsendblocks);
803:     PetscSegBufferCreate(stash->blocktype_size,1,&stash->segrecvblocks);
804:     PetscSegBufferCreate(sizeof(MatStashFrame),1,&stash->segrecvframe);
805:     blocklens[0] = 2;
806:     blocklens[1] = bs2;
807:     displs[0] = offsetof(struct DummyBlock,row);
808:     displs[1] = offsetof(struct DummyBlock,vals);
809:     types[0] = MPIU_INT;
810:     types[1] = MPIU_SCALAR;
811:     MPI_Type_create_struct(2,blocklens,displs,types,&stype);
812:     MPI_Type_commit(&stype);
813:     MPI_Type_create_resized(stype,0,stash->blocktype_size,&stash->blocktype); /* MPI-2 */
814:     MPI_Type_commit(&stash->blocktype);
815:     MPI_Type_free(&stype);
816:   }
817:   return(0);
818: }

822: /* Callback invoked after target rank has initiatied receive of rendezvous message.
823:  * Here we post the main sends.
824:  */
825: static PetscErrorCode MatStashBTSSend_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rankid,PetscMPIInt rank,void *sdata,MPI_Request req[],void *ctx)
826: {
827:   MatStash *stash = (MatStash*)ctx;
828:   MatStashHeader *hdr = (MatStashHeader*)sdata;

832:   if (rank != stash->sendranks[rankid]) SETERRQ3(comm,PETSC_ERR_PLIB,"BTS Send rank %d does not match sendranks[%d] %d",rank,rankid,stash->sendranks[rankid]);
833:   MPI_Isend(stash->sendframes[rankid].buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);
834:   stash->sendframes[rankid].count = hdr->count;
835:   stash->sendframes[rankid].pending = 1;
836:   return(0);
837: }

841: /* Callback invoked by target after receiving rendezvous message.
842:  * Here we post the main recvs.
843:  */
844: static PetscErrorCode MatStashBTSRecv_Private(MPI_Comm comm,const PetscMPIInt tag[],PetscMPIInt rank,void *rdata,MPI_Request req[],void *ctx)
845: {
846:   MatStash *stash = (MatStash*)ctx;
847:   MatStashHeader *hdr = (MatStashHeader*)rdata;
848:   MatStashFrame *frame;

852:   PetscSegBufferGet(stash->segrecvframe,1,&frame);
853:   PetscSegBufferGet(stash->segrecvblocks,hdr->count,&frame->buffer);
854:   MPI_Irecv(frame->buffer,hdr->count,stash->blocktype,rank,tag[0],comm,&req[0]);
855:   frame->count = hdr->count;
856:   frame->pending = 1;
857:   return(0);
858: }

862: /*
863:  * owners[] contains the ownership ranges; may be indexed by either blocks or scalars
864:  */
865: static PetscErrorCode MatStashScatterBegin_BTS(Mat mat,MatStash *stash,PetscInt owners[])
866: {
868:   size_t nblocks;
869:   char *sendblocks;

872: #if defined(PETSC_USE_DEBUG)
873:   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
874:     InsertMode addv;
875:     MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));
876:     if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
877:   }
878: #endif

880:   if (stash->subset_off_proc && !mat->subsetoffprocentries) { /* We won't use the old scatter context. */
881:     MatStashScatterDestroy_BTS(stash);
882:   }

884:   MatStashBlockTypeSetUp(stash);
885:   MatStashSortCompress_Private(stash,mat->insertmode);
886:   PetscSegBufferGetSize(stash->segsendblocks,&nblocks);
887:   PetscSegBufferExtractInPlace(stash->segsendblocks,&sendblocks);
888:   if (stash->subset_off_proc && mat->subsetoffprocentries) { /* Set up sendhdrs and sendframes for each rank that we sent before */
889:     PetscInt i;
890:     size_t b;
891:     for (i=0,b=0; i<stash->nsendranks; i++) {
892:       stash->sendframes[i].buffer = &sendblocks[b*stash->blocktype_size];
893:       /* sendhdr is never actually sent, but the count is used by MatStashBTSSend_Private */
894:       stash->sendhdr[i].count = 0; /* Might remain empty (in which case we send a zero-sized message) if no values are communicated to that process */
895:       for ( ; b<nblocks; b++) {
896:         MatStashBlock *sendblock_b = (MatStashBlock*)&sendblocks[b*stash->blocktype_size];
897:         if (PetscUnlikely(sendblock_b->row < owners[stash->sendranks[i]])) SETERRQ2(stash->comm,PETSC_ERR_ARG_WRONG,"MAT_SUBSET_OFF_PROC_ENTRIES set, but row %D owned by %d not communicated in initial assembly",sendblock_b->row,stash->sendranks[i]);
898:         if (sendblock_b->row >= owners[stash->sendranks[i]+1]) break;
899:         stash->sendhdr[i].count++;
900:       }
901:     }
902:   } else {                      /* Dynamically count and pack (first time) */
903:     PetscInt sendno;
904:     size_t i,rowstart;

906:     /* Count number of send ranks and allocate for sends */
907:     stash->nsendranks = 0;
908:     for (rowstart=0; rowstart<nblocks; ) {
909:       PetscInt owner;
910:       MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
911:       PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);
912:       if (owner < 0) owner = -(owner+2);
913:       for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
914:         MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
915:         if (sendblock_i->row >= owners[owner+1]) break;
916:       }
917:       stash->nsendranks++;
918:       rowstart = i;
919:     }
920:     PetscMalloc3(stash->nsendranks,&stash->sendranks,stash->nsendranks,&stash->sendhdr,stash->nsendranks,&stash->sendframes);

922:     /* Set up sendhdrs and sendframes */
923:     sendno = 0;
924:     for (rowstart=0; rowstart<nblocks; ) {
925:       PetscInt owner;
926:       MatStashBlock *sendblock_rowstart = (MatStashBlock*)&sendblocks[rowstart*stash->blocktype_size];
927:       PetscFindInt(sendblock_rowstart->row,stash->size+1,owners,&owner);
928:       if (owner < 0) owner = -(owner+2);
929:       stash->sendranks[sendno] = owner;
930:       for (i=rowstart+1; i<nblocks; i++) { /* Move forward through a run of blocks with the same owner */
931:         MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
932:         if (sendblock_i->row >= owners[owner+1]) break;
933:       }
934:       stash->sendframes[sendno].buffer = sendblock_rowstart;
935:       stash->sendframes[sendno].pending = 0;
936:       stash->sendhdr[sendno].count = i - rowstart;
937:       sendno++;
938:       rowstart = i;
939:     }
940:     if (sendno != stash->nsendranks) SETERRQ2(stash->comm,PETSC_ERR_PLIB,"BTS counted %D sendranks, but %D sends",stash->nsendranks,sendno);
941:   }

943:   /* Encode insertmode on the outgoing messages. If we want to support more than two options, we would need a new
944:    * message or a dummy entry of some sort. */
945:   if (mat->insertmode == INSERT_VALUES) {
946:     size_t i;
947:     for (i=0; i<nblocks; i++) {
948:       MatStashBlock *sendblock_i = (MatStashBlock*)&sendblocks[i*stash->blocktype_size];
949:       sendblock_i->row = -(sendblock_i->row+1);
950:     }
951:   }

953:   if (stash->subset_off_proc && mat->subsetoffprocentries) {
954:     PetscMPIInt i,tag;
955:     PetscCommGetNewTag(stash->comm,&tag);
956:     for (i=0; i<stash->nrecvranks; i++) {
957:       MatStashBTSRecv_Private(stash->comm,&tag,stash->recvranks[i],&stash->recvhdr[i],&stash->recvreqs[i],stash);
958:     }
959:     for (i=0; i<stash->nsendranks; i++) {
960:       MatStashBTSSend_Private(stash->comm,&tag,i,stash->sendranks[i],&stash->sendhdr[i],&stash->sendreqs[i],stash);
961:     }
962:     stash->use_status = PETSC_TRUE; /* Use count from message status. */
963:   } else {
964:     PetscCommBuildTwoSidedFReq(stash->comm,1,MPIU_INT,stash->nsendranks,stash->sendranks,(PetscInt*)stash->sendhdr,
965:                                       &stash->nrecvranks,&stash->recvranks,(PetscInt*)&stash->recvhdr,1,&stash->sendreqs,&stash->recvreqs,
966:                                       MatStashBTSSend_Private,MatStashBTSRecv_Private,stash);
967:     PetscMalloc2(stash->nrecvranks,&stash->some_indices,stash->nrecvranks,&stash->some_statuses);
968:     stash->use_status = PETSC_FALSE; /* Use count from header instead of from message. */
969:   }

971:   PetscSegBufferExtractInPlace(stash->segrecvframe,&stash->recvframes);
972:   stash->recvframe_active = NULL;
973:   stash->recvframe_i      = 0;
974:   stash->some_i           = 0;
975:   stash->some_count       = 0;
976:   stash->recvcount        = 0;
977:   stash->subset_off_proc  = mat->subsetoffprocentries;
978:   stash->insertmode       = &mat->insertmode;
979:   return(0);
980: }

984: static PetscErrorCode MatStashScatterGetMesg_BTS(MatStash *stash,PetscMPIInt *n,PetscInt **row,PetscInt **col,PetscScalar **val,PetscInt *flg)
985: {
987:   MatStashBlock *block;

990:   *flg = 0;
991:   while (!stash->recvframe_active || stash->recvframe_i == stash->recvframe_count) {
992:     if (stash->some_i == stash->some_count) {
993:       if (stash->recvcount == stash->nrecvranks) return(0); /* Done */
994:       MPI_Waitsome(stash->nrecvranks,stash->recvreqs,&stash->some_count,stash->some_indices,stash->use_status?stash->some_statuses:MPI_STATUSES_IGNORE);
995:       stash->some_i = 0;
996:     }
997:     stash->recvframe_active = &stash->recvframes[stash->some_indices[stash->some_i]];
998:     stash->recvframe_count = stash->recvframe_active->count; /* From header; maximum count */
999:     if (stash->use_status) { /* Count what was actually sent */
1000:       MPI_Get_count(&stash->some_statuses[stash->some_i],stash->blocktype,&stash->recvframe_count);
1001:     }
1002:     if (stash->recvframe_count > 0) { /* Check for InsertMode consistency */
1003:       block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[0];
1004:       if (PetscUnlikely(*stash->insertmode == NOT_SET_VALUES)) *stash->insertmode = block->row < 0 ? INSERT_VALUES : ADD_VALUES;
1005:       if (PetscUnlikely(*stash->insertmode == INSERT_VALUES && block->row >= 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling INSERT_VALUES, but rank %d requested ADD_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
1006:       if (PetscUnlikely(*stash->insertmode == ADD_VALUES && block->row < 0)) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Assembling ADD_VALUES, but rank %d requested INSERT_VALUES",stash->recvranks[stash->some_indices[stash->some_i]]);
1007:     }
1008:     stash->some_i++;
1009:     stash->recvcount++;
1010:     stash->recvframe_i = 0;
1011:   }
1012:   *n = 1;
1013:   block = (MatStashBlock*)&((char*)stash->recvframe_active->buffer)[stash->recvframe_i*stash->blocktype_size];
1014:   if (block->row < 0) block->row = -(block->row + 1);
1015:   *row = &block->row;
1016:   *col = &block->col;
1017:   *val = block->vals;
1018:   stash->recvframe_i++;
1019:   *flg = 1;
1020:   return(0);
1021: }

1025: static PetscErrorCode MatStashScatterEnd_BTS(MatStash *stash)
1026: {

1030:   MPI_Waitall(stash->nsendranks,stash->sendreqs,MPI_STATUSES_IGNORE);
1031:   if (stash->subset_off_proc) { /* Reuse the communication contexts, so consolidate and reset segrecvblocks  */
1032:     void *dummy;
1033:     PetscSegBufferExtractInPlace(stash->segrecvblocks,&dummy);
1034:   } else {                      /* No reuse, so collect everything. */
1035:     MatStashScatterDestroy_BTS(stash);
1036:   }

1038:   /* Now update nmaxold to be app 10% more than max n used, this way the
1039:      wastage of space is reduced the next time this stash is used.
1040:      Also update the oldmax, only if it increases */
1041:   if (stash->n) {
1042:     PetscInt bs2     = stash->bs*stash->bs;
1043:     PetscInt oldnmax = ((int)(stash->n * 1.1) + 5)*bs2;
1044:     if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
1045:   }

1047:   stash->nmax       = 0;
1048:   stash->n          = 0;
1049:   stash->reallocs   = -1;
1050:   stash->nprocessed = 0;

1052:   PetscMatStashSpaceDestroy(&stash->space_head);

1054:   stash->space = 0;

1056:   return(0);
1057: }

1061: static PetscErrorCode MatStashScatterDestroy_BTS(MatStash *stash)
1062: {

1066:   PetscSegBufferDestroy(&stash->segsendblocks);
1067:   PetscSegBufferDestroy(&stash->segrecvframe);
1068:   stash->recvframes = NULL;
1069:   PetscSegBufferDestroy(&stash->segrecvblocks);
1070:   if (stash->blocktype != MPI_DATATYPE_NULL) {
1071:     MPI_Type_free(&stash->blocktype);
1072:   }
1073:   stash->nsendranks = 0;
1074:   stash->nrecvranks = 0;
1075:   PetscFree3(stash->sendranks,stash->sendhdr,stash->sendframes);
1076:   PetscFree(stash->sendreqs);
1077:   PetscFree(stash->recvreqs);
1078:   PetscFree(stash->recvranks);
1079:   PetscFree(stash->recvhdr);
1080:   PetscFree2(stash->some_indices,stash->some_statuses);
1081:   return(0);
1082: }