Actual source code: stashij.c

petsc-3.3-p7 2013-05-11
#define PETSCMAT_DLL
#include <../src/mat/impls/ij/stashij.h>
/* Need sorting routines. */
#include <petscsys.h>
/* Need PetscHash */
#include <../src/sys/utils/hash.h>


#undef  __FUNCT__
#define __FUNCT__ "MatStashSeqIJCreate_Private"
PetscErrorCode MatStashSeqIJCreate_Private(MatStashSeqIJ *_stash)
{
  MatStashSeqIJ stash;
  PetscNew(struct _MatStashSeqIJ, &stash);
  PetscHashIJCreate(&(stash->h));
  *_stash = stash;
  return(0);
}

#undef  __FUNCT__
#define __FUNCT__ "MatStashSeqIJGetMultivalued_Private"
PetscErrorCode MatStashSeqIJGetMultivalued_Private(MatStashSeqIJ stash, PetscBool *_multivalued)
{
  *_multivalued = stash->multivalued;
  return(0);
}

#undef  __FUNCT__
#define __FUNCT__ "MatStashSeqIJSetMultivalued_Private"
PetscErrorCode MatStashSeqIJSetMultivalued_Private(MatStashSeqIJ stash, PetscBool multivalued)
{
  if(stash->n) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Cannot change multivaluedness of a non-empty MatStash");
  stash->multivalued = multivalued;
  PetscHashIJSetMultivalued(stash->h,multivalued);
  return(0);
}

#undef  __FUNCT__
#define __FUNCT__ "MatStashSeqIJExtend_Private"
PetscErrorCode MatStashSeqIJExtend_Private(MatStashSeqIJ stash, PetscInt len, const PetscInt *ixidx, const PetscInt *iyidx)
{
  PetscHashIJKey key;
  PetscInt       i;
  for(i = 0; i < len; ++i) {
    key.i = ixidx[i];
    key.j = iyidx[i];
    PetscHashIJAdd(stash->h,key,stash->n);
    ++(stash->n);
  }
  return(0);
}
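/*
   Illustrative usage sketch (editor's addition, not part of the original source):
   a sequential stash is typically created, filled with (i,j) pairs, queried, and
   destroyed as follows; when null pointers are passed in, the index arrays are
   allocated by MatStashSeqIJGetIndices_Private() and the caller frees them.

     MatStashSeqIJ stash;
     PetscInt      ii[3] = {2,1,2}, jj[3] = {9,4,3};
     PetscInt      len, *ix = PETSC_NULL, *iy = PETSC_NULL;

     MatStashSeqIJCreate_Private(&stash);
     MatStashSeqIJSetMultivalued_Private(stash,PETSC_TRUE);
     MatStashSeqIJExtend_Private(stash,3,ii,jj);
     MatStashSeqIJGetIndices_Private(stash,&len,&ix,&iy);
     PetscFree(ix);
     PetscFree(iy);
     MatStashSeqIJDestroy_Private(&stash);
*/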

#undef  __FUNCT__
#define __FUNCT__ "MatStashSeqIJGetIndices_Private"
PetscErrorCode MatStashSeqIJGetIndices_Private(MatStashSeqIJ stash, PetscInt *_len, PetscInt **_ixidx, PetscInt **_iyidx)
{
  PetscInt       len, *ixidx = PETSC_NULL, *iyidx = PETSC_NULL, *kidx, start, end;

  if(!_len && !_ixidx && !_iyidx) return(0);

  len = stash->n;
  if(_len) *_len = len;

  if(!_ixidx && !_iyidx) return(0);

  if(_ixidx) {
    if(!*_ixidx) {
      PetscMalloc(len*sizeof(PetscInt), _ixidx);
    }
    ixidx = *_ixidx;
  }
  if(_iyidx) {
    if(!*_iyidx) {
      PetscMalloc(len*sizeof(PetscInt), _iyidx);
    }
    iyidx = *_iyidx;
  }
  PetscMalloc(len*sizeof(PetscInt), &kidx);
  PetscHashIJGetIndices(stash->h,ixidx,iyidx,kidx);
  PetscSortIntWithArrayPair(len,ixidx,iyidx,kidx);
  start = 0;
  while (start < len) {
    end = start+1;
    while (end < len && ixidx[end] == ixidx[start]) ++end;
    if (end - 1 > start) { /* found 2 or more of ixidx[start] in a row */
      /* order the relevant portion of iy by k */
      PetscSortIntWithArray(end-start,kidx+start,iyidx+start);
    }
    start = end; /* advance past this run of equal ixidx values */
  }
  PetscFree(kidx);
  return(0);
}
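/*
   Worked example (editor's addition, not part of the original source): if the
   pairs (2,9), (1,4), (2,3) were stashed in that order, they carry the insertion
   counters k = 0, 1, 2.  Sorting by ixidx and then ordering each run of equal
   ixidx by k yields

     ixidx = {1,2,2},  iyidx = {4,9,3},

   i.e. within a given i the j indices come back in the order in which they were
   added.
*/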


#undef  __FUNCT__
#define __FUNCT__ "MatStashSeqIJClear_Private"
PetscErrorCode MatStashSeqIJClear_Private(MatStashSeqIJ stash)
{
  PetscHashIJClear(stash->h);
  stash->n = 0;
  return(0);
}

#undef  __FUNCT__
#define __FUNCT__ "MatStashSeqIJDestroy_Private"
PetscErrorCode MatStashSeqIJDestroy_Private(MatStashSeqIJ *_stash)
{
  MatStashSeqIJ stash = *_stash;
  MatStashSeqIJClear_Private(stash);
  PetscHashIJDestroy(&(stash->h));
  PetscFree(stash);
  *_stash = PETSC_NULL;
  return(0);
}

#undef  __FUNCT__
#define __FUNCT__ "MatStashSeqIJSetPreallocation_Private"
PetscErrorCode MatStashSeqIJSetPreallocation_Private(MatStashSeqIJ stash, PetscInt size)
{
  PetscInt       s;
  PetscHashIJKeySize(stash->h,&s);
  if(size < (PetscInt) s)
    SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Cannot resize stash of size %D down to %D", s, size);
  PetscHashIJResize(stash->h,size);
  return(0);
}

#undef  __FUNCT__
#define __FUNCT__ "MatStashMPIIJCreate_Private"
PetscErrorCode MatStashMPIIJCreate_Private(PetscLayout rmap, MatStashMPIIJ *_stash)
{
  MatStashMPIIJ stash;
  PetscNew(struct _MatStashMPIIJ, &stash);
  stash->rmap = 0;
  PetscLayoutReference(rmap, &(stash->rmap));
  MatStashSeqIJCreate_Private(&(stash->astash));
  MatStashSeqIJCreate_Private(&(stash->bstash));
  stash->assembled = PETSC_TRUE;
  *_stash = stash;
  return(0);
}

#undef  __FUNCT__
#define __FUNCT__ "MatStashMPIIJDestroy_Private"
PetscErrorCode MatStashMPIIJDestroy_Private(MatStashMPIIJ *_stash)
{
  MatStashMPIIJ stash = *_stash;
  PetscLayoutDestroy(&(stash->rmap));
  MatStashSeqIJDestroy_Private(&(stash->astash));
  MatStashSeqIJDestroy_Private(&(stash->bstash));
  PetscFree(stash);
  *_stash = PETSC_NULL;
  return(0);
}

#undef  __FUNCT__
#define __FUNCT__ "MatStashMPIIJClear_Private"
PetscErrorCode MatStashMPIIJClear_Private(MatStashMPIIJ stash)
{
  MatStashSeqIJClear_Private(stash->astash);
  MatStashSeqIJClear_Private(stash->bstash);
  return(0);
}

#undef  __FUNCT__
#define __FUNCT__ "MatStashMPIIJSetPreallocation_Private"
PetscErrorCode MatStashMPIIJSetPreallocation_Private(MatStashMPIIJ stash, PetscInt asize, PetscInt bsize)
{
  MatStashSeqIJSetPreallocation_Private(stash->astash,asize);
  MatStashSeqIJSetPreallocation_Private(stash->bstash,bsize);
  return(0);
}

#undef  __FUNCT__
#define __FUNCT__ "MatStashMPIIJGetMultivalued_Private"
PetscErrorCode MatStashMPIIJGetMultivalued_Private(MatStashMPIIJ stash, PetscBool *_multivalued)
{
  MatStashSeqIJGetMultivalued_Private(stash->astash, _multivalued);
  return(0);
}

#undef  __FUNCT__
#define __FUNCT__ "MatStashMPIIJSetMultivalued_Private"
PetscErrorCode MatStashMPIIJSetMultivalued_Private(MatStashMPIIJ stash, PetscBool multivalued)
{
  MatStashSeqIJSetMultivalued_Private(stash->astash, multivalued);
  MatStashSeqIJSetMultivalued_Private(stash->bstash, multivalued);
  return(0);
}

#undef  __FUNCT__
#define __FUNCT__ "MatStashMPIIJExtend_Private"
PetscErrorCode MatStashMPIIJExtend_Private(MatStashMPIIJ stash, PetscInt len, const PetscInt *ixidx, const PetscInt *iyidx)
{
  PetscInt       i;
  for(i = 0; i < len; ++i) {
    if(ixidx[i] >= stash->rmap->rstart && ixidx[i] < stash->rmap->rend) {
      MatStashSeqIJExtend_Private(stash->astash,1,ixidx+i,iyidx+i);
    } else if(ixidx[i] >= 0 && ixidx[i] < stash->rmap->N) {
      MatStashSeqIJExtend_Private(stash->bstash,1,ixidx+i,iyidx+i);
    } else SETERRQ3(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "I index %D at position %D is out of range [0,%D)", ixidx[i],i,stash->rmap->N);
  }
  stash->assembled = PETSC_FALSE;
  return(0);
}
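/*
   Example (editor's addition, not part of the original source): with two ranks
   and row ownership ranges [0,4) on rank 0 and [4,8) on rank 1, a call on rank 0
   with ixidx = {1,6,3} puts the pairs with rows 1 and 3 into the local astash and
   the pair with row 6 into the off-process bstash; the bstash entries are shipped
   to rank 1 later, by MatStashMPIIJAssemble_Private().
*/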

#undef  __FUNCT__
#define __FUNCT__ "MatStashMPIIJGetIndices_Private"
PetscErrorCode MatStashMPIIJGetIndices_Private(MatStashMPIIJ stash, PetscInt *_alen, PetscInt **_aixidx, PetscInt **_aiyidx, PetscInt *_blen, PetscInt **_bixidx, PetscInt **_biyidx)
{
  if(!stash->assembled) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Indices requested from an unassembled stash");
  MatStashSeqIJGetIndices_Private(stash->astash, _alen, _aixidx, _aiyidx);
  MatStashSeqIJGetIndices_Private(stash->bstash, _blen, _bixidx, _biyidx);
  return(0);
}

#undef  __FUNCT__
#define __FUNCT__ "MatStashMPIIJGetIndicesMerged_Private"
PetscErrorCode MatStashMPIIJGetIndicesMerged_Private(MatStashMPIIJ stash, PetscInt *_len, PetscInt **_ixidx, PetscInt **_iyidx)
{
  PetscInt       len, alen, *aixidx = PETSC_NULL, *aiyidx = PETSC_NULL, blen, *bixidx = PETSC_NULL, *biyidx = PETSC_NULL;

  if(!stash->assembled) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Indices requested from an unassembled stash");
  if(!_len && !_ixidx && !_iyidx) return(0);

  MatStashMPIIJGetIndices_Private(stash, &alen, PETSC_NULL, PETSC_NULL, &blen, PETSC_NULL, PETSC_NULL);
  len = alen + blen;
  if(_len) *_len = len;

  if((!_ixidx && !_iyidx) || (!alen && !blen)) return(0);

  if(!_ixidx || !_iyidx) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Output arrays must be null or non-null together");

  if(!alen) {
    /* Nothing to merge from the left, so get all of the indices from the right. */
    MatStashMPIIJGetIndices_Private(stash,PETSC_NULL,PETSC_NULL,PETSC_NULL,_len,_ixidx,_iyidx);
  } else if(!blen) {
    /* Nothing to merge from the right, so get all of the indices from the left. */
    MatStashMPIIJGetIndices_Private(stash,_len,_ixidx,_iyidx,PETSC_NULL,PETSC_NULL,PETSC_NULL);
  } else {
    /* Retrieve the indices into temporary arrays prior to merging. */
    MatStashMPIIJGetIndices_Private(stash,&alen,&aixidx,&aiyidx,&blen,&bixidx,&biyidx);
    /* Merge. */
    PetscMergeIntArrayPair(alen,aixidx,aiyidx,blen,bixidx,biyidx,_len,_ixidx,_iyidx);
    /* Clean up. */
    PetscFree(aixidx);
    PetscFree(aiyidx);
    PetscFree(bixidx);
    PetscFree(biyidx);
  }
  return(0);
}
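/*
   Example (editor's addition, not part of the original source): if the astash
   yields the row-sorted pairs {(1,4),(5,2)} and the bstash yields {(3,7)}, the
   merge above returns the single row-sorted sequence {(1,4),(3,7),(5,2)} in
   (*_ixidx,*_iyidx) with *_len = 3.
*/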

#undef  __FUNCT__
#define __FUNCT__ "MatStashMPIIJAssemble_Private"
PetscErrorCode MatStashMPIIJAssemble_Private(MatStashMPIIJ stash)
{
  MPI_Comm       comm = stash->rmap->comm;
  PetscMPIInt    size, rank, tag_ij, nsends, nrecvs, *plengths, *sstarts = PETSC_NULL, *rnodes, *rlengths, *rstarts = PETSC_NULL, rlengthtotal;
  MPI_Request    *recv_reqs_ij, *send_reqs;
  MPI_Status     recv_status, *send_statuses;
  PetscInt       len, *ixidx = PETSC_NULL, *iyidx = PETSC_NULL;
  PetscInt       *owner = PETSC_NULL, p, **rindices = PETSC_NULL, *sindices = PETSC_NULL;
  PetscInt       low, high, idx, lastidx, count, i, j;

  if(stash->assembled) return(0);

  /* Comm parameters */
  MPI_Comm_size(comm,&size);
  MPI_Comm_rank(comm,&rank);

  MatStashSeqIJGetIndices_Private(stash->bstash, &len, &ixidx, &iyidx);

  /* Each processor ships off its ixidx[j] and iyidx[j]. */
  /* First count the number of contributors to each processor. */
  PetscMalloc2(size,PetscMPIInt,&plengths,len,PetscInt,&owner);
  PetscMemzero(plengths,size*sizeof(PetscMPIInt));
  lastidx = -1;
  count   = 0;
  p       = 0;
  low = 0; high = size-1;
  for(i = 0; i < len; ++i) {
    idx = ixidx[i];
    if(idx < 0 || idx >= stash->rmap->range[size]) SETERRQ2(PETSC_COMM_SELF, PETSC_ERR_ARG_OUTOFRANGE, "Index %D out of range [0,%D)", idx, stash->rmap->range[size]);
    if(i) {
      if(idx > lastidx) {
        /* The lower bound is still valid, but the upper bound might not be. */
        /*
         range is ordered, hence, is a subsequence of the integers.
         Thus, the distance between idx and lastidx in the range is no greater
         than the distance between them within the integers: idx - lastidx.
         Therefore, high raised by idx-lastidx is a valid upper bound on idx.
         */
        high = PetscMin(size, high+(idx-lastidx));
        /* p is the largest index in range whose value does not
           exceed lastidx; since idx > lastidx, idx is located above p
           within range.
         */
        low = p;
      }
      if(idx < lastidx) {
        /* The upper bound is still valid, but the lower bound might not be. */
        /*
         range is ordered, hence, is a subsequence of the integers.
         Thus, the distance between idx and lastidx in range is no greater
         than the distance between them within the integers: lastidx - idx.
         Therefore, low lowered by idx-lastidx is a valid lower bound on idx.
         */
        low = PetscMax(0,low+idx-lastidx);
        /* p is the largest range index whose value does not exceed lastidx;
         since idx < lastidx, idx is located no higher than p within range. */
        high = p;
      }
    }/* if(i) */
    lastidx = idx;
    while (high - low > 1) {
      p = (high+low)/2;
      if(idx < stash->rmap->range[p]) {
        high = p;
      }
      else {
        low = p;
      }
    }
    /* The bisection narrows the owner down to low or high = low+1; pick the right one. */
    p = (idx < stash->rmap->range[low+1]) ? low : high;
    plengths[p]++;
    owner[i] = p;
  }/* for(i=0; i < len; ++i) */
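  /*
     Worked example (editor's illustration, not in the original source): with
     three ranks and rmap->range = {0,4,8,12}, an index idx = 5 satisfies
     range[1] <= 5 < range[2], so the search above yields p = 1 and rank 1 is
     recorded as the owner; idx = 9 yields p = 2.
   */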

  nsends = 0;  for (p=0; p<size; ++p) { nsends += (plengths[p] > 0);}

  /* Inform other processors of the number of messages and the maximum length. */
  PetscGatherNumberOfMessages(comm,PETSC_NULL,plengths,&nrecvs);
  PetscGatherMessageLengths(comm,nsends,nrecvs,plengths,&rnodes,&rlengths);
  /* Sort the receive nodes, so we can store the received ix indices in the order they were globally specified. */
  PetscSortMPIIntWithArray(nrecvs,rnodes,rlengths);
  /* sending/receiving pairs (ixidx[i],iyidx[i]) */
  for (i=0; i<nrecvs; ++i) rlengths[i] *= 2;

  PetscCommGetNewTag(stash->rmap->comm, &tag_ij);
  PetscPostIrecvInt(comm,tag_ij,nrecvs,rnodes,rlengths,&rindices,&recv_reqs_ij);
  PetscFree(rnodes);

  for (i=0; i<nrecvs; ++i) rlengths[i] /= 2;

  /* Prepare send buffers and offsets.
      sindices is the index send buffer;
      sstarts[p] gives the starting offset for values going to the pth processor, if any;
      because PAIRS of indices are sent from the same buffer, the index offset is 2*sstarts[p].
  */
  PetscMalloc((size+1)*sizeof(PetscMPIInt),&sstarts);
  PetscMalloc(2*len*sizeof(PetscInt),&sindices);

  /* Compute buffer offsets for the segments of data going to different processors,
     and zero out plengths: they will be used below as running counts when packing data
     into send buffers; as a result, plengths are recomputed by the end of the loop.
   */
  sstarts[0] = 0;
  for (p=0; p<size; ++p) {
    sstarts[p+1] = sstarts[p] + plengths[p];
    plengths[p] = 0;
  }

  /* Now pack the indices into the appropriate buffer segments. */
  count = 0;
  for(i = 0; i < len; ++i) {
    p = owner[count];
    /* All ixidx indices first, then all iyidx: a remnant of the code that handled both 1- and 2-index cases. */
    sindices[2*sstarts[p]+plengths[p]]                           = ixidx[i];
    sindices[2*sstarts[p]+(sstarts[p+1]-sstarts[p])+plengths[p]] = iyidx[i];
    ++plengths[p];
    ++count;
  }
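  /*
     Example (editor's illustration, not in the original source): with size = 3
     and initial plengths = {2,0,3}, the offsets are sstarts = {0,2,2,5} and the
     five pairs occupy sindices[0..9] after packing: rank 0's ix values land in
     sindices[0..1] and its iy values in sindices[2..3], while rank 2's ix values
     land in sindices[4..6] and its iy values in sindices[7..9].
   */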

  /* Allocate send requests for the indices. */
  PetscMalloc(nsends*sizeof(MPI_Request),&send_reqs);
  /* Post sends. */
  for (p=0,count=0; p<size; ++p) {
    if (plengths[p]) {
      MPI_Isend(sindices+2*sstarts[p],2*plengths[p],MPIU_INT,p,tag_ij,comm,send_reqs+count++);
    }
  }
  PetscFree2(plengths,owner);
  PetscFree(sstarts);

  /* Prepare to receive the indices. */
  /* Compute the offsets of the individual received segments in the unified index array. */
  PetscMalloc(sizeof(PetscMPIInt)*(nrecvs+1), &rstarts);
  rstarts[0] = 0;
  for(j = 0; j < nrecvs; ++j) rstarts[j+1] = rstarts[j] + rlengths[j];


  /* Wait on the index receives and insert the received indices into the local stash, as necessary. */
  count = nrecvs;
  rlengthtotal = 0;
  while (count) {
    PetscMPIInt n,k;
    MPI_Waitany(nrecvs,recv_reqs_ij,&k,&recv_status);
    MPI_Get_count(&recv_status,MPIU_INT,&n);
    rlengthtotal += n/2;
    count--;
    MatStashSeqIJExtend_Private(stash->astash,rlengths[k],rindices[k],rindices[k]+rlengths[k]);
  }
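  /*
     Note (editor's addition): each received buffer rindices[k] mirrors the send
     layout above: its first rlengths[k] entries are ix values and the next
     rlengths[k] entries are the matching iy values, which is why the call above
     passes rindices[k] and rindices[k]+rlengths[k] to MatStashSeqIJExtend_Private().
   */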
  if (rstarts[nrecvs] != rlengthtotal) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Total message lengths %D not as expected %D",rlengthtotal,rstarts[nrecvs]);
  PetscFree(ixidx);
  PetscFree(iyidx);
  MatStashSeqIJClear_Private(stash->bstash);

  PetscFree(recv_reqs_ij);
  PetscFree(rlengths);
  PetscFree(rstarts);
  PetscFree(rindices[0]);
  PetscFree(rindices);
  /* wait on sends */
  if (nsends) {
    PetscMalloc(sizeof(MPI_Status)*nsends,&send_statuses);
    MPI_Waitall(nsends,send_reqs,send_statuses);
    PetscFree(send_statuses);
  }
  PetscFree(send_reqs);
  PetscFree(sindices);
  /* All off-process rows have been delivered to their owners' astash. */
  stash->assembled = PETSC_TRUE;

  return(0);
}