Actual source code: hem.c

petsc-3.3-p7 2013-05-11
  1: 
  2: #include <petsc-private/matimpl.h>    /*I "petscmat.h" I*/
  3: #include <../src/mat/impls/aij/seq/aij.h>
  4: #include <../src/mat/impls/aij/mpi/mpiaij.h>
  5: #include <assert.h>

  7: /* linked list methods 
  8:  *
  9:  *  PetscCDCreate
 10:  */
 11: PetscErrorCode PetscCDCreate( PetscInt a_size, PetscCoarsenData **a_out )
 12: {
 14:   PetscCoarsenData *ail;
 15:   PetscInt ii;
  16:   /* allocate pool, partially */
 17:   PetscMalloc(sizeof(PetscCoarsenData), &ail);
 18:   *a_out = ail;
 19:   ail->pool_list.next = PETSC_NULL;
 20:   ail->pool_list.array = PETSC_NULL;
 21:   ail->chk_sz = 0;
 22:   /* allocate array */
 23:   ail->size = a_size;
 24:   PetscMalloc(a_size*sizeof(PetscCDIntNd*), &ail->array );
 25:   for(ii=0;ii<a_size;ii++) ail->array[ii] = PETSC_NULL;
 26:   ail->extra_nodes = PETSC_NULL;
 27:   ail->mat = PETSC_NULL;
 28:   ail->removedIS = PETSC_NULL;
 29:   return 0;
 30: }
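
/* A minimal usage sketch of the list-array API defined in this file
   (the helper name is illustrative and nloc is assumed to be >= 1): a
   PetscCoarsenData holds an array of singly linked lists of global indices
   with pooled node storage.  Error checking is omitted, as it is throughout
   this listing. */
static PetscErrorCode ExampleCoarsenDataUsage( PetscInt nloc )
{
  PetscCoarsenData *cd;
  PetscInt sz;
  PetscCDCreate( nloc, &cd );        /* table of 'nloc' empty lists */
  PetscCDSetChuckSize( cd, 32 );     /* pool allocation chunk size (optional) */
  PetscCDAppendID( cd, 0, 100 );     /* list 0: 100 */
  PetscCDAppendID( cd, 0, 101 );     /* list 0: 100 -> 101 */
  PetscCDSizeAt( cd, 0, &sz );       /* sz == 2 */
  PetscCDDestroy( cd );
  return 0;
}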

  32: /* PetscCDDestroy
 33:  */
 34: PetscErrorCode PetscCDDestroy( PetscCoarsenData *ail )
 35: {
 37:   /* delete agglist */
 38:   PetscCDArrNd *n = &ail->pool_list;
 39:   n = n->next;
 40:   while( n ){
 41:     PetscCDArrNd *lstn = n;
 42:     n = n->next;
 43:     PetscFree( lstn );
 44:   }
 45:   if( ail->pool_list.array ) {
 46:     PetscFree( ail->pool_list.array );
 47:   }
 48:   if( ail->removedIS ) {
 49:     ISDestroy( &ail->removedIS );
 50:   }
 51:   /* delete this (+array) */
 52:   PetscFree( ail->array );
 53:   /* delete this (+agg+pool array) */
 54:   PetscFree( ail );
 55:   return 0;
 56: }
 57: /* PetscCDSetChuckSize
 58:  */
 59: PetscErrorCode PetscCDSetChuckSize( PetscCoarsenData *ail, PetscInt a_sz )
 60: {
 61:   ail->chk_sz = a_sz;
 62:   return 0;
 63: }
 64: /*  PetscCDGetNewNode
 65:  */
 66: PetscErrorCode PetscCDGetNewNode( PetscCoarsenData *ail, PetscCDIntNd **a_out, PetscInt a_id )
 67: {
 69:   if( ail->extra_nodes ){
 70:     PetscCDIntNd *node = ail->extra_nodes;
 71:     ail->extra_nodes = node->next;
 72:     node->gid = a_id;
 73:     node->next = PETSC_NULL;
 74:     *a_out = node;
 75:   }
 76:   else {
 77:     if( !ail->pool_list.array ){
  78:       if( !ail->chk_sz ) ail->chk_sz = 10; /* use a chunk size of ail->size? */
 79:       PetscMalloc(ail->chk_sz*sizeof(PetscCDIntNd), &ail->pool_list.array);
 80:       ail->new_node = ail->pool_list.array;
 81:       ail->new_left = ail->chk_sz;
 82:       ail->new_node->next = PETSC_NULL;
 83:     }
 84:     else if( !ail->new_left ){
 85:       PetscCDArrNd *node;
 86:       PetscMalloc(ail->chk_sz*sizeof(PetscCDIntNd) + sizeof(PetscCDArrNd), &node );
 87:       node->array = (PetscCDIntNd*)(node + 1);
 88:       node->next = ail->pool_list.next;
 89:       ail->pool_list.next = node;
 90:       ail->new_left = ail->chk_sz;
 91:       ail->new_node = node->array;
 92:     }
 93:     ail->new_node->gid = a_id;
 94:     ail->new_node->next = PETSC_NULL;
 95:     *a_out = ail->new_node++; ail->new_left--;
 96:   }
 97:   return 0;
 98: }

100: /* PetscLLNSetID
101:  */
102: PetscErrorCode PetscLLNSetID( PetscCDIntNd *a_this, PetscInt a_id )
103: {
104:   a_this->gid = a_id;
105:   return 0;
106: }
107: /* PetscLLNGetID
108:  */
109: PetscErrorCode PetscLLNGetID( const PetscCDIntNd *a_this, PetscInt *a_gid)
110: {
111:   *a_gid = a_this->gid;
112:   return 0;
113: }
114: /* PetscCDGetHeadPos
115:  */
116: PetscErrorCode PetscCDGetHeadPos( const PetscCoarsenData *ail, PetscInt a_idx, PetscCDPos *pos )
117: {
118:   if(a_idx>=ail->size)SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"a_idx >= ail->size: a_idx=%d.",a_idx);
119:   *pos = ail->array[a_idx];
120:   return 0;
121: }
122: /* PetscCDGetNextPos
123:  */
124: PetscErrorCode PetscCDGetNextPos( const PetscCoarsenData *ail, PetscInt l_idx, PetscCDPos *pos )
125: {
126:   if(!(*pos))SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"NULL input position.");
127:   *pos = (*pos)->next;
128:   return 0;
129: }
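
/* A minimal sketch of the iteration idiom these position routines support
   (the helper name is illustrative); the same pattern is used below in
   heavyEdgeMatchAgg() and PetscCDGetASMBlocks(). */
static PetscErrorCode ExampleIterateList( const PetscCoarsenData *ail, PetscInt a_idx )
{
  PetscCDPos pos;
  PetscInt   gid;
  PetscCDGetHeadPos( ail, a_idx, &pos );
  while( pos ){
    PetscLLNGetID( pos, &gid );           /* global id stored in this node */
    PetscCDGetNextPos( ail, a_idx, &pos );
    /* ... consume gid ... */
  }
  return 0;
}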

131: /* PetscCDAppendID
132:  */
133: PetscErrorCode PetscCDAppendID( PetscCoarsenData *ail, PetscInt a_idx, PetscInt a_id )
134: {
136:   PetscCDIntNd *n,*n2;
137:   PetscCDGetNewNode( ail, &n, a_id );
138:   if(a_idx>=ail->size)SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Index %d out of range.",a_idx);
139:   if( !(n2=ail->array[a_idx]) )  ail->array[a_idx] = n;
140:   else {
141:     do{
142:       if(!n2->next) {
143:         n2->next = n;
144:         assert(n->next == PETSC_NULL);
145:         break;
146:       }
147:       n2 = n2->next;
148:     }while(n2);
149:     assert(n2);
150:   }
151:   return 0;
152: }
153: /* PetscCDAppendNode
154:  */
155: PetscErrorCode PetscCDAppendNode( PetscCoarsenData *ail, PetscInt a_idx,  PetscCDIntNd *a_n )
156: {
157:   PetscCDIntNd *n2;
158:   if(a_idx>=ail->size)SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Index %d out of range.",a_idx);
159:   if( !(n2=ail->array[a_idx]) )  ail->array[a_idx] = a_n;
160:   else{
161:     do{
162:       if(!n2->next) {
163:         n2->next = a_n;
164:         a_n->next = PETSC_NULL;
165:         break;
166:       }
167:       n2 = n2->next;
168:     }while(n2);
169:     assert(n2);
170:   }
171:   return 0;
172: }

 174: /* PetscCDRemoveNextNode: removes a_last->next; this exposes the singly linked list structure to the API
175:  */
176: PetscErrorCode PetscCDRemoveNextNode( PetscCoarsenData *ail, PetscInt a_idx,  PetscCDIntNd *a_last )
177: {
178:   PetscCDIntNd *del;
179:   if(a_idx>=ail->size)SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Index %d out of range.",a_idx);
180:   assert(a_last->next);
181:   del = a_last->next;
182:   a_last->next = del->next;
 183:   /* del->next = PETSC_NULL; -- this is still used in an iterator so keep it intact -- need to fix this with a doubly linked list */
184:   /* could reuse n2 but PetscCDAppendNode sometimes uses it */
185:   return 0;
186: }

188: /* PetscCDPrint
189:  */
192: PetscErrorCode PetscCDPrint( const PetscCoarsenData *ail, MPI_Comm comm )
193: {
195:   PetscCDIntNd *n;
196:   PetscInt ii,kk;
197:   PetscMPIInt    mype;

200:   MPI_Comm_rank( comm, &mype );
201:   for(ii=0;ii<ail->size;ii++){
202:     kk = 0;
203:     n = ail->array[ii];
204:     if(n)PetscPrintf(comm,"[%d]%s list %d:\n",mype,__FUNCT__,ii);
205:     while(n){
206:       PetscPrintf(comm,"\t[%d] %d) id %d\n",mype,++kk,n->gid);
207:       n = n->next;
208:     }
209:   }
210:   return(0);
211: }
212: /* PetscCDAppendRemove
213:  */
214: PetscErrorCode PetscCDAppendRemove(PetscCoarsenData *ail, PetscInt a_destidx, PetscInt a_srcidx)
215: {
216:   PetscCDIntNd *n;
217:   if(a_srcidx>=ail->size)SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Index %d out of range.",a_srcidx);
218:   if(a_destidx>=ail->size)SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Index %d out of range.",a_destidx);
219:   if(a_destidx==a_srcidx)SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"a_destidx==a_srcidx %d.",a_destidx);
220:   n = ail->array[a_destidx];
221:   if( !n  ) ail->array[a_destidx] = ail->array[a_srcidx];
222:   else {
223:     do{
224:       if( !n->next ){
225:         n->next = ail->array[a_srcidx];
226:         break;
227:       }
228:       n = n->next;
229:     }while( 1 );
230:   }
231:   ail->array[a_srcidx] = PETSC_NULL;
232:   return 0;
233: }

235: /* PetscCDRemoveAll
236:  */
237: PetscErrorCode PetscCDRemoveAll( PetscCoarsenData *ail, PetscInt a_idx )
238: {
239:   PetscCDIntNd *rem,*n1;
240:   if(a_idx>=ail->size)SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Index %d out of range.",a_idx);
241:   rem = ail->array[a_idx];
242:   ail->array[a_idx] = PETSC_NULL;
243:   if(!(n1=ail->extra_nodes)) ail->extra_nodes = rem;
244:   else {
245:     while( n1->next ) n1 = n1->next;
246:     n1->next = rem;
247:   }
248:   return 0;
249: }

251: /* PetscCDSizeAt
252:  */
253: PetscErrorCode PetscCDSizeAt( const PetscCoarsenData *ail, PetscInt a_idx, PetscInt *a_sz )
254: {
255:   PetscCDIntNd *n1;
256:   PetscInt sz = 0;
257:   if(a_idx>=ail->size)SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Index %d out of range.",a_idx);
258:   n1 = ail->array[a_idx];
259:   while(n1){
260:     n1 = n1->next;
261:     sz++;
262:   }
263:   *a_sz = sz;
264:   return 0;
265: }

267: /* PetscCDEmptyAt
268:  */
269: PetscErrorCode PetscCDEmptyAt( const PetscCoarsenData *ail, PetscInt a_idx, PetscBool *a_e )
270: {
271:   if(a_idx>=ail->size)SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_USER,"Index %d out of range.",a_idx);
272:   *a_e = (PetscBool)(ail->array[a_idx]==PETSC_NULL);
273:   return 0;
274: }

276: /* PetscCDGetMIS
277:  */
278: PetscErrorCode PetscCDGetMIS( PetscCoarsenData *ail, IS *a_mis )
279: {
281:   PetscCDIntNd *n;
282:   PetscInt ii,kk;
283:   PetscInt *permute;

285:   for(ii=kk=0;ii<ail->size;ii++){
286:     n = ail->array[ii];
287:     if(n) kk++;
288:   }
289:   PetscMalloc( kk*sizeof(PetscInt), &permute );
290:   for(ii=kk=0;ii<ail->size;ii++){
291:     n = ail->array[ii];
292:     if(n) permute[kk++] = ii;
293:   }
294:   ISCreateGeneral(PETSC_COMM_SELF, kk, permute, PETSC_OWN_POINTER, a_mis);
295: 

297:   return 0;
298: }
299: /* PetscCDGetMat
300:  */
301: PetscErrorCode PetscCDGetMat( const PetscCoarsenData *ail, Mat *a_mat )
302: {
303:   *a_mat = ail->mat;
304:   return 0;
305: }

307: /* PetscCDSetMat
308:  */
309: PetscErrorCode PetscCDSetMat( PetscCoarsenData *ail, Mat a_mat )
310: {
311:   ail->mat = a_mat;
312:   return 0;
313: }


316: /* PetscCDGetASMBlocks
317:  */
318: PetscErrorCode PetscCDGetASMBlocks( const PetscCoarsenData *ail, const PetscInt a_bs, PetscInt *a_sz, IS **a_local_is )
319: {
321:   PetscCDIntNd *n;
322:   PetscInt lsz,ii,kk,*idxs,jj;
323:   IS *is_loc;
324: 
325:   for(ii=kk=0;ii<ail->size;ii++){
326:     if(ail->array[ii]) kk++;
327:   }
328:   *a_sz = kk; /* out */

330:   PetscMalloc( kk*sizeof(IS*), &is_loc );
331:   *a_local_is = is_loc; /* out */
332: 
333:   for(ii=kk=0;ii<ail->size;ii++){
334:     for( lsz=0, n=ail->array[ii] ; n ; lsz++, n=n->next ) /* void */;
335:     if( lsz ){
336:       PetscMalloc( a_bs*lsz*sizeof(PetscInt), &idxs );
337:       for( lsz = 0, n=ail->array[ii] ; n ; n = n->next) {
338:         PetscInt gid;
339:         PetscLLNGetID( n, &gid );
340:         for(jj=0;jj<a_bs;lsz++,jj++) idxs[lsz] = a_bs*gid + jj;
341:       }
342:       ISCreateGeneral(PETSC_COMM_SELF, lsz, idxs, PETSC_OWN_POINTER, &is_loc[kk++] );
343: 
344:     }
345:   }
346:   assert(*a_sz == kk);

348:   return 0;
349: }
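
/* A minimal sketch of consuming the IS array built above (the helper name is
   illustrative).  Each block holds 'a_bs' dofs per aggregated vertex and is
   intended for block smoothers such as PCASM, e.g. via
   PCASMSetLocalSubdomains(); the caller is expected to destroy the returned
   index sets and free the array. */
static PetscErrorCode ExampleInspectASMBlocks( const PetscCoarsenData *ail, PetscInt bs )
{
  PetscInt nblk,kk,len;
  IS *blocks;
  PetscCDGetASMBlocks( ail, bs, &nblk, &blocks );
  for(kk=0;kk<nblk;kk++){
    ISGetLocalSize( blocks[kk], &len );
    PetscPrintf(PETSC_COMM_SELF,"block %d: %d dofs\n",kk,len);
    ISDestroy( &blocks[kk] );
  }
  PetscFree( blocks );
  return 0;
}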


352: /* PetscCDSetRemovedIS
353:  */
354: PetscErrorCode PetscCDSetRemovedIS( PetscCoarsenData *ail, MPI_Comm comm, const PetscInt a_sz, PetscInt a_ids[])
355: {

358:   if( ail->removedIS ) {
359:     ISDestroy( &ail->removedIS);
360:   }
361:   ISCreateGeneral( comm, a_sz, a_ids, PETSC_COPY_VALUES, &ail->removedIS );
362:   return 0;
363: }

365: /* PetscCDGetRemovedIS
366:  */
367: PetscErrorCode PetscCDGetRemovedIS( PetscCoarsenData *ail, IS *a_is )
368: {
369:   *a_is = ail->removedIS;
370:   ail->removedIS = PETSC_NULL; /* hack to relinquish ownership */
371:   return 0;
372: }

374: /* ********************************************************************** */
375: /* edge for priority queue */
376: typedef struct edge_tag{
377:   PetscReal   weight;
378:   PetscInt    lid0,gid1,cpid1;
379: }Edge;
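/* In an Edge, 'lid0' is the local index of the first endpoint, 'gid1' the
   global index of the second endpoint, and 'cpid1' its index into the ghost
   (compressed) columns of the off-diagonal block, or -1 when the second
   endpoint is also local. */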

381: int gamg_hem_compare (const void *a, const void *b)
382: {
383:   PetscReal va = ((Edge*)a)->weight, vb = ((Edge*)b)->weight;
 384:   return (va < vb) ? 1 : (va == vb) ? 0 : -1; /* sort into descending order; 0 for equal */
385: }

387: /* -------------------------------------------------------------------------- */
388: /*
389:    heavyEdgeMatchAgg - parallel heavy edge matching (HEM). MatAIJ specific!!!

 391:    Input Parameters:
 392:    . perm - permutation
 393:    . a_Gmat - global matrix of graph (data not defined)
 394:    . verbose - verbosity level
395:    Output Parameter:
396:    . a_locals_llist - array of list of local nodes rooted at local node
397: */
400: PetscErrorCode heavyEdgeMatchAgg( const IS perm,
401:                                   const Mat a_Gmat,
402:                                   const PetscInt verbose, 
403:                                   PetscCoarsenData **a_locals_llist
404:                                   )
405: {
407:   PetscBool      isMPI;
408:   MPI_Comm       wcomm = ((PetscObject)a_Gmat)->comm;
409:   PetscInt       sub_it,kk,n,ix,*idx,*ii,iter,Iend,my0;
410:   PetscMPIInt    mype,npe;
411:   const PetscInt nloc = a_Gmat->rmap->n,n_iter=6; /* need to figure out how to stop this */
412:   PetscInt      *lid_cprowID,*lid_gid;
413:   PetscBool     *lid_matched;
414:   Mat_SeqAIJ    *matA, *matB=0;
415:   Mat_MPIAIJ    *mpimat=0;
416:   PetscScalar    one=1.;
417:   PetscCoarsenData *agg_llists,*deleted_list;
418:   Mat            cMat,tMat,P;
419:   MatScalar     *ap;
420:   PetscMPIInt    tag1,tag2;

423:   MPI_Comm_rank( wcomm, &mype );
424:   MPI_Comm_size( wcomm, &npe );
425:   MatGetOwnershipRange( a_Gmat, &my0, &Iend );
426:   PetscCommGetNewTag( wcomm, &tag1 );
427:   PetscCommGetNewTag( wcomm, &tag2 );

429:   PetscMalloc( nloc*sizeof(PetscInt), &lid_gid );  /* explicit array needed */
430:   PetscMalloc( nloc*sizeof(PetscInt), &lid_cprowID );
431:   PetscMalloc( nloc*sizeof(PetscBool), &lid_matched );

433:   PetscCDCreate( nloc, &agg_llists );
434:   /* PetscCDSetChuckSize( agg_llists, nloc+1 );  */
435:   *a_locals_llist = agg_llists;
436:   PetscCDCreate( npe, &deleted_list );
437:   PetscCDSetChuckSize( deleted_list, 100 );
438:   /* setup 'lid_gid' for scatters and add self to all lists */
439:   for(kk=0;kk<nloc;kk++) {
440:     lid_gid[kk] = kk + my0;
441:     PetscCDAppendID( agg_llists, kk, my0+kk );
442:   }

 444:   /* make a copy of the graph; this gets destroyed in the iterations */
445:   MatDuplicate(a_Gmat,MAT_COPY_VALUES,&cMat);
446:   PetscObjectTypeCompare( (PetscObject)a_Gmat, MATMPIAIJ, &isMPI );
447:   iter = 0;
448:   while( iter++ < n_iter ) {
449:     PetscScalar *cpcol_gid,*cpcol_max_ew,*cpcol_max_pe,*lid_max_ew;
450:     PetscBool   *cpcol_matched;
451:     PetscMPIInt *cpcol_pe,proc;
452:     Vec          locMaxEdge,locMaxPE,ghostMaxEdge,ghostMaxPE;
453:     PetscInt     nEdges,n_nz_row,jj;
454:     Edge        *Edges;
455:     PetscInt     gid;
456:     const PetscInt *perm_ix, n_sub_its = 120;
457: 
458:     /* get submatrices of cMat */
459:     if (isMPI) {
460:       mpimat = (Mat_MPIAIJ*)cMat->data;
461:       matA = (Mat_SeqAIJ*)mpimat->A->data;
462:       matB = (Mat_SeqAIJ*)mpimat->B->data;
463:       /* force compressed storage of B */
464:       matB->compressedrow.check = PETSC_TRUE;
465:       MatCheckCompressedRow(mpimat->B,&matB->compressedrow,matB->i,cMat->rmap->n,-1.0);
466:       assert( matB->compressedrow.use );
467:     } else {
468:       matA = (Mat_SeqAIJ*)cMat->data;
469:     }
470:     assert( matA && !matA->compressedrow.use );
471:     assert( matB==0 || matB->compressedrow.use );

473:     /* set max edge on nodes */
474:     MatGetVecs( cMat, &locMaxEdge, 0 );
475:     MatGetVecs( cMat, &locMaxPE, 0 );
476: 
477:     /* get 'cpcol_pe' & 'cpcol_gid' & init. 'cpcol_matched' using 'mpimat->lvec' */
478:     if( mpimat ) {
479:       Vec vec; PetscScalar vval;

481:       MatGetVecs( cMat, &vec, 0 );
482:       /* cpcol_pe */
483:       vval = (PetscScalar)(mype);
484:       for(kk=0,gid=my0;kk<nloc;kk++,gid++) {
485:         VecSetValues( vec, 1, &gid, &vval, INSERT_VALUES );   /* set with GID */
486:       }
487:       VecAssemblyBegin( vec );
488:       VecAssemblyEnd( vec );
489:       VecScatterBegin(mpimat->Mvctx,vec,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);
490:         VecScatterEnd(mpimat->Mvctx,vec,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);
491:       VecGetArray( mpimat->lvec, &cpcol_gid );  /* get proc ID in 'cpcol_gid' */
492:       VecGetLocalSize( mpimat->lvec, &n );
493:       PetscMalloc( n*sizeof(PetscInt), &cpcol_pe );
494:       for(kk=0;kk<n;kk++) cpcol_pe[kk] = (PetscMPIInt)PetscRealPart(cpcol_gid[kk]);
495:       VecRestoreArray( mpimat->lvec, &cpcol_gid );

497:       /* cpcol_gid */
498:       for(kk=0,gid=my0;kk<nloc;kk++,gid++) {
499:         vval = (PetscScalar)(gid);
500:         VecSetValues( vec, 1, &gid, &vval, INSERT_VALUES );   /* set with GID */
501:       }
502:       VecAssemblyBegin( vec );
503:       VecAssemblyEnd( vec );
504:       VecScatterBegin(mpimat->Mvctx,vec,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);
505:         VecScatterEnd(mpimat->Mvctx,vec,mpimat->lvec,INSERT_VALUES,SCATTER_FORWARD);
506:       VecDestroy( &vec );
507:       VecGetArray( mpimat->lvec, &cpcol_gid );  /* get proc ID in 'cpcol_gid' */

509:       /* cpcol_matched */
510:       VecGetLocalSize( mpimat->lvec, &n );
511:       PetscMalloc( n*sizeof(PetscBool), &cpcol_matched );
512:       for(kk=0;kk<n;kk++) cpcol_matched[kk] = PETSC_FALSE;
513:     }

515:     /* need an inverse map - locals */
516:     for(kk=0;kk<nloc;kk++) lid_cprowID[kk] = -1;
517:     /* set index into compressed row 'lid_cprowID' */
518:     if( matB ) {
519:       ii = matB->compressedrow.i;
520:       for (ix=0; ix<matB->compressedrow.nrows; ix++) {
521:         lid_cprowID[matB->compressedrow.rindex[ix]] = ix;
522:       }
523:     }

 525:     /* on the first iteration, record removed (singleton) vertices in an IS */
526:     if( iter==1 ) {
527:       PetscInt *lid_rem,idx;
528:       PetscMalloc( nloc*sizeof(PetscInt), &lid_rem );
529:       for(kk=idx=0;kk<nloc;kk++){
530:         PetscInt nn,lid=kk;
531:         ii = matA->i; nn = ii[lid+1] - ii[lid];
532:         if( (ix=lid_cprowID[lid]) != -1 ) { /* if I have any ghost neighbors */
533:           ii = matB->compressedrow.i;
534:           nn += ii[ix+1] - ii[ix];
535:         }
536:         if( nn < 2 ) {
537:           lid_rem[idx++] = kk + my0;
538:         }
539:       }
540:       PetscCDSetRemovedIS( agg_llists, wcomm, idx, lid_rem );
541:       PetscFree( lid_rem );
542:     }

 544:     /* compute 'locMaxEdge' & 'locMaxPE', and count the edges for the edge list */
545:     for(nEdges=0,kk=0,gid=my0;kk<nloc;kk++,gid++){
546:       PetscReal max_e = 0., tt;
547:       PetscScalar vval;
548:       PetscInt lid = kk;
549:       PetscMPIInt max_pe=mype,pe;
550:       ii = matA->i; n = ii[lid+1] - ii[lid]; idx = matA->j + ii[lid];
551:       ap = matA->a + ii[lid];
552:       for (jj=0; jj<n; jj++) {
553:         PetscInt lidj = idx[jj];
554:         if(lidj != lid && PetscRealPart(ap[jj]) > max_e ) max_e = PetscRealPart(ap[jj]);
555:         if(lidj > lid) nEdges++;
556:       }
557:       if( (ix=lid_cprowID[lid]) != -1 ) { /* if I have any ghost neighbors */
558:         ii = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
559:         ap = matB->a + ii[ix];
560:         idx = matB->j + ii[ix];
561:         for( jj=0 ; jj<n ; jj++ ) {
562:           if( (tt=PetscRealPart(ap[jj])) > max_e ) max_e = tt;
563:           nEdges++;
564:           if( (pe=cpcol_pe[idx[jj]]) > max_pe ) max_pe = pe;
565:         }
566:       }
567:       vval = max_e;
568:       VecSetValues( locMaxEdge, 1, &gid, &vval, INSERT_VALUES );
569: 
570:       vval = (PetscScalar)max_pe;
571:       VecSetValues( locMaxPE, 1, &gid, &vval, INSERT_VALUES );
572:     }
573:     VecAssemblyBegin( locMaxEdge );
574:     VecAssemblyEnd( locMaxEdge );
575:     VecAssemblyBegin( locMaxPE );
576:     VecAssemblyEnd( locMaxPE );

578:     /* get 'cpcol_max_ew' & 'cpcol_max_pe' */
579:     if( mpimat ) {
580:       VecDuplicate( mpimat->lvec, &ghostMaxEdge );
581:       VecScatterBegin(mpimat->Mvctx,locMaxEdge,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);
582:         VecScatterEnd(mpimat->Mvctx,locMaxEdge,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);
583:       VecGetArray( ghostMaxEdge, &cpcol_max_ew );
584: 
585:       VecDuplicate( mpimat->lvec, &ghostMaxPE );
586:       VecScatterBegin(mpimat->Mvctx,locMaxPE,ghostMaxPE,INSERT_VALUES,SCATTER_FORWARD);
587:         VecScatterEnd(mpimat->Mvctx,locMaxPE,ghostMaxPE,INSERT_VALUES,SCATTER_FORWARD);
588:       VecGetArray( ghostMaxPE, &cpcol_max_pe );
589:     }

591:     /* setup sorted list of edges */
592:     PetscMalloc( nEdges*sizeof(Edge), &Edges );
593:     ISGetIndices( perm, &perm_ix );
594:     for(nEdges=n_nz_row=kk=0;kk<nloc;kk++){
595:       PetscInt nn, lid = perm_ix[kk];
596:       ii = matA->i; nn = n = ii[lid+1] - ii[lid]; idx = matA->j + ii[lid];
597:       ap = matA->a + ii[lid];
598:       for (jj=0; jj<n; jj++) {
599:         PetscInt lidj = idx[jj];        assert(PetscRealPart(ap[jj])>0.);
600:         if(lidj > lid) {
601:           Edges[nEdges].lid0 = lid;
602:           Edges[nEdges].gid1 = lidj + my0;
603:           Edges[nEdges].cpid1 = -1;
604:           Edges[nEdges].weight = PetscRealPart(ap[jj]);
605:           nEdges++;
606:         }
607:       }
608:       if( (ix=lid_cprowID[lid]) != -1 ) { /* if I have any ghost neighbors */
609:         ii = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
610:         ap = matB->a + ii[ix];
611:         idx = matB->j + ii[ix];
612:         nn += n;
613:         for( jj=0 ; jj<n ; jj++ ) {
614:           assert(PetscRealPart(ap[jj])>0.);
615:           Edges[nEdges].lid0 = lid;
616:           Edges[nEdges].gid1 = (PetscInt)PetscRealPart(cpcol_gid[idx[jj]]);
617:           Edges[nEdges].cpid1 = idx[jj];
618:           Edges[nEdges].weight = PetscRealPart(ap[jj]);
619:           nEdges++;
620:         }
621:       }
622:       if( nn > 1 ) n_nz_row++;
623:       else if( iter == 1 ){
 624:         /* we could select this because it is technically in the MIS, but let's not */
625:         PetscCDRemoveAll( agg_llists, lid );
626:       }
627:     }
628:     ISRestoreIndices(perm,&perm_ix);

630:     qsort( Edges, nEdges, sizeof(Edge), gamg_hem_compare );

632:     /* projection matrix */
633:     MatCreateAIJ( wcomm, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE, 1, 0, 1, 0, &P );
634: 

636:     /* clear matched flags */
637:     for(kk=0;kk<nloc;kk++) lid_matched[kk] = PETSC_FALSE;
638:     /* process - communicate - process */
639:     for(sub_it=0;sub_it<n_sub_its;sub_it++){
640:       PetscInt nactive_edges;
641: 
642:       VecGetArray( locMaxEdge, &lid_max_ew );
643:       for(kk=nactive_edges=0;kk<nEdges;kk++){
644:         /* HEM */
645:         const Edge *e = &Edges[kk];
646:         const PetscInt lid0=e->lid0,gid1=e->gid1,cpid1=e->cpid1,gid0=lid0+my0,lid1=gid1-my0;
647:         PetscBool isOK = PETSC_TRUE;

649:         /* skip if either (local) vertex is done already */
650:         if( lid_matched[lid0] || (gid1>=my0 && gid1<Iend && lid_matched[gid1-my0]) ) {
651:           continue;
652:         }
653:         /* skip if ghost vertex is done */
654:         if( cpid1 != -1 && cpcol_matched[cpid1] ) {
655:           continue;
656:         }

658:         nactive_edges++;
659:         /* skip if I have a bigger edge someplace (lid_max_ew gets updated) */
660:         if( PetscRealPart(lid_max_ew[lid0]) > e->weight + 1.e-12 ) {
661:           continue;
662:         }
663: 
664:         if( cpid1 == -1 ) {
665:           if( PetscRealPart(lid_max_ew[lid1]) > e->weight + 1.e-12 ) {
666:             continue;
667:           }
668:         }
669:         else {
670:           /* see if edge might get matched on other proc */
671:           PetscReal g_max_e = PetscRealPart(cpcol_max_ew[cpid1]);
672:           if( g_max_e > e->weight + 1.e-12 ) {
673:             continue;
674:           }
 675:           /* check for max_e equal to this edge and a larger-rank processor that will deal with it */
676:           else if( e->weight > g_max_e - 1.e-12 && (PetscMPIInt)PetscRealPart(cpcol_max_pe[cpid1]) > mype ) {
677:             continue;
678:           }
679:         }
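        /* Reaching here means this edge is (within tolerance) as heavy as the
           heaviest edge either endpoint has recorded, and a tie with an
           equally heavy off-process copy has been conceded to the higher rank.
           The checks below make sure no equally heavy ghost edge at either
           endpoint belongs to a higher-ranked process that will claim it. */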

681:         /* check ghost for v0 */
682:         if( isOK ){
683:           PetscReal max_e,ew;
684:           if( (ix=lid_cprowID[lid0]) != -1 ) { /* if I have any ghost neighbors */
685:             ii = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
686:             ap = matB->a + ii[ix];
687:             idx = matB->j + ii[ix];
688:             for( jj=0 ; jj<n && isOK; jj++ ) {
689:               PetscInt lidj = idx[jj];
690:               if( cpcol_matched[lidj] ) continue;
691:               ew = PetscRealPart(ap[jj]); max_e = PetscRealPart(cpcol_max_ew[lidj]);
 692:               /* check for max_e equal to this edge and a larger-rank processor that will deal with it */
693:               if( ew > max_e - 1.e-12 && ew > PetscRealPart(lid_max_ew[lid0]) - 1.e-12 &&
694:                   (PetscMPIInt)PetscRealPart(cpcol_max_pe[lidj]) > mype )
695:               {
696:                 isOK = PETSC_FALSE;
697:               }
698:             }
699:           }

701:           /* for v1 */
702:           if( cpid1 == -1 && isOK ){
703:             if( (ix=lid_cprowID[lid1]) != -1 ) { /* if I have any ghost neighbors */
704:               ii = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
705:               ap = matB->a + ii[ix];
706:               idx = matB->j + ii[ix];
707:               for( jj=0 ; jj<n && isOK ; jj++ ) {
708:                 PetscInt lidj = idx[jj];
709:                 if( cpcol_matched[lidj] ) continue;
710:                 ew = PetscRealPart(ap[jj]); max_e = PetscRealPart(cpcol_max_ew[lidj]);
 711:                 /* check for max_e equal to this edge and a larger-rank processor that will deal with it */
712:                 if( ew > max_e - 1.e-12 && ew > PetscRealPart(lid_max_ew[lid1]) - 1.e-12 &&
713:                     (PetscMPIInt)PetscRealPart(cpcol_max_pe[lidj]) > mype ) {
714:                   isOK = PETSC_FALSE;
715:                 }
716:               }
717:             }
718:           }
719:         }

721:         /* do it */
722:         if( isOK ){
723:           if( cpid1 == -1 ) {
724:             lid_matched[lid1] = PETSC_TRUE;  /* keep track of what we've done this round */
725:             PetscCDAppendRemove( agg_llists, lid0, lid1 );
726:           }
727:           else if( sub_it != n_sub_its-1 ) {
 728:             /* record this ghost (cpid1) as deleted by me -- I need its children */
729:             proc = cpcol_pe[cpid1];
730:             cpcol_matched[cpid1] = PETSC_TRUE; /* messing with VecGetArray array -- needed??? */
731:             PetscCDAppendID( deleted_list, proc, cpid1 );  /* cache to send messages */
732:             PetscCDAppendID( deleted_list, proc, lid0 );
733:           }
734:           else {
735:             continue;
736:           }
737:           lid_matched[lid0] = PETSC_TRUE; /* keep track of what we've done this round */
738:           /* set projection */
739:           MatSetValues(P,1,&gid0,1,&gid0,&one,INSERT_VALUES);
740:           MatSetValues(P,1,&gid1,1,&gid0,&one,INSERT_VALUES);
741:         } /* matched */
742:       } /* edge loop */

744:       /* deal with deleted ghost on first pass */
745:       if(npe>1 && sub_it != n_sub_its-1 ){
746:         PetscCDPos pos;  PetscBool ise;
747:         PetscInt nSend1, **sbuffs1,nSend2;
748: #define REQ_BF_SIZE 100
749:         MPI_Request *sreqs2[REQ_BF_SIZE],*rreqs2[REQ_BF_SIZE];
750:         MPI_Status status;
751: 
752:         /* send request */
753:         for(proc=0,nSend1=0;proc<npe;proc++){
754:           PetscCDEmptyAt(deleted_list,proc,&ise);
755:           if( !ise ) nSend1++;
756:         }
757:         PetscMalloc( nSend1*sizeof(PetscInt*), &sbuffs1 );
758:         /* PetscMalloc4( nSend1, PetscInt*, sbuffs1, nSend1, PetscInt*, rbuffs1, nSend1, MPI_Request*, sreqs1, nSend1, MPI_Request*, rreqs1 );   */
759:         /* PetscFree4(sbuffs1,rbuffs1,sreqs1,rreqs1); */
760:         for(proc=0,nSend1=0;proc<npe;proc++){
761:           /* count ghosts */
762:           PetscCDSizeAt(deleted_list,proc,&n);
763:           if(n>0){
764: #define CHUNCK_SIZE 100
765:             PetscInt *sbuff,*pt;
766:             MPI_Request *request;
767:             assert(n%2==0);
768:             n /= 2;
769:             PetscMalloc( (2 + 2*n + n*CHUNCK_SIZE)*sizeof(PetscInt) + 2*sizeof(MPI_Request), &sbuff );
770:             /* PetscMalloc4(2+2*n,PetscInt,sbuffs1[nSend1],n*CHUNCK_SIZE,PetscInt,rbuffs1[nSend1],1,MPI_Request,rreqs2[nSend1],1,MPI_Request,sreqs2[nSend1]); */
771: 
772:             /* save requests */
773:             sbuffs1[nSend1] = sbuff;
774:             request = (MPI_Request*)sbuff;
775:             sbuff = pt = (PetscInt*)(request+1);
776:             *pt++ = n; *pt++ = mype;

778:             PetscCDGetHeadPos(deleted_list,proc,&pos);
779:             while(pos){
780:               PetscInt lid0, cpid, gid;
781:               PetscLLNGetID( pos, &cpid );
782:               gid = (PetscInt)PetscRealPart(cpcol_gid[cpid]);
783:               PetscCDGetNextPos(deleted_list,proc,&pos);
784:               PetscLLNGetID( pos, &lid0 );
785:               PetscCDGetNextPos(deleted_list,proc,&pos);
786:               *pt++ = gid; *pt++ = lid0;
787:             }
788:             /* send request tag1 [n, proc, n*[gid1,lid0] ] */
789:             MPI_Isend(sbuff, 2*n+2, MPIU_INT, proc, tag1, wcomm, request);
 790:             /* post receive */
791:             request = (MPI_Request*)pt;
792:             rreqs2[nSend1] = request; /* cache recv request */
793:             pt = (PetscInt*)(request+1);
794:             MPI_Irecv( pt, n*CHUNCK_SIZE, MPIU_INT, proc, tag2, wcomm, request);
795:             /* clear list */
796:             PetscCDRemoveAll( deleted_list, proc );
797:             nSend1++;
798:           }
799:         }
 800:         /* receive requests, send responses, clear lists */
801:         kk = nactive_edges;
 802:         MPI_Allreduce(&kk,&nactive_edges,1,MPIU_INT,MPI_SUM,wcomm); /* not correct synchronization and global */
803:         nSend2 = 0;
804:         while( 1 ){
805: #define BF_SZ 10000
806:           PetscMPIInt flag,count;
807:           PetscInt rbuff[BF_SZ],*pt,*pt2,*pt3,count2,*sbuff,count3;
808:           MPI_Request *request;
809:           MPI_Iprobe( MPI_ANY_SOURCE, tag1, wcomm, &flag, &status );
810:           if(!flag) break;
811:           MPI_Get_count( &status, MPIU_INT, &count );
 812:           if(count > BF_SZ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"buffer too small for receive: %d",count);
813:           proc = status.MPI_SOURCE;
 814:           /* receive request tag1 [n, proc, n*[gid1,lid0] ] */
815:           MPI_Recv( rbuff, count, MPIU_INT, proc, tag1, wcomm, &status );
816:           /* count sends */
817:           pt = rbuff; count3 = count2 = 0;
818:           n = *pt++; kk = *pt++;           assert(kk==proc);
819:           while(n--){
820:             PetscInt gid1=*pt++, lid1=gid1-my0; kk=*pt++;  assert(lid1>=0 && lid1<nloc);
821:             if(lid_matched[lid1]){
 822:               PetscPrintf(PETSC_COMM_SELF,"\t *** [%d]%s %d) ERROR received deleted gid %d, deleted by (lid) %d from proc %d\n",mype,__FUNCT__,sub_it,gid1,kk,proc);
823:               PetscSleep(1);
824:             }
825:             assert(!lid_matched[lid1]);
826:             lid_matched[lid1] = PETSC_TRUE; /* keep track of what we've done this round */
827:             PetscCDSizeAt( agg_llists, lid1, &kk );
828:             count2 += kk + 2;
829:             count3++; /* number of verts requested (n) */
830:           }
831:           assert(pt-rbuff==count);
832:           if(count2 > count3*CHUNCK_SIZE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Irecv will be too small: %d",count2);
833:           /* send tag2 *[lid0, n, n*[gid] ] */
834:           PetscMalloc( count2*sizeof(PetscInt) + sizeof(MPI_Request), &sbuff );
835:           request = (MPI_Request*)sbuff;
836:           sreqs2[nSend2++] = request; /* cache request */
837:           if(nSend2==REQ_BF_SIZE) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"buffer too small for requests: %d",nSend2);
838:           pt2 = sbuff = (PetscInt*)(request+1);
839:           pt  = rbuff;
840:           n = *pt++; kk = *pt++;           assert(kk==proc);
841:           while(n--){
842:             /* read [n, proc, n*[gid1,lid0] */
843:             PetscInt gid1=*pt++, lid1=gid1-my0, lid0=*pt++;   assert(lid1>=0 && lid1<nloc);
844:             /* write [lid0, n, n*[gid] ] */
845:             *pt2++ = lid0;
846:             pt3 = pt2++; /* save pointer for later */
847:             /* for(pos=PetscCDGetHeadPos(agg_llists,lid1) ; pos ; pos=PetscCDGetNextPos(agg_llists,lid1,pos)){ */
848:             PetscCDGetHeadPos(agg_llists,lid1,&pos);
849:             while(pos){
850:               PetscInt gid;
851:               PetscLLNGetID( pos, &gid );
852:               PetscCDGetNextPos(agg_llists,lid1,&pos);
853:               *pt2++ = gid;
854:             }
855:             *pt3 = (pt2-pt3)-1;
856:             /* clear list */
857:             PetscCDRemoveAll( agg_llists, lid1 );
858:           }
859:           assert(pt2-sbuff==count2); assert(pt-rbuff==count);
860:           /* send requested data tag2 *[lid0, n, n*[gid1] ] */
861:           MPI_Isend(sbuff, count2, MPIU_INT, proc, tag2, wcomm, request);
862:         }
863: 
 864:         /* receive tag2 *[lid0, n, n*[gid] ] */
865:         for(kk=0;kk<nSend1;kk++){
866:           PetscMPIInt count;
867:           MPI_Request *request;
868:           PetscInt *pt, *pt2;
869:           request = rreqs2[kk]; /* no need to free -- buffer is in 'sbuffs1' */
870:           MPI_Wait( request, &status );
871:           MPI_Get_count( &status, MPIU_INT, &count );
872:           pt = pt2 = (PetscInt*)(request+1);
873:           while(pt-pt2 < count){
874:             PetscInt lid0 = *pt++, n = *pt++;           assert(lid0>=0 && lid0<nloc);
875:             while(n--){
876:               PetscInt gid1 = *pt++;
877:               PetscCDAppendID( agg_llists, lid0, gid1 );
878:             }
879:           }
880:           assert(pt-pt2==count);
881:         }
882: 
883:         /* wait for tag1 isends */
884:         while( nSend1-- ){
885:           MPI_Request *request;
886:           request = (MPI_Request*)sbuffs1[nSend1];
887:           MPI_Wait( request, &status );
888:           PetscFree( request );
889:         }
890:         PetscFree( sbuffs1 );
891: 
892:         /* wait for tag2 isends */
893:         while( nSend2-- ){
894:           MPI_Request *request = sreqs2[nSend2];
895:           MPI_Wait( request, &status );
896:           PetscFree( request );
897:         }
898: 
899:         VecRestoreArray( ghostMaxEdge, &cpcol_max_ew );
900:         VecRestoreArray( ghostMaxPE, &cpcol_max_pe );

902:         /* get 'cpcol_matched' - use locMaxPE, ghostMaxEdge, cpcol_max_ew */
903:         for(kk=0,gid=my0;kk<nloc;kk++,gid++) {
904:           PetscScalar vval = lid_matched[kk] ? 1.0 : 0.0;
905:           VecSetValues( locMaxPE, 1, &gid, &vval, INSERT_VALUES );   /* set with GID */
906:         }
907:         VecAssemblyBegin( locMaxPE );
908:         VecAssemblyEnd( locMaxPE );
909:         VecScatterBegin(mpimat->Mvctx,locMaxPE,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);
910:           VecScatterEnd(mpimat->Mvctx,locMaxPE,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);
911:         VecGetArray( ghostMaxEdge, &cpcol_max_ew );
912:         VecGetLocalSize( mpimat->lvec, &n );
913:         for(kk=0;kk<n;kk++) {
914:           cpcol_matched[kk] = (PetscBool)(PetscRealPart(cpcol_max_ew[kk]) != 0.0);
915:         }
916: 
917:         VecRestoreArray( ghostMaxEdge, &cpcol_max_ew );
918:       } /* npe > 1 */

920:       /* compute 'locMaxEdge' */
921:       VecRestoreArray( locMaxEdge, &lid_max_ew );
922:       for(kk=0,gid=my0;kk<nloc;kk++,gid++){
923:         PetscReal   max_e = 0.,tt;
924:         PetscScalar vval;
925:         PetscInt    lid = kk;
926:         if( lid_matched[lid] ) vval = 0.;
927:         else {
928:           ii = matA->i; n = ii[lid+1] - ii[lid]; idx = matA->j + ii[lid];
929:           ap = matA->a + ii[lid];
930:           for (jj=0; jj<n; jj++) {
931:             PetscInt lidj = idx[jj];
932:             if( lid_matched[lidj] ) continue; /* this is new - can change local max */
933:             if(lidj != lid && PetscRealPart(ap[jj]) > max_e ) max_e = PetscRealPart(ap[jj]);
934:           }
935:           if( lid_cprowID && (ix=lid_cprowID[lid]) != -1 ) { /* if I have any ghost neighbors */
936:             ii = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
937:             ap = matB->a + ii[ix];
938:             idx = matB->j + ii[ix];
939:             for( jj=0 ; jj<n ; jj++ ) {
940:               PetscInt lidj = idx[jj];
941:               if( cpcol_matched[lidj] ) continue;
942:               if( (tt=PetscRealPart(ap[jj])) > max_e ) max_e = tt;
943:             }
944:           }
945:         }
946:         vval = (PetscScalar)max_e;
947:         VecSetValues( locMaxEdge, 1, &gid, &vval, INSERT_VALUES );   /* set with GID */
948:       }
949:       VecAssemblyBegin( locMaxEdge );
950:       VecAssemblyEnd( locMaxEdge );
951: 
952:       if(npe>1 && sub_it != n_sub_its-1 ){
953:         /* compute 'cpcol_max_ew' */
954:         VecScatterBegin(mpimat->Mvctx,locMaxEdge,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);
955:           VecScatterEnd(mpimat->Mvctx,locMaxEdge,ghostMaxEdge,INSERT_VALUES,SCATTER_FORWARD);
956:         VecGetArray( ghostMaxEdge, &cpcol_max_ew );
957:         VecGetArray( locMaxEdge, &lid_max_ew );

959:         /* compute 'cpcol_max_pe' */
960:         for(kk=0,gid=my0;kk<nloc;kk++,gid++){
961:           PetscInt lid = kk;
962:           PetscReal ew,v1_max_e,v0_max_e=PetscRealPart(lid_max_ew[lid]);
963:           PetscScalar vval;
964:           PetscMPIInt max_pe=mype,pe;
965:           if( lid_matched[lid] ) vval = (PetscScalar)mype;
966:           else if( (ix=lid_cprowID[lid]) != -1 ) { /* if I have any ghost neighbors */
967:             ii = matB->compressedrow.i; n = ii[ix+1] - ii[ix];
968:             ap = matB->a + ii[ix];
969:             idx = matB->j + ii[ix];
970:             for( jj=0 ; jj<n ; jj++ ) {
971:               PetscInt lidj = idx[jj];
972:               if( cpcol_matched[lidj] ) continue;
973:               ew = PetscRealPart(ap[jj]); v1_max_e = PetscRealPart(cpcol_max_ew[lidj]);
 974:               /* get max pe that has a max_e equal to this edge weight */
975:               if( (pe=cpcol_pe[idx[jj]]) > max_pe && ew > v1_max_e - 1.e-12 && ew > v0_max_e - 1.e-12 ) max_pe = pe;
976:               assert(ew < v0_max_e + 1.e-12 && ew < v1_max_e + 1.e-12);
977:             }
978:             vval = (PetscScalar)max_pe;
 979:           } else vval = (PetscScalar)mype; /* no ghost neighbors: max PE defaults to this rank */
980:           VecSetValues( locMaxPE, 1, &gid, &vval, INSERT_VALUES );
981:         }
982:         VecAssemblyBegin( locMaxPE );
983:         VecAssemblyEnd( locMaxPE );

985:         VecScatterBegin(mpimat->Mvctx,locMaxPE,ghostMaxPE,INSERT_VALUES,SCATTER_FORWARD);
986:           VecScatterEnd(mpimat->Mvctx,locMaxPE,ghostMaxPE,INSERT_VALUES,SCATTER_FORWARD);
987:         VecGetArray( ghostMaxPE, &cpcol_max_pe );
988:         VecRestoreArray( locMaxEdge, &lid_max_ew );
989:       } /* deal with deleted ghost */
990:       if(verbose>2) PetscPrintf(wcomm,"\t[%d]%s %d.%d: %d active edges.\n",
991:                                 mype,__FUNCT__,iter,sub_it,nactive_edges);
992:       if(!nactive_edges) break;
993:     } /* sub_it loop */

995:     /* clean up iteration */
996:     PetscFree( Edges );
997:     if( mpimat ){
998:       VecRestoreArray( ghostMaxEdge, &cpcol_max_ew );
999:       VecDestroy( &ghostMaxEdge );
1000:       VecRestoreArray( ghostMaxPE, &cpcol_max_pe );
1001:       VecDestroy( &ghostMaxPE );
1002:       PetscFree( cpcol_pe );
1003:       PetscFree( cpcol_matched );
1004:     }

1006:     VecDestroy( &locMaxEdge );
1007:     VecDestroy( &locMaxPE );

1009:     if( mpimat ){
1010:       VecRestoreArray( mpimat->lvec, &cpcol_gid );
1011:     }

1013:     /* create next G if needed */
1014:     if( iter == n_iter ) { /* hard-wired test - need to look at fully surrounded nodes or something */
1015:       MatDestroy( &P );
1016:       MatDestroy( &cMat );
1017:       break;
1018:     }
1019:     else {
1020:       Vec diag;
1021:       /* add identity for unmatched vertices so they stay alive */
1022:       for(kk=0,gid=my0;kk<nloc;kk++,gid++){
1023:         if( !lid_matched[kk] ) {
1024:           gid = kk+my0;
1025:           MatGetRow(cMat,gid,&n,0,0);
1026:           if(n>1){
1027:             MatSetValues(P,1,&gid,1,&gid,&one,INSERT_VALUES);
1028:           }
1029:           MatRestoreRow(cMat,gid,&n,0,0);
1030:         }
1031:       }
1032:       MatAssemblyBegin(P,MAT_FINAL_ASSEMBLY);
1033:       MatAssemblyEnd(P,MAT_FINAL_ASSEMBLY);

1035:       /* project to make new graph with collapsed edges */
1036:       MatPtAP(cMat,P,MAT_INITIAL_MATRIX,1.0,&tMat);
1037:       MatDestroy( &P );
1038:       MatDestroy( &cMat );
1039:       cMat = tMat;
1040:       MatGetVecs( cMat, &diag, 0 );
1041:       MatGetDiagonal( cMat, diag );     /* effectively PCJACOBI */
1042:       VecReciprocal( diag );
1043:       VecSqrtAbs( diag );
1044:       MatDiagonalScale( cMat, diag, diag );
1045:       VecDestroy( &diag );
1046:     }
1047:   } /* coarsen iterator */

1049:   /* make fake matrix */
1050:   if (npe>1){
1051:     Mat mat;
1052:     PetscCDPos pos;
1053:     PetscInt gid, NN, MM, jj, mxsz = 0;
1054: 
1055:     for(kk=0;kk<nloc;kk++){
1056:       PetscCDSizeAt( agg_llists, kk, &jj );
1057:       if( jj > mxsz )  mxsz = jj;
1058:     }
1059:     MatGetSize( a_Gmat, &MM, &NN );
1060:     if( mxsz > MM-nloc ) mxsz = MM-nloc;

1062:     MatCreateAIJ( wcomm, nloc, nloc,
1063:                          PETSC_DETERMINE, PETSC_DETERMINE,
1064:                          0, 0, mxsz, 0, &mat );
1065: 

1067:     /* add the off-process members of each aggregate to the fake matrix */
1068:     for(kk=0,gid=my0;kk<nloc;kk++,gid++){
1069:       /* for(pos=PetscCDGetHeadPos(agg_llists,kk) ; pos ; pos=PetscCDGetNextPos(agg_llists,kk,pos)){ */
1070:       PetscCDGetHeadPos(agg_llists,kk,&pos);
1071:       while(pos){
1072:         PetscInt gid1;
1073:         PetscLLNGetID( pos, &gid1 );
1074:         PetscCDGetNextPos(agg_llists,kk,&pos);

1076:         if( gid1 < my0 || gid1 >= my0+nloc ) {
1077:           MatSetValues(mat,1,&gid,1,&gid1,&one,ADD_VALUES);
1078:         }
1079:       }
1080:     }
1081:     MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
1082:     MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);

1084:     PetscCDSetMat( agg_llists, mat );
1085:   }

1087:   PetscFree( lid_cprowID );
1088:   PetscFree( lid_gid );
1089:   PetscFree( lid_matched );
1090:   PetscCDDestroy( deleted_list );

1092:   return(0);
1093: }

1095: typedef struct {
1096:   int dummy;
1097: } MatCoarsen_HEM;
1098: /*
1099:    HEM coarsen, simple greedy. 
1100: */
1103: static PetscErrorCode MatCoarsenApply_HEM( MatCoarsen coarse )
1104: {
1105:   /* MatCoarsen_HEM *HEM = (MatCoarsen_HEM*)coarse->subctx; */
1106:   PetscErrorCode  ierr;
1107:   Mat             mat = coarse->graph;
1108: 
1111:   if(!coarse->perm) {
1112:     IS perm;
1113:     PetscInt n,m;
1114:     MPI_Comm wcomm = ((PetscObject)mat)->comm;
1115:     MatGetLocalSize( mat, &m, &n );
1116:     ISCreateStride( wcomm, m, 0, 1, &perm );
1117:     heavyEdgeMatchAgg( perm, mat, coarse->verbose, &coarse->agg_lists );
1118:     ISDestroy( &perm );
1119:   }
1120:   else {
1121:     heavyEdgeMatchAgg( coarse->perm, mat, coarse->verbose, &coarse->agg_lists );
1122:   }

1124:   return(0);
1125: }

1129: PetscErrorCode MatCoarsenView_HEM(MatCoarsen coarse,PetscViewer viewer)
1130: {
1131:   /* MatCoarsen_HEM *HEM = (MatCoarsen_HEM *)coarse->subctx; */
1133:   int rank;
1134:   PetscBool    iascii;

1138:   MPI_Comm_rank(((PetscObject)coarse)->comm,&rank);
1139:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
1140:   if (iascii) {
1141:     PetscViewerASCIISynchronizedPrintf(viewer,"  [%d] HEM aggregator\n",rank);
1142:     PetscViewerFlush(viewer);
1143:     PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);
1144:   }
1145:   else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported for this HEM coarsener",
1146:                 ((PetscObject)viewer)->type_name);

1148:   return(0);
1149: }

1153: PetscErrorCode MatCoarsenDestroy_HEM ( MatCoarsen coarse )
1154: {
1155:   MatCoarsen_HEM *HEM = (MatCoarsen_HEM *)coarse->subctx;

1160:   PetscFree(HEM);
1161: 
1162:   return(0);
1163: }

1165: /*MC
1166:    MATCOARSENHEM - Creates a coarsen context that uses heavy edge matching (HEM).

1168:    Collective on MPI_Comm

1170:    Input Parameter:
1171: .  coarse - the coarsen context

1173:    Options Database Keys:
1174: +  -mat_coarsen_HEM_xxx - 

1176:    Level: beginner

1178: .keywords: Coarsen, create, context

1180: .seealso: MatCoarsenSetType(), MatCoarsenType

1182: M*/

1184: EXTERN_C_BEGIN
1187: PetscErrorCode  MatCoarsenCreate_HEM(MatCoarsen coarse)
1188: {
1190:   MatCoarsen_HEM *HEM;

1193:   PetscNewLog( coarse, MatCoarsen_HEM, &HEM );
1194:   coarse->subctx              = (void*)HEM;

1196:   coarse->ops->apply          = MatCoarsenApply_HEM;
1197:   coarse->ops->view           = MatCoarsenView_HEM;
1198:   coarse->ops->destroy        = MatCoarsenDestroy_HEM;
1199:   /* coarse->ops->setfromoptions = MatCoarsenSetFromOptions_HEM; */
1200:   return(0);
1201: }
1202: EXTERN_C_END
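
/* A minimal driver sketch (helper and variable names are illustrative) showing
   how this coarsener is normally reached through the MatCoarsen interface
   rather than by calling heavyEdgeMatchAgg() directly.  It assumes the
   MatCoarsenSetAdjacency() and MatCoarsenGetData() routines of this PETSc
   release, that 'Gmat' is a symmetric MATAIJ graph built by the caller, and
   that ownership of the aggregate lists passes to the caller. */
static PetscErrorCode ExampleCoarsenWithHEM( Mat Gmat )
{
  MatCoarsen crs;
  PetscCoarsenData *agg_lists;
  MatCoarsenCreate( ((PetscObject)Gmat)->comm, &crs );
  MatCoarsenSetType( crs, MATCOARSENHEM );
  MatCoarsenSetAdjacency( crs, Gmat );
  MatCoarsenApply( crs );                /* dispatches to MatCoarsenApply_HEM above */
  MatCoarsenGetData( crs, &agg_lists );  /* aggregates as a PetscCoarsenData */
  /* ... use agg_lists, e.g. to build a prolongator ... */
  PetscCDDestroy( agg_lists );
  MatCoarsenDestroy( &crs );
  return 0;
}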