Actual source code: hem.c

#include <petsc/private/matimpl.h>
#include <../src/mat/impls/aij/seq/aij.h>
#include <../src/mat/impls/aij/mpi/mpiaij.h>

/* linked list methods
 *
 *  PetscCDCreate
 */
PetscErrorCode PetscCDCreate(PetscInt a_size, PetscCoarsenData **a_out)
{
  PetscCoarsenData *ail;

  PetscFunctionBegin;
  /* allocate pool, partially */
  PetscCall(PetscNew(&ail));
  *a_out               = ail;
  ail->pool_list.next  = NULL;
  ail->pool_list.array = NULL;
  ail->chk_sz          = 0;
  /* allocate array */
  ail->size = a_size;
  PetscCall(PetscCalloc1(a_size, &ail->array));
  ail->extra_nodes = NULL;
  ail->mat         = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}
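
/* Usage sketch (illustrative only; mirrors how this file itself uses the API
 * below): a PetscCoarsenData is created with one list slot per local vertex,
 * filled with PetscCDAppendID(), and freed with PetscCDDestroy():
 *
 *   PetscCoarsenData *agg;
 *   PetscCall(PetscCDCreate(nloc, &agg));
 *   for (PetscInt lid = 0; lid < nloc; lid++) PetscCall(PetscCDAppendID(agg, lid, my0 + lid));
 *   ...
 *   PetscCall(PetscCDDestroy(agg));
 *
 * ('nloc' and 'my0' are placeholders for the local size and first global index.) */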

/* PetscCDDestroy
 */
PetscErrorCode PetscCDDestroy(PetscCoarsenData *ail)
{
  PetscCDArrNd *n = &ail->pool_list;

  PetscFunctionBegin;
  n = n->next;
  while (n) {
    PetscCDArrNd *lstn = n;
    n                  = n->next;
    PetscCall(PetscFree(lstn));
  }
  if (ail->pool_list.array) PetscCall(PetscFree(ail->pool_list.array));
  PetscCall(PetscFree(ail->array));
  if (ail->mat) PetscCall(MatDestroy(&ail->mat));
  /* delete this (+agg+pool array) */
  PetscCall(PetscFree(ail));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* PetscCDSetChuckSize - set the chunk size used when growing the node pool
 */
PetscErrorCode PetscCDSetChuckSize(PetscCoarsenData *ail, PetscInt a_sz)
{
  PetscFunctionBegin;
  ail->chk_sz = a_sz;
  PetscFunctionReturn(PETSC_SUCCESS);
}
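
/* Note on memory: list nodes are never freed individually. PetscCDGetNewNode()
 * below serves nodes from chunked pool arrays of chk_sz entries hung off
 * pool_list, and first recycles nodes parked on the extra_nodes free list by
 * PetscCDRemoveAll(); everything is released at once in PetscCDDestroy(). */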

/* PetscCDGetNewNode
 */
static PetscErrorCode PetscCDGetNewNode(PetscCoarsenData *ail, PetscCDIntNd **a_out, PetscInt a_id)
{
  PetscFunctionBegin;
  *a_out = NULL; /* squelch -Wmaybe-uninitialized */
  if (ail->extra_nodes) {
    PetscCDIntNd *node = ail->extra_nodes;
    ail->extra_nodes   = node->next;
    node->gid          = a_id;
    node->next         = NULL;
    *a_out             = node;
  } else {
    if (!ail->pool_list.array) {
      if (!ail->chk_sz) ail->chk_sz = 10; /* use a chunk size of ail->size? */
      PetscCall(PetscMalloc1(ail->chk_sz, &ail->pool_list.array));
      ail->new_node       = ail->pool_list.array;
      ail->new_left       = ail->chk_sz;
      ail->new_node->next = NULL;
    } else if (!ail->new_left) {
      PetscCDArrNd *node;
      PetscCall(PetscMalloc(ail->chk_sz * sizeof(PetscCDIntNd) + sizeof(PetscCDArrNd), &node));
      node->array         = (PetscCDIntNd *)(node + 1);
      node->next          = ail->pool_list.next;
      ail->pool_list.next = node;
      ail->new_left       = ail->chk_sz;
      ail->new_node       = node->array;
    }
    ail->new_node->gid  = a_id;
    ail->new_node->next = NULL;
    *a_out              = ail->new_node++;
    ail->new_left--;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* PetscCDIntNdSetID
 */
PetscErrorCode PetscCDIntNdSetID(PetscCDIntNd *a_this, PetscInt a_id)
{
  PetscFunctionBegin;
  a_this->gid = a_id;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* PetscCDIntNdGetID
 */
PetscErrorCode PetscCDIntNdGetID(const PetscCDIntNd *a_this, PetscInt *a_gid)
{
  PetscFunctionBegin;
  *a_gid = a_this->gid;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* PetscCDGetHeadPos
 */
PetscErrorCode PetscCDGetHeadPos(const PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd **pos)
{
  PetscFunctionBegin;
  PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "a_idx >= ail->size: a_idx=%" PetscInt_FMT ".", a_idx);
  *pos = ail->array[a_idx];
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* PetscCDGetNextPos
 */
PetscErrorCode PetscCDGetNextPos(const PetscCoarsenData *ail, PetscInt l_idx, PetscCDIntNd **pos)
{
  PetscFunctionBegin;
  PetscCheck((*pos), PETSC_COMM_SELF, PETSC_ERR_PLIB, "NULL input position.");
  *pos = (*pos)->next;
  PetscFunctionReturn(PETSC_SUCCESS);
}
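
/* Iteration idiom used throughout this file (sketch):
 *
 *   PetscCDIntNd *pos;
 *   PetscInt      gid;
 *   PetscCall(PetscCDGetHeadPos(ail, lid, &pos));
 *   while (pos) {
 *     PetscCall(PetscCDIntNdGetID(pos, &gid));
 *     PetscCall(PetscCDGetNextPos(ail, lid, &pos));
 *     ... use gid ...
 *   }
 */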

/* PetscCDAppendID
 */
PetscErrorCode PetscCDAppendID(PetscCoarsenData *ail, PetscInt a_idx, PetscInt a_id)
{
  PetscCDIntNd *n, *n2;

  PetscFunctionBegin;
  PetscCall(PetscCDGetNewNode(ail, &n, a_id));
  PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
  if (!(n2 = ail->array[a_idx])) ail->array[a_idx] = n;
  else {
    do {
      if (!n2->next) {
        n2->next = n;
        PetscCheck(!n->next, PETSC_COMM_SELF, PETSC_ERR_PLIB, "n should not have a next");
        break;
      }
      n2 = n2->next;
    } while (n2);
    PetscCheck(n2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "n2 should be non-null");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* PetscCDAppendNode
 */
PetscErrorCode PetscCDAppendNode(PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd *a_n)
{
  PetscCDIntNd *n2;

  PetscFunctionBegin;
  PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
  if (!(n2 = ail->array[a_idx])) ail->array[a_idx] = a_n;
  else {
    do {
      if (!n2->next) {
        n2->next  = a_n;
        a_n->next = NULL;
        break;
      }
      n2 = n2->next;
    } while (n2);
    PetscCheck(n2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "n2 should be non-null");
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* PetscCDRemoveNextNode: remove a_last->next; this exposes the singly linked list structure to the API
 */
PetscErrorCode PetscCDRemoveNextNode(PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd *a_last)
{
  PetscCDIntNd *del;

  PetscFunctionBegin;
  PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
  PetscCheck(a_last->next, PETSC_COMM_SELF, PETSC_ERR_PLIB, "a_last should have a next");
  del          = a_last->next;
  a_last->next = del->next;
  /* del->next = NULL; -- this is still used by an iterator so keep it intact -- need to fix this with a doubly linked list */
  /* could reuse n2 but PetscCDAppendNode sometimes uses it */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* PetscCDPrint
 */
PetscErrorCode PetscCDPrint(const PetscCoarsenData *ail, MPI_Comm comm)
{
  PetscCDIntNd *n;
  PetscInt      ii, kk;

  PetscFunctionBegin;
  for (ii = 0; ii < ail->size; ii++) {
    kk = 0;
    n  = ail->array[ii];
    if (n) PetscCall(PetscSynchronizedPrintf(comm, "list %" PetscInt_FMT ":\n", ii));
    while (n) {
      PetscCall(PetscSynchronizedPrintf(comm, "\t %" PetscInt_FMT ") id %" PetscInt_FMT "\n", ++kk, n->gid));
      n = n->next;
    }
  }
  PetscCall(PetscSynchronizedFlush(comm, PETSC_STDOUT));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* PetscCDAppendRemove
 */
PetscErrorCode PetscCDAppendRemove(PetscCoarsenData *ail, PetscInt a_destidx, PetscInt a_srcidx)
{
  PetscCDIntNd *n;

  PetscFunctionBegin;
  PetscCheck(a_srcidx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_srcidx);
  PetscCheck(a_destidx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_destidx);
  PetscCheck(a_destidx != a_srcidx, PETSC_COMM_SELF, PETSC_ERR_PLIB, "a_destidx==a_srcidx %" PetscInt_FMT ".", a_destidx);
  n = ail->array[a_destidx];
  if (!n) ail->array[a_destidx] = ail->array[a_srcidx];
  else {
    do {
      if (!n->next) {
        n->next = ail->array[a_srcidx];
        break;
      }
      n = n->next;
    } while (1);
  }
  ail->array[a_srcidx] = NULL;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* PetscCDRemoveAll
 */
PetscErrorCode PetscCDRemoveAll(PetscCoarsenData *ail, PetscInt a_idx)
{
  PetscCDIntNd *rem, *n1;

  PetscFunctionBegin;
  PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
  rem               = ail->array[a_idx];
  ail->array[a_idx] = NULL;
  if (!(n1 = ail->extra_nodes)) ail->extra_nodes = rem;
  else {
    while (n1->next) n1 = n1->next;
    n1->next = rem;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* PetscCDSizeAt
 */
PetscErrorCode PetscCDSizeAt(const PetscCoarsenData *ail, PetscInt a_idx, PetscInt *a_sz)
{
  PetscCDIntNd *n1;
  PetscInt      sz = 0;

  PetscFunctionBegin;
  PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
  n1 = ail->array[a_idx];
  while (n1) {
    n1 = n1->next;
    sz++;
  }
  *a_sz = sz;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* PetscCDEmptyAt
 */
PetscErrorCode PetscCDEmptyAt(const PetscCoarsenData *ail, PetscInt a_idx, PetscBool *a_e)
{
  PetscFunctionBegin;
  PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
  *a_e = (PetscBool)(ail->array[a_idx] == NULL);
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* PetscCDGetMIS - used for C-F methods
 */
PetscErrorCode PetscCDGetMIS(PetscCoarsenData *ail, IS *a_mis)
{
  PetscCDIntNd *n;
  PetscInt      ii, kk;
  PetscInt     *permute;

  PetscFunctionBegin;
  for (ii = kk = 0; ii < ail->size; ii++) {
    n = ail->array[ii];
    if (n) kk++;
  }
  PetscCall(PetscMalloc1(kk, &permute));
  for (ii = kk = 0; ii < ail->size; ii++) {
    n = ail->array[ii];
    if (n) permute[kk++] = ii;
  }
  PetscCall(ISCreateGeneral(PETSC_COMM_SELF, kk, permute, PETSC_OWN_POINTER, a_mis));
  PetscFunctionReturn(PETSC_SUCCESS);
}
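
/* Note: the IS returned above holds the local indices of the vertices whose
 * list is non-empty (the selected coarse points); with PETSC_OWN_POINTER the
 * IS takes ownership of 'permute', so it is not freed here. */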

/* PetscCDGetMat
 */
PetscErrorCode PetscCDGetMat(PetscCoarsenData *ail, Mat *a_mat)
{
  PetscFunctionBegin;
  *a_mat   = ail->mat;
  ail->mat = NULL; /* give ownership to the caller */
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* PetscCDSetMat
 */
PetscErrorCode PetscCDSetMat(PetscCoarsenData *ail, Mat a_mat)
{
  PetscFunctionBegin;
  if (ail->mat) {
    PetscCall(MatDestroy(&ail->mat)); /* should not happen */
  }
  ail->mat = a_mat;
  PetscFunctionReturn(PETSC_SUCCESS);
}

/* PetscCDGetASMBlocks - get IS of aggregates for ASM smoothers
 */
PetscErrorCode PetscCDGetASMBlocks(const PetscCoarsenData *ail, const PetscInt a_bs, Mat mat, PetscInt *a_sz, IS **a_local_is)
{
  PetscCDIntNd *n;
  PetscInt      lsz, ii, kk, *idxs, jj, s, e, gid;
  IS           *is_loc, is_bcs;

  PetscFunctionBegin;
  for (ii = kk = 0; ii < ail->size; ii++) {
    if (ail->array[ii]) kk++;
  }
  /* count BCs */
  PetscCall(MatGetOwnershipRange(mat, &s, &e));
  for (gid = s, lsz = 0; gid < e; gid++) {
    PetscCall(MatGetRow(mat, gid, &jj, NULL, NULL));
    if (jj < 2) lsz++;
    PetscCall(MatRestoreRow(mat, gid, &jj, NULL, NULL));
  }
  if (lsz) {
    PetscCall(PetscMalloc1(a_bs * lsz, &idxs));
    for (gid = s, lsz = 0; gid < e; gid++) {
      PetscCall(MatGetRow(mat, gid, &jj, NULL, NULL));
      if (jj < 2) {
        for (jj = 0; jj < a_bs; lsz++, jj++) idxs[lsz] = a_bs * gid + jj;
      }
      PetscCall(MatRestoreRow(mat, gid, &jj, NULL, NULL));
    }
    PetscCall(ISCreateGeneral(PETSC_COMM_SELF, lsz, idxs, PETSC_OWN_POINTER, &is_bcs));
    *a_sz = kk + 1; /* out */
  } else {
    is_bcs = NULL;
    *a_sz  = kk; /* out */
  }
  PetscCall(PetscMalloc1(*a_sz, &is_loc));

  for (ii = kk = 0; ii < ail->size; ii++) {
    for (lsz = 0, n = ail->array[ii]; n; lsz++, n = n->next) /* void */
      ;
    if (lsz) {
      PetscCall(PetscMalloc1(a_bs * lsz, &idxs));
      for (lsz = 0, n = ail->array[ii]; n; n = n->next) {
        PetscCall(PetscCDIntNdGetID(n, &gid));
        for (jj = 0; jj < a_bs; lsz++, jj++) idxs[lsz] = a_bs * gid + jj;
      }
      PetscCall(ISCreateGeneral(PETSC_COMM_SELF, lsz, idxs, PETSC_OWN_POINTER, &is_loc[kk++]));
    }
  }
  if (is_bcs) is_loc[kk++] = is_bcs;
  PetscCheck(*a_sz == kk, PETSC_COMM_SELF, PETSC_ERR_PLIB, "*a_sz %" PetscInt_FMT " != kk %" PetscInt_FMT, *a_sz, kk);
  *a_local_is = is_loc; /* out */

  PetscFunctionReturn(PETSC_SUCCESS);
}
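
/* Usage sketch (illustrative only; 'pc', 'agg', 'bs', and 'Gmat' are supplied
 * by the caller): the IS array built above has the shape expected by
 * PCASMSetLocalSubdomains() for one block per aggregate, e.g.
 *
 *   PetscInt nblk;
 *   IS      *blks;
 *   PetscCall(PetscCDGetASMBlocks(agg, bs, Gmat, &nblk, &blks));
 *   PetscCall(PCASMSetLocalSubdomains(pc, nblk, blks, NULL));
 */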

/* edge for priority queue */
typedef struct edge_tag {
  PetscReal weight;
  PetscInt  lid0, gid1, cpid1;
} Edge;

static int gamg_hem_compare(const void *a, const void *b)
{
  PetscReal va = ((Edge *)a)->weight, vb = ((Edge *)b)->weight;
  return (va < vb) ? 1 : (va == vb) ? 0 : -1; /* 0 for equal */
}
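
/* Note: with this comparator, qsort() below orders the edge list from heaviest
 * to lightest weight, which is the order the greedy matching wants. */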

/*
  MatCoarsenApply_HEM_private - parallel heavy edge matching

  Input Parameters:
+ perm   - permutation
- a_Gmat - global matrix of the graph

  Output Parameter:
. a_locals_llist - array of lists of local nodes rooted at local nodes
*/
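/* Outline of the implementation below:
 *  1) duplicate the graph into cMat and seed every vertex's aggregate list with itself;
 *  2) each of the (fixed) 4 iterations computes, per vertex, the heaviest incident
 *     edge and the largest owning rank among its ghost neighbors (ghost values moved
 *     with VecScatter over mpimat->Mvctx), then builds and sorts the local edge list;
 *  3) up to 120 sub-iterations greedily match along the sorted edges, deferring
 *     cross-rank ties to the larger rank and exchanging deleted-ghost lists with
 *     point-to-point messages (tag1 requests, tag2 replies);
 *  4) a matching projector P coarsens the graph, cMat <- P^T cMat P, which is then
 *     symmetrically rescaled by the inverse square root of its diagonal. */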
static PetscErrorCode MatCoarsenApply_HEM_private(IS perm, Mat a_Gmat, PetscCoarsenData **a_locals_llist)
{
  PetscBool         isMPI;
  MPI_Comm          comm;
  PetscInt          sub_it, kk, n, ix, *idx, *ii, iter, Iend, my0;
  PetscMPIInt       rank, size;
  const PetscInt    nloc = a_Gmat->rmap->n, n_iter = 4; /* need to figure out how to stop this */
  PetscInt         *lid_cprowID, *lid_gid;
  PetscBool        *lid_matched;
  Mat_SeqAIJ       *matA, *matB = NULL;
  Mat_MPIAIJ       *mpimat     = NULL;
  PetscScalar       one        = 1.;
  PetscCoarsenData *agg_llists = NULL, *deleted_list = NULL;
  Mat               cMat, tMat, P;
  MatScalar        *ap;
  PetscMPIInt       tag1, tag2;

  PetscFunctionBegin;
  PetscCall(PetscObjectGetComm((PetscObject)a_Gmat, &comm));
  PetscCallMPI(MPI_Comm_rank(comm, &rank));
  PetscCallMPI(MPI_Comm_size(comm, &size));
  PetscCall(MatGetOwnershipRange(a_Gmat, &my0, &Iend));
  PetscCall(PetscCommGetNewTag(comm, &tag1));
  PetscCall(PetscCommGetNewTag(comm, &tag2));

  PetscCall(PetscMalloc1(nloc, &lid_gid)); /* explicit array needed */
  PetscCall(PetscMalloc1(nloc, &lid_cprowID));
  PetscCall(PetscMalloc1(nloc, &lid_matched));

  PetscCall(PetscCDCreate(nloc, &agg_llists));
  /* PetscCall(PetscCDSetChuckSize(agg_llists, nloc+1)); */
  *a_locals_llist = agg_llists;
  PetscCall(PetscCDCreate(size, &deleted_list));
  PetscCall(PetscCDSetChuckSize(deleted_list, 100));
  /* setup 'lid_gid' for scatters and add self to all lists */
  for (kk = 0; kk < nloc; kk++) {
    lid_gid[kk] = kk + my0;
    PetscCall(PetscCDAppendID(agg_llists, kk, my0 + kk));
  }

  /* make a copy of the graph; this gets destroyed in the iterations */
  PetscCall(MatDuplicate(a_Gmat, MAT_COPY_VALUES, &cMat));
  PetscCall(PetscObjectTypeCompare((PetscObject)a_Gmat, MATMPIAIJ, &isMPI));
  iter = 0;
  while (iter++ < n_iter) {
    PetscScalar    *cpcol_gid, *cpcol_max_ew, *cpcol_max_pe, *lid_max_ew;
    PetscBool      *cpcol_matched;
    PetscMPIInt    *cpcol_pe, proc;
    Vec             locMaxEdge, locMaxPE, ghostMaxEdge, ghostMaxPE;
    PetscInt        nEdges, n_nz_row, jj;
    Edge           *Edges;
    PetscInt        gid;
    const PetscInt *perm_ix, n_sub_its = 120;

    /* get submatrices of cMat */
    if (isMPI) {
      mpimat = (Mat_MPIAIJ *)cMat->data;
      matA   = (Mat_SeqAIJ *)mpimat->A->data;
      matB   = (Mat_SeqAIJ *)mpimat->B->data;
      if (!matB->compressedrow.use) {
        /* force construction of compressed row data structure since code below requires it */
        PetscCall(MatCheckCompressedRow(mpimat->B, matB->nonzerorowcnt, &matB->compressedrow, matB->i, mpimat->B->rmap->n, -1.0));
      }
    } else {
      matA = (Mat_SeqAIJ *)cMat->data;
    }

    /* set max edge on nodes */
    PetscCall(MatCreateVecs(cMat, &locMaxEdge, NULL));
    PetscCall(MatCreateVecs(cMat, &locMaxPE, NULL));

    /* get 'cpcol_pe' & 'cpcol_gid' & init. 'cpcol_matched' using 'mpimat->lvec' */
    if (mpimat) {
      Vec         vec;
      PetscScalar vval;

      PetscCall(MatCreateVecs(cMat, &vec, NULL));
      /* cpcol_pe */
      vval = (PetscScalar)(rank);
      for (kk = 0, gid = my0; kk < nloc; kk++, gid++) { PetscCall(VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */ }
      PetscCall(VecAssemblyBegin(vec));
      PetscCall(VecAssemblyEnd(vec));
      PetscCall(VecScatterBegin(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecGetArray(mpimat->lvec, &cpcol_gid)); /* get proc IDs in 'cpcol_gid' */
      PetscCall(VecGetLocalSize(mpimat->lvec, &n));
      PetscCall(PetscMalloc1(n, &cpcol_pe));
      for (kk = 0; kk < n; kk++) cpcol_pe[kk] = (PetscMPIInt)PetscRealPart(cpcol_gid[kk]);
      PetscCall(VecRestoreArray(mpimat->lvec, &cpcol_gid));

      /* cpcol_gid */
      for (kk = 0, gid = my0; kk < nloc; kk++, gid++) {
        vval = (PetscScalar)(gid);
        PetscCall(VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */
      }
      PetscCall(VecAssemblyBegin(vec));
      PetscCall(VecAssemblyEnd(vec));
      PetscCall(VecScatterBegin(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecDestroy(&vec));
      PetscCall(VecGetArray(mpimat->lvec, &cpcol_gid)); /* get GIDs in 'cpcol_gid' */

      /* cpcol_matched */
      PetscCall(VecGetLocalSize(mpimat->lvec, &n));
      PetscCall(PetscMalloc1(n, &cpcol_matched));
      for (kk = 0; kk < n; kk++) cpcol_matched[kk] = PETSC_FALSE;
    }
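
    /* In parallel, for each ghost (off-diagonal) column k of cMat we now have:
     * cpcol_pe[k] = owning rank, cpcol_gid[k] = global index (still checked out
     * of mpimat->lvec), and cpcol_matched[k] = matched flag. */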

    /* need an inverse map - locals */
    for (kk = 0; kk < nloc; kk++) lid_cprowID[kk] = -1;
    /* set index into compressed row 'lid_cprowID' */
    if (matB) {
      for (ix = 0; ix < matB->compressedrow.nrows; ix++) {
        PetscInt lid = matB->compressedrow.rindex[ix];
        if (lid >= 0) lid_cprowID[lid] = ix;
      }
    }
    /* compute 'locMaxEdge' & 'locMaxPE', and count the edges */
    for (nEdges = 0, kk = 0, gid = my0; kk < nloc; kk++, gid++) {
      PetscReal   max_e = 0., tt;
      PetscScalar vval;
      PetscInt    lid    = kk;
      PetscMPIInt max_pe = rank, pe;

      ii  = matA->i;
      n   = ii[lid + 1] - ii[lid];
      idx = matA->j + ii[lid];
      ap  = matA->a + ii[lid];
      for (jj = 0; jj < n; jj++) {
        PetscInt lidj = idx[jj];
        if (lidj != lid && PetscRealPart(ap[jj]) > max_e) max_e = PetscRealPart(ap[jj]);
        if (lidj > lid) nEdges++;
      }
      if ((ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
        ii  = matB->compressedrow.i;
        n   = ii[ix + 1] - ii[ix];
        ap  = matB->a + ii[ix];
        idx = matB->j + ii[ix];
        for (jj = 0; jj < n; jj++) {
          if ((tt = PetscRealPart(ap[jj])) > max_e) max_e = tt;
          nEdges++;
          if ((pe = cpcol_pe[idx[jj]]) > max_pe) max_pe = pe;
        }
      }
      vval = max_e;
      PetscCall(VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES));

      vval = (PetscScalar)max_pe;
      PetscCall(VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES));
    }
    PetscCall(VecAssemblyBegin(locMaxEdge));
    PetscCall(VecAssemblyEnd(locMaxEdge));
    PetscCall(VecAssemblyBegin(locMaxPE));
    PetscCall(VecAssemblyEnd(locMaxPE));

    /* get 'cpcol_max_ew' & 'cpcol_max_pe' */
    if (mpimat) {
      PetscCall(VecDuplicate(mpimat->lvec, &ghostMaxEdge));
      PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecGetArray(ghostMaxEdge, &cpcol_max_ew));

      PetscCall(VecDuplicate(mpimat->lvec, &ghostMaxPE));
      PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD));
      PetscCall(VecGetArray(ghostMaxPE, &cpcol_max_pe));
    }

    /* setup sorted list of edges */
    PetscCall(PetscMalloc1(nEdges, &Edges));
    PetscCall(ISGetIndices(perm, &perm_ix));
    for (nEdges = n_nz_row = kk = 0; kk < nloc; kk++) {
      PetscInt nn, lid = perm_ix[kk];
      ii = matA->i;
      nn = n = ii[lid + 1] - ii[lid];
      idx    = matA->j + ii[lid];
      ap     = matA->a + ii[lid];
      for (jj = 0; jj < n; jj++) {
        PetscInt lidj = idx[jj];
        if (lidj > lid) {
          Edges[nEdges].lid0   = lid;
          Edges[nEdges].gid1   = lidj + my0;
          Edges[nEdges].cpid1  = -1;
          Edges[nEdges].weight = PetscRealPart(ap[jj]);
          nEdges++;
        }
      }
      if ((ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
        ii  = matB->compressedrow.i;
        n   = ii[ix + 1] - ii[ix];
        ap  = matB->a + ii[ix];
        idx = matB->j + ii[ix];
        nn += n;
        for (jj = 0; jj < n; jj++) {
          Edges[nEdges].lid0   = lid;
          Edges[nEdges].gid1   = (PetscInt)PetscRealPart(cpcol_gid[idx[jj]]);
          Edges[nEdges].cpid1  = idx[jj];
          Edges[nEdges].weight = PetscRealPart(ap[jj]);
          nEdges++;
        }
      }
      if (nn > 1) n_nz_row++;
      else if (iter == 1) {
        /* should select this because it is technically in the MIS, but let's not */
        PetscCall(PetscCDRemoveAll(agg_llists, lid));
      }
    }
    PetscCall(ISRestoreIndices(perm, &perm_ix));

    qsort(Edges, nEdges, sizeof(Edge), gamg_hem_compare);

    /* projection matrix */
    PetscCall(MatCreateAIJ(comm, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE, 1, NULL, 1, NULL, &P));

    /* clear matched flags */
    for (kk = 0; kk < nloc; kk++) lid_matched[kk] = PETSC_FALSE;
    /* process - communicate - process */
    for (sub_it = 0; sub_it < n_sub_its; sub_it++) {
      PetscInt nactive_edges;

      PetscCall(VecGetArray(locMaxEdge, &lid_max_ew));
      for (kk = nactive_edges = 0; kk < nEdges; kk++) {
        /* HEM */
        const Edge    *e    = &Edges[kk];
        const PetscInt lid0 = e->lid0, gid1 = e->gid1, cpid1 = e->cpid1, gid0 = lid0 + my0, lid1 = gid1 - my0;
        PetscBool      isOK = PETSC_TRUE;

        /* skip if either (local) vertex is done already */
        if (lid_matched[lid0] || (gid1 >= my0 && gid1 < Iend && lid_matched[gid1 - my0])) continue;

        /* skip if ghost vertex is done */
        if (cpid1 != -1 && cpcol_matched[cpid1]) continue;

        nactive_edges++;
        /* skip if I have a bigger edge someplace (lid_max_ew gets updated) */
        if (PetscRealPart(lid_max_ew[lid0]) > e->weight + PETSC_SMALL) continue;

        if (cpid1 == -1) {
          if (PetscRealPart(lid_max_ew[lid1]) > e->weight + PETSC_SMALL) continue;
        } else {
          /* see if edge might get matched on other proc */
          PetscReal g_max_e = PetscRealPart(cpcol_max_ew[cpid1]);
          if (g_max_e > e->weight + PETSC_SMALL) continue;
          else if (e->weight > g_max_e - PETSC_SMALL && (PetscMPIInt)PetscRealPart(cpcol_max_pe[cpid1]) > rank) {
            /* max_e is (essentially) equal to this edge's weight and a larger rank will deal with it */
            continue;
          }
        }

        /* check ghost for v0 */
        if (isOK) {
          PetscReal max_e, ew;
          if ((ix = lid_cprowID[lid0]) != -1) { /* if I have any ghost neighbors */
            ii  = matB->compressedrow.i;
            n   = ii[ix + 1] - ii[ix];
            ap  = matB->a + ii[ix];
            idx = matB->j + ii[ix];
            for (jj = 0; jj < n && isOK; jj++) {
              PetscInt lidj = idx[jj];
              if (cpcol_matched[lidj]) continue;
              ew    = PetscRealPart(ap[jj]);
              max_e = PetscRealPart(cpcol_max_ew[lidj]);
              /* max_e is (essentially) equal to this edge's weight and a larger rank will deal with it */
              if (ew > max_e - PETSC_SMALL && ew > PetscRealPart(lid_max_ew[lid0]) - PETSC_SMALL && (PetscMPIInt)PetscRealPart(cpcol_max_pe[lidj]) > rank) isOK = PETSC_FALSE;
            }
          }

          /* for v1 */
          if (cpid1 == -1 && isOK) {
            if ((ix = lid_cprowID[lid1]) != -1) { /* if I have any ghost neighbors */
              ii  = matB->compressedrow.i;
              n   = ii[ix + 1] - ii[ix];
              ap  = matB->a + ii[ix];
              idx = matB->j + ii[ix];
              for (jj = 0; jj < n && isOK; jj++) {
                PetscInt lidj = idx[jj];
                if (cpcol_matched[lidj]) continue;
                ew    = PetscRealPart(ap[jj]);
                max_e = PetscRealPart(cpcol_max_ew[lidj]);
                /* max_e is (essentially) equal to this edge's weight and a larger rank will deal with it */
                if (ew > max_e - PETSC_SMALL && ew > PetscRealPart(lid_max_ew[lid1]) - PETSC_SMALL && (PetscMPIInt)PetscRealPart(cpcol_max_pe[lidj]) > rank) isOK = PETSC_FALSE;
              }
            }
          }
        }

        /* do it */
        if (isOK) {
          if (cpid1 == -1) {
            lid_matched[lid1] = PETSC_TRUE; /* keep track of what we've done this round */
            PetscCall(PetscCDAppendRemove(agg_llists, lid0, lid1));
          } else if (sub_it != n_sub_its - 1) {
            /* add gid1 to list of ghosts deleted by me -- I need their children */
            proc = cpcol_pe[cpid1];

            cpcol_matched[cpid1] = PETSC_TRUE; /* messing with VecGetArray array -- needed??? */

            PetscCall(PetscCDAppendID(deleted_list, proc, cpid1)); /* cache to send messages */
            PetscCall(PetscCDAppendID(deleted_list, proc, lid0));
          } else continue;

          lid_matched[lid0] = PETSC_TRUE; /* keep track of what we've done this round */
          /* set projection */
          PetscCall(MatSetValues(P, 1, &gid0, 1, &gid0, &one, INSERT_VALUES));
          PetscCall(MatSetValues(P, 1, &gid1, 1, &gid0, &one, INSERT_VALUES));
        } /* matched */
      } /* edge loop */

      /* deal with deleted ghost on first pass */
      if (size > 1 && sub_it != n_sub_its - 1) {
#define REQ_BF_SIZE 100
        PetscCDIntNd *pos;
        PetscBool     ise = PETSC_FALSE;
        PetscInt      nSend1, **sbuffs1, nSend2;
        MPI_Request  *sreqs2[REQ_BF_SIZE], *rreqs2[REQ_BF_SIZE];
        MPI_Status    status;

        /* send request */
        for (proc = 0, nSend1 = 0; proc < size; proc++) {
          PetscCall(PetscCDEmptyAt(deleted_list, proc, &ise));
          if (!ise) nSend1++;
        }
        PetscCall(PetscMalloc1(nSend1, &sbuffs1));
        for (proc = 0, nSend1 = 0; proc < size; proc++) {
          /* count ghosts */
          PetscCall(PetscCDSizeAt(deleted_list, proc, &n));
          if (n > 0) {
#define CHUNCK_SIZE 100
            PetscInt    *sbuff, *pt;
            MPI_Request *request;
            n /= 2;
            PetscCall(PetscMalloc1(2 + 2 * n + n * CHUNCK_SIZE + 2, &sbuff));
            /* save requests */
            sbuffs1[nSend1] = sbuff;
            request         = (MPI_Request *)sbuff;
            sbuff = pt = (PetscInt *)(request + 1);
            *pt++      = n;
            *pt++      = rank;

            PetscCall(PetscCDGetHeadPos(deleted_list, proc, &pos));
            while (pos) {
              PetscInt lid0, cpid, gid;
              PetscCall(PetscCDIntNdGetID(pos, &cpid));
              gid = (PetscInt)PetscRealPart(cpcol_gid[cpid]);
              PetscCall(PetscCDGetNextPos(deleted_list, proc, &pos));
              PetscCall(PetscCDIntNdGetID(pos, &lid0));
              PetscCall(PetscCDGetNextPos(deleted_list, proc, &pos));
              *pt++ = gid;
              *pt++ = lid0;
            }
            /* send request tag1 [n, proc, n*[gid1,lid0] ] */
            PetscCallMPI(MPI_Isend(sbuff, 2 * n + 2, MPIU_INT, proc, tag1, comm, request));
            /* post receive */
            request        = (MPI_Request *)pt;
            rreqs2[nSend1] = request; /* cache recv request */
            pt             = (PetscInt *)(request + 1);
            PetscCallMPI(MPI_Irecv(pt, n * CHUNCK_SIZE, MPIU_INT, proc, tag2, comm, request));
            /* clear list */
            PetscCall(PetscCDRemoveAll(deleted_list, proc));
            nSend1++;
          }
        }
        /* receive requests, send response, clear lists */
        kk = nactive_edges;
        PetscCall(MPIU_Allreduce(&kk, &nactive_edges, 1, MPIU_INT, MPI_SUM, comm)); /* global count of active edges (note: not a precise synchronization point) */
        nSend2 = 0;
        while (1) {
#define BF_SZ 10000
          PetscMPIInt  flag, count;
          PetscInt     rbuff[BF_SZ], *pt, *pt2, *pt3, count2, *sbuff, count3;
          MPI_Request *request;

          PetscCallMPI(MPI_Iprobe(MPI_ANY_SOURCE, tag1, comm, &flag, &status));
          if (!flag) break;
          PetscCallMPI(MPI_Get_count(&status, MPIU_INT, &count));
          PetscCheck(count <= BF_SZ, PETSC_COMM_SELF, PETSC_ERR_SUP, "buffer too small for receive: %d", count);
          proc = status.MPI_SOURCE;
          /* receive request tag1 [n, proc, n*[gid1,lid0] ] */
          PetscCallMPI(MPI_Recv(rbuff, count, MPIU_INT, proc, tag1, comm, &status));
          /* count sends */
          pt     = rbuff;
          count3 = count2 = 0;
          n               = *pt++;
          kk              = *pt++;
          while (n--) {
            PetscInt gid1 = *pt++, lid1 = gid1 - my0;
            kk = *pt++;
            PetscCheck(!lid_matched[lid1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Sub-iteration %" PetscInt_FMT ": received deleted gid %" PetscInt_FMT ", deleted by (lid) %" PetscInt_FMT, sub_it, gid1, kk);
            lid_matched[lid1] = PETSC_TRUE; /* keep track of what we've done this round */
            PetscCall(PetscCDSizeAt(agg_llists, lid1, &kk));
            count2 += kk + 2;
            count3++; /* number of verts requested (n) */
          }
          PetscCheck(count2 <= count3 * CHUNCK_SIZE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Irecv will be too small: %" PetscInt_FMT, count2);
          /* send tag2 *[lid0, n, n*[gid] ] */
          PetscCall(PetscMalloc(count2 * sizeof(PetscInt) + sizeof(MPI_Request), &sbuff));
          request          = (MPI_Request *)sbuff;
          sreqs2[nSend2++] = request; /* cache request */
          PetscCheck(nSend2 != REQ_BF_SIZE, PETSC_COMM_SELF, PETSC_ERR_SUP, "buffer too small for requests: %" PetscInt_FMT, nSend2);
          pt2 = sbuff = (PetscInt *)(request + 1);
          pt          = rbuff;
          n           = *pt++;
          kk          = *pt++;
          while (n--) {
            /* read [n, proc, n*[gid1,lid0]] */
            PetscInt gid1 = *pt++, lid1 = gid1 - my0, lid0 = *pt++;
            /* write [lid0, n, n*[gid] ] */
            *pt2++ = lid0;
            pt3    = pt2++; /* save pointer for later */
            PetscCall(PetscCDGetHeadPos(agg_llists, lid1, &pos));
            while (pos) {
              PetscInt gid;
              PetscCall(PetscCDIntNdGetID(pos, &gid));
              PetscCall(PetscCDGetNextPos(agg_llists, lid1, &pos));
              *pt2++ = gid;
            }
            *pt3 = (pt2 - pt3) - 1;
            /* clear list */
            PetscCall(PetscCDRemoveAll(agg_llists, lid1));
          }
          /* send requested data tag2 *[lid0, n, n*[gid1] ] */
          PetscCallMPI(MPI_Isend(sbuff, count2, MPIU_INT, proc, tag2, comm, request));
        }

        /* receive tag2 *[lid0, n, n*[gid] ] */
        for (kk = 0; kk < nSend1; kk++) {
          PetscMPIInt  count;
          MPI_Request *request;
          PetscInt    *pt, *pt2;

          request = rreqs2[kk]; /* no need to free -- buffer is in 'sbuffs1' */
          PetscCallMPI(MPI_Wait(request, &status));
          PetscCallMPI(MPI_Get_count(&status, MPIU_INT, &count));
          pt = pt2 = (PetscInt *)(request + 1);
          while (pt - pt2 < count) {
            PetscInt lid0 = *pt++, n = *pt++;
            while (n--) {
              PetscInt gid1 = *pt++;
              PetscCall(PetscCDAppendID(agg_llists, lid0, gid1));
            }
          }
        }

        /* wait for tag1 isends */
        while (nSend1--) {
          MPI_Request *request;
          request = (MPI_Request *)sbuffs1[nSend1];
          PetscCallMPI(MPI_Wait(request, &status));
          PetscCall(PetscFree(request));
        }
        PetscCall(PetscFree(sbuffs1));

        /* wait for tag2 isends */
        while (nSend2--) {
          MPI_Request *request = sreqs2[nSend2];
          PetscCallMPI(MPI_Wait(request, &status));
          PetscCall(PetscFree(request));
        }

        PetscCall(VecRestoreArray(ghostMaxEdge, &cpcol_max_ew));
        PetscCall(VecRestoreArray(ghostMaxPE, &cpcol_max_pe));

        /* get 'cpcol_matched' - use locMaxPE, ghostMaxEdge, cpcol_max_ew as scratch */
        for (kk = 0, gid = my0; kk < nloc; kk++, gid++) {
          PetscScalar vval = lid_matched[kk] ? 1.0 : 0.0;
          PetscCall(VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */
        }
        PetscCall(VecAssemblyBegin(locMaxPE));
        PetscCall(VecAssemblyEnd(locMaxPE));
        PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxPE, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
        PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxPE, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
        PetscCall(VecGetArray(ghostMaxEdge, &cpcol_max_ew));
        PetscCall(VecGetLocalSize(mpimat->lvec, &n));
        for (kk = 0; kk < n; kk++) cpcol_matched[kk] = (PetscBool)(PetscRealPart(cpcol_max_ew[kk]) != 0.0);
        PetscCall(VecRestoreArray(ghostMaxEdge, &cpcol_max_ew));
      } /* size > 1 */

      /* compute 'locMaxEdge' */
      PetscCall(VecRestoreArray(locMaxEdge, &lid_max_ew));
      for (kk = 0, gid = my0; kk < nloc; kk++, gid++) {
        PetscReal   max_e = 0., tt;
        PetscScalar vval;
        PetscInt    lid = kk;

        if (lid_matched[lid]) vval = 0.;
        else {
          ii  = matA->i;
          n   = ii[lid + 1] - ii[lid];
          idx = matA->j + ii[lid];
          ap  = matA->a + ii[lid];
          for (jj = 0; jj < n; jj++) {
            PetscInt lidj = idx[jj];
            if (lid_matched[lidj]) continue; /* this is new - can change local max */
            if (lidj != lid && PetscRealPart(ap[jj]) > max_e) max_e = PetscRealPart(ap[jj]);
          }
          if (lid_cprowID && (ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
            ii  = matB->compressedrow.i;
            n   = ii[ix + 1] - ii[ix];
            ap  = matB->a + ii[ix];
            idx = matB->j + ii[ix];
            for (jj = 0; jj < n; jj++) {
              PetscInt lidj = idx[jj];
              if (cpcol_matched[lidj]) continue;
              if ((tt = PetscRealPart(ap[jj])) > max_e) max_e = tt;
            }
          }
        }
        vval = (PetscScalar)max_e;
        PetscCall(VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */
      }
      PetscCall(VecAssemblyBegin(locMaxEdge));
      PetscCall(VecAssemblyEnd(locMaxEdge));

      if (size > 1 && sub_it != n_sub_its - 1) {
        /* compute 'cpcol_max_ew' */
        PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
        PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
        PetscCall(VecGetArray(ghostMaxEdge, &cpcol_max_ew));
        PetscCall(VecGetArray(locMaxEdge, &lid_max_ew));

        /* compute 'cpcol_max_pe' */
        for (kk = 0, gid = my0; kk < nloc; kk++, gid++) {
          PetscInt    lid = kk;
          PetscReal   ew, v1_max_e, v0_max_e = PetscRealPart(lid_max_ew[lid]);
          PetscScalar vval   = (PetscScalar)rank; /* default covers matched rows and rows with no ghost neighbors */
          PetscMPIInt max_pe = rank, pe;

          if (!lid_matched[lid] && (ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
            ii  = matB->compressedrow.i;
            n   = ii[ix + 1] - ii[ix];
            ap  = matB->a + ii[ix];
            idx = matB->j + ii[ix];
            for (jj = 0; jj < n; jj++) {
              PetscInt lidj = idx[jj];
              if (cpcol_matched[lidj]) continue;
              ew       = PetscRealPart(ap[jj]);
              v1_max_e = PetscRealPart(cpcol_max_ew[lidj]);
              /* get max pe that has a max_e equal to this edge's weight */
              if ((pe = cpcol_pe[idx[jj]]) > max_pe && ew > v1_max_e - PETSC_SMALL && ew > v0_max_e - PETSC_SMALL) max_pe = pe;
            }
            vval = (PetscScalar)max_pe;
          }
          PetscCall(VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES));
        }
        PetscCall(VecAssemblyBegin(locMaxPE));
        PetscCall(VecAssemblyEnd(locMaxPE));

        PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD));
        PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD));
        PetscCall(VecGetArray(ghostMaxPE, &cpcol_max_pe));
        PetscCall(VecRestoreArray(locMaxEdge, &lid_max_ew));
      } /* deal with deleted ghost */
      PetscCall(PetscInfo(a_Gmat, "\t %" PetscInt_FMT ".%" PetscInt_FMT ": %" PetscInt_FMT " active edges.\n", iter, sub_it, nactive_edges));
      if (!nactive_edges) break;
    } /* sub_it loop */

    /* clean up iteration */
    PetscCall(PetscFree(Edges));
    if (mpimat) {
      PetscCall(VecRestoreArray(ghostMaxEdge, &cpcol_max_ew));
      PetscCall(VecDestroy(&ghostMaxEdge));
      PetscCall(VecRestoreArray(ghostMaxPE, &cpcol_max_pe));
      PetscCall(VecDestroy(&ghostMaxPE));
      PetscCall(PetscFree(cpcol_pe));
      PetscCall(PetscFree(cpcol_matched));
    }

    PetscCall(VecDestroy(&locMaxEdge));
    PetscCall(VecDestroy(&locMaxPE));

    if (mpimat) PetscCall(VecRestoreArray(mpimat->lvec, &cpcol_gid));

    /* create next G if needed */
    if (iter == n_iter) { /* hard-wired test - need to look at fully surrounded nodes or something */
      PetscCall(MatDestroy(&P));
      PetscCall(MatDestroy(&cMat));
      break;
    } else {
      Vec diag;
      /* add identity for unmatched vertices so they stay alive */
      for (kk = 0, gid = my0; kk < nloc; kk++, gid++) {
        if (!lid_matched[kk]) {
          gid = kk + my0;
          PetscCall(MatGetRow(cMat, gid, &n, NULL, NULL));
          if (n > 1) PetscCall(MatSetValues(P, 1, &gid, 1, &gid, &one, INSERT_VALUES));
          PetscCall(MatRestoreRow(cMat, gid, &n, NULL, NULL));
        }
      }
      PetscCall(MatAssemblyBegin(P, MAT_FINAL_ASSEMBLY));
      PetscCall(MatAssemblyEnd(P, MAT_FINAL_ASSEMBLY));

      /* project to make new graph with collapsed edges */
      PetscCall(MatPtAP(cMat, P, MAT_INITIAL_MATRIX, 1.0, &tMat));
      PetscCall(MatDestroy(&P));
      PetscCall(MatDestroy(&cMat));
      cMat = tMat;
      PetscCall(MatCreateVecs(cMat, &diag, NULL));
      PetscCall(MatGetDiagonal(cMat, diag)); /* effectively PCJACOBI */
      PetscCall(VecReciprocal(diag));
      PetscCall(VecSqrtAbs(diag));
      PetscCall(MatDiagonalScale(cMat, diag, diag));
      PetscCall(VecDestroy(&diag));
    }
  } /* coarsen iterator */

  /* make 'fake' matrix that records cross-rank aggregate membership */
  if (size > 1) {
    Mat           mat;
    PetscCDIntNd *pos;
    PetscInt      gid, NN, MM, jj = 0, mxsz = 0;

    for (kk = 0; kk < nloc; kk++) {
      PetscCall(PetscCDSizeAt(agg_llists, kk, &jj));
      if (jj > mxsz) mxsz = jj;
    }
    PetscCall(MatGetSize(a_Gmat, &MM, &NN));
    if (mxsz > MM - nloc) mxsz = MM - nloc;

    PetscCall(MatCreateAIJ(comm, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE, 0, NULL, mxsz, NULL, &mat));

    for (kk = 0, gid = my0; kk < nloc; kk++, gid++) {
      /* for (pos=PetscCDGetHeadPos(agg_llists,kk) ; pos ; pos=PetscCDGetNextPos(agg_llists,kk,pos)) { */
      PetscCall(PetscCDGetHeadPos(agg_llists, kk, &pos));
      while (pos) {
        PetscInt gid1;
        PetscCall(PetscCDIntNdGetID(pos, &gid1));
        PetscCall(PetscCDGetNextPos(agg_llists, kk, &pos));

        if (gid1 < my0 || gid1 >= my0 + nloc) PetscCall(MatSetValues(mat, 1, &gid, 1, &gid1, &one, ADD_VALUES));
      }
    }
    PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
    PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
    PetscCall(PetscCDSetMat(agg_llists, mat));
  }

  PetscCall(PetscFree(lid_cprowID));
  PetscCall(PetscFree(lid_gid));
  PetscCall(PetscFree(lid_matched));
  PetscCall(PetscCDDestroy(deleted_list));
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*
   HEM coarsen, simple greedy.
*/
static PetscErrorCode MatCoarsenApply_HEM(MatCoarsen coarse)
{
  Mat mat = coarse->graph;

  PetscFunctionBegin;
  if (!coarse->perm) {
    IS       perm;
    PetscInt n, m;

    PetscCall(MatGetLocalSize(mat, &m, &n));
    PetscCall(ISCreateStride(PetscObjectComm((PetscObject)mat), m, 0, 1, &perm));
    PetscCall(MatCoarsenApply_HEM_private(perm, mat, &coarse->agg_lists));
    PetscCall(ISDestroy(&perm));
  } else {
    PetscCall(MatCoarsenApply_HEM_private(coarse->perm, mat, &coarse->agg_lists));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

static PetscErrorCode MatCoarsenView_HEM(MatCoarsen coarse, PetscViewer viewer)
{
  PetscMPIInt rank;
  PetscBool   iascii;

  PetscFunctionBegin;
  PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)coarse), &rank));
  PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
  if (iascii) {
    PetscCall(PetscViewerASCIIPushSynchronized(viewer));
    PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "  [%d] HEM aggregator\n", rank));
    PetscCall(PetscViewerFlush(viewer));
    PetscCall(PetscViewerASCIIPopSynchronized(viewer));
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}

/*MC
   MATCOARSENHEM - A coarsener that uses HEM, a simple greedy heavy-edge-matching coarsener

   Level: beginner

.seealso: `MatCoarsen`, `MatCoarsenSetType()`, `MatCoarsenGetData()`, `MatCoarsenType`, `MatCoarsenCreate()`
M*/

PETSC_EXTERN PetscErrorCode MatCoarsenCreate_HEM(MatCoarsen coarse)
{
  PetscFunctionBegin;
  coarse->ops->apply = MatCoarsenApply_HEM;
  coarse->ops->view  = MatCoarsenView_HEM;
  PetscFunctionReturn(PETSC_SUCCESS);
}
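
/* Usage sketch (illustrative only): selecting this coarsener through the
 * MatCoarsen interface, given a symmetric weighted graph matrix 'Gmat':
 *
 *   MatCoarsen        crs;
 *   PetscCoarsenData *agg;
 *   PetscCall(MatCoarsenCreate(PetscObjectComm((PetscObject)Gmat), &crs));
 *   PetscCall(MatCoarsenSetType(crs, MATCOARSENHEM));
 *   PetscCall(MatCoarsenSetAdjacency(crs, Gmat));
 *   PetscCall(MatCoarsenApply(crs));
 *   PetscCall(MatCoarsenGetData(crs, &agg)); // the aggregate lists built above
 *   PetscCall(MatCoarsenDestroy(&crs));
 */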