Actual source code: hem.c

  1: #include <petsc/private/matimpl.h>
  2: #include <../src/mat/impls/aij/seq/aij.h>
  3: #include <../src/mat/impls/aij/mpi/mpiaij.h>
  4: #include <petscdm.h>

  6: /* linked list methods
  7:  *
  8:  *  PetscCDCreate
  9:  */
 10: PetscErrorCode PetscCDCreate(PetscInt a_size, PetscCoarsenData **a_out)
 11: {
 12:   PetscCoarsenData *ail;

 14:   PetscFunctionBegin;
 15:   /* allocate pool, partially */
 16:   PetscCall(PetscNew(&ail));
 17:   *a_out               = ail;
 18:   ail->pool_list.next  = NULL;
 19:   ail->pool_list.array = NULL;
 20:   ail->chk_sz          = 0;
 21:   /* allocate array */
 22:   ail->size = a_size;
 23:   PetscCall(PetscCalloc1(a_size, &ail->array));
 24:   ail->extra_nodes = NULL;
 25:   ail->mat         = NULL;
 26:   PetscFunctionReturn(PETSC_SUCCESS);
 27: }

 29: /* PetscCDDestroy
 30:  */
 31: PetscErrorCode PetscCDDestroy(PetscCoarsenData *ail)
 32: {
 33:   PetscCDArrNd *n = &ail->pool_list;

 35:   PetscFunctionBegin;
 36:   n = n->next;
 37:   while (n) {
 38:     PetscCDArrNd *lstn = n;
 39:     n                  = n->next;
 40:     PetscCall(PetscFree(lstn));
 41:   }
 42:   if (ail->pool_list.array) PetscCall(PetscFree(ail->pool_list.array));
 43:   PetscCall(PetscFree(ail->array));
 44:   if (ail->mat) PetscCall(MatDestroy(&ail->mat));
 45:   /* delete this (+agg+pool array) */
 46:   PetscCall(PetscFree(ail));
 47:   PetscFunctionReturn(PETSC_SUCCESS);
 48: }

 50: /* PetscCDSetChunkSize
 51:  */
 52: PetscErrorCode PetscCDSetChunkSize(PetscCoarsenData *ail, PetscInt a_sz)
 53: {
 54:   PetscFunctionBegin;
 55:   ail->chk_sz = a_sz;
 56:   PetscFunctionReturn(PETSC_SUCCESS);
 57: }

 59: /*  PetscCDGetNewNode
 60:  */
 61: static PetscErrorCode PetscCDGetNewNode(PetscCoarsenData *ail, PetscCDIntNd **a_out, PetscInt a_id)
 62: {
 63:   PetscFunctionBegin;
 64:   *a_out = NULL; /* squelch -Wmaybe-uninitialized */
 65:   if (ail->extra_nodes) {
 66:     PetscCDIntNd *node = ail->extra_nodes;
 67:     ail->extra_nodes   = node->next;
 68:     node->gid          = a_id;
 69:     node->next         = NULL;
 70:     *a_out             = node;
 71:   } else {
 72:     if (!ail->pool_list.array) {
 73:       if (!ail->chk_sz) ail->chk_sz = 10; /* use a chunk size of ail->size? */
 74:       PetscCall(PetscMalloc1(ail->chk_sz, &ail->pool_list.array));
 75:       ail->new_node       = ail->pool_list.array;
 76:       ail->new_left       = ail->chk_sz;
 77:       ail->new_node->next = NULL;
 78:     } else if (!ail->new_left) {
 79:       PetscCDArrNd *node;
 80:       PetscCall(PetscMalloc(ail->chk_sz * sizeof(PetscCDIntNd) + sizeof(PetscCDArrNd), &node));
 81:       node->array         = (PetscCDIntNd *)(node + 1);
 82:       node->next          = ail->pool_list.next;
 83:       ail->pool_list.next = node;
 84:       ail->new_left       = ail->chk_sz;
 85:       ail->new_node       = node->array;
 86:     }
 87:     ail->new_node->gid  = a_id;
 88:     ail->new_node->next = NULL;
 89:     *a_out              = ail->new_node++;
 90:     ail->new_left--;
 91:   }
 92:   PetscFunctionReturn(PETSC_SUCCESS);
 93: }

 95: /* PetscCDIntNdSetID
 96:  */
 97: PetscErrorCode PetscCDIntNdSetID(PetscCDIntNd *a_this, PetscInt a_id)
 98: {
 99:   PetscFunctionBegin;
100:   a_this->gid = a_id;
101:   PetscFunctionReturn(PETSC_SUCCESS);
102: }

104: /* PetscCDIntNdGetID
105:  */
106: PetscErrorCode PetscCDIntNdGetID(const PetscCDIntNd *a_this, PetscInt *a_gid)
107: {
108:   PetscFunctionBegin;
109:   *a_gid = a_this->gid;
110:   PetscFunctionReturn(PETSC_SUCCESS);
111: }

113: /* PetscCDGetHeadPos
114:  */
115: PetscErrorCode PetscCDGetHeadPos(const PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd **pos)
116: {
117:   PetscFunctionBegin;
118:   PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "a_idx >= ail->size: a_idx=%" PetscInt_FMT ".", a_idx);
119:   *pos = ail->array[a_idx];
120:   PetscFunctionReturn(PETSC_SUCCESS);
121: }

123: /* PetscCDGetNextPos
124:  */
125: PetscErrorCode PetscCDGetNextPos(const PetscCoarsenData *ail, PetscInt l_idx, PetscCDIntNd **pos)
126: {
127:   PetscFunctionBegin;
128:   PetscCheck(*pos, PETSC_COMM_SELF, PETSC_ERR_PLIB, "NULL input position.");
129:   *pos = (*pos)->next;
130:   PetscFunctionReturn(PETSC_SUCCESS);
131: }

133: /* PetscCDAppendID
134:  */
135: PetscErrorCode PetscCDAppendID(PetscCoarsenData *ail, PetscInt a_idx, PetscInt a_id)
136: {
137:   PetscCDIntNd *n, *n2;

139:   PetscFunctionBegin;
140:   PetscCall(PetscCDGetNewNode(ail, &n, a_id));
141:   PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
142:   if (!(n2 = ail->array[a_idx])) ail->array[a_idx] = n;
143:   else {
144:     do {
145:       if (!n2->next) {
146:         n2->next = n;
147:         PetscCheck(!n->next, PETSC_COMM_SELF, PETSC_ERR_PLIB, "n should not have a next");
148:         break;
149:       }
150:       n2 = n2->next;
151:     } while (n2);
152:     PetscCheck(n2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "n2 should be non-null");
153:   }
154:   PetscFunctionReturn(PETSC_SUCCESS);
155: }
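
/*
   Editor's note: the following is an illustrative sketch, not part of the PETSc source.
   It shows typical use of the list-of-lists API above: create a PetscCoarsenData, append
   global IDs to one local list, walk that list with the head/next iterator, and destroy.
   The function name ExampleUsePetscCD is hypothetical.
*/
static PetscErrorCode ExampleUsePetscCD(void)
{
  PetscCoarsenData *cd;
  PetscCDIntNd     *pos;
  PetscInt          gid;

  PetscFunctionBegin;
  PetscCall(PetscCDCreate(4, &cd));      /* 4 local lists, all initially empty */
  PetscCall(PetscCDSetChunkSize(cd, 8)); /* chunk size for the node pool */
  PetscCall(PetscCDAppendID(cd, 0, 10)); /* list 0 becomes: 10 -> 11 */
  PetscCall(PetscCDAppendID(cd, 0, 11));
  PetscCall(PetscCDGetHeadPos(cd, 0, &pos));
  while (pos) { /* iterate list 0: visits gid = 10, then 11 */
    PetscCall(PetscCDIntNdGetID(pos, &gid));
    PetscCall(PetscCDGetNextPos(cd, 0, &pos));
  }
  PetscCall(PetscCDDestroy(cd));
  PetscFunctionReturn(PETSC_SUCCESS);
}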

157: /* PetscCDAppendNode
158:  */
159: PetscErrorCode PetscCDAppendNode(PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd *a_n)
160: {
161:   PetscCDIntNd *n2;

163:   PetscFunctionBegin;
164:   PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
165:   if (!(n2 = ail->array[a_idx])) ail->array[a_idx] = a_n;
166:   else {
167:     do {
168:       if (!n2->next) {
169:         n2->next  = a_n;
170:         a_n->next = NULL;
171:         break;
172:       }
173:       n2 = n2->next;
174:     } while (n2);
175:     PetscCheck(n2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "n2 should be non-null");
176:   }
177:   PetscFunctionReturn(PETSC_SUCCESS);
178: }

180: /* PetscCDRemoveNextNode - removes a_last->next; this exposes the singly linked list structure to the API (not used)
181:  */
182: PetscErrorCode PetscCDRemoveNextNode(PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd *a_last)
183: {
184:   PetscCDIntNd *del;

186:   PetscFunctionBegin;
187:   PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
188:   PetscCheck(a_last->next, PETSC_COMM_SELF, PETSC_ERR_PLIB, "a_last should have a next");
189:   del          = a_last->next;
190:   a_last->next = del->next;
191:   /* del->next = NULL; -- this is still used in an iterator so keep it intact -- need to fix this with a doubly linked list */
192:   /* could reuse n2 but PetscCDAppendNode sometimes uses it */
193:   PetscFunctionReturn(PETSC_SUCCESS);
194: }

196: /* PetscCDPrint
197:  */
198: PetscErrorCode PetscCDPrint(const PetscCoarsenData *ail, PetscInt my0, MPI_Comm comm)
199: {
200:   PetscCDIntNd *n, *n2;
201:   PetscInt      ii;

203:   PetscFunctionBegin;
204:   for (ii = 0; ii < ail->size; ii++) {
205:     n2 = n = ail->array[ii];
206:     if (n) PetscCall(PetscSynchronizedPrintf(comm, "list %" PetscInt_FMT ":", ii + my0));
207:     while (n) {
208:       PetscCall(PetscSynchronizedPrintf(comm, " %" PetscInt_FMT, n->gid));
209:       n = n->next;
210:     }
211:     if (n2) PetscCall(PetscSynchronizedPrintf(comm, "\n"));
212:   }
213:   PetscCall(PetscSynchronizedFlush(comm, PETSC_STDOUT));
214:   PetscFunctionReturn(PETSC_SUCCESS);
215: }

217: /* PetscCDMoveAppend - take the list at a_srcidx and append it to the list at a_destidx
218:  */
219: PetscErrorCode PetscCDMoveAppend(PetscCoarsenData *ail, PetscInt a_destidx, PetscInt a_srcidx)
220: {
221:   PetscCDIntNd *n;

223:   PetscFunctionBegin;
224:   PetscCheck(a_srcidx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_srcidx);
225:   PetscCheck(a_destidx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_destidx);
226:   PetscCheck(a_destidx != a_srcidx, PETSC_COMM_SELF, PETSC_ERR_PLIB, "a_destidx==a_srcidx %" PetscInt_FMT ".", a_destidx);
227:   n = ail->array[a_destidx];
228:   if (!n) ail->array[a_destidx] = ail->array[a_srcidx];
229:   else {
230:     do {
231:       if (!n->next) {
232:         n->next = ail->array[a_srcidx]; // append
233:         break;
234:       }
235:       n = n->next;
236:     } while (1);
237:   }
238:   ail->array[a_srcidx] = NULL; // empty
239:   PetscFunctionReturn(PETSC_SUCCESS);
240: }
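
/*
   Editor's note: an illustrative sketch, not part of the PETSc source, of the PetscCDMoveAppend
   semantics: the list at the source index is spliced onto the end of the destination list and
   the source index is left empty. The function name ExampleMoveAppend is hypothetical.
*/
static PetscErrorCode ExampleMoveAppend(PetscCoarsenData *cd)
{
  PetscBool empty;

  PetscFunctionBegin;
  PetscCall(PetscCDAppendID(cd, 0, 100));     /* list 0: 100 */
  PetscCall(PetscCDAppendID(cd, 1, 200));     /* list 1: 200 */
  PetscCall(PetscCDMoveAppend(cd, 0, 1));     /* list 0: 100 -> 200, list 1: empty */
  PetscCall(PetscCDIsEmptyAt(cd, 1, &empty)); /* empty == PETSC_TRUE */
  PetscFunctionReturn(PETSC_SUCCESS);
}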

242: /* PetscCDRemoveAllAt - empty one list and move data to cache
243:  */
244: PetscErrorCode PetscCDRemoveAllAt(PetscCoarsenData *ail, PetscInt a_idx)
245: {
246:   PetscCDIntNd *rem, *n1;

248:   PetscFunctionBegin;
249:   PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
250:   rem               = ail->array[a_idx];
251:   ail->array[a_idx] = NULL;
252:   if (!(n1 = ail->extra_nodes)) ail->extra_nodes = rem;
253:   else {
254:     while (n1->next) n1 = n1->next;
255:     n1->next = rem;
256:   }
257:   PetscFunctionReturn(PETSC_SUCCESS);
258: }

260: /* PetscCDCountAt
261:  */
262: PetscErrorCode PetscCDCountAt(const PetscCoarsenData *ail, PetscInt a_idx, PetscInt *a_sz)
263: {
264:   PetscCDIntNd *n1;
265:   PetscInt      sz = 0;

267:   PetscFunctionBegin;
268:   PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
269:   n1 = ail->array[a_idx];
270:   while (n1) {
271:     n1 = n1->next;
272:     sz++;
273:   }
274:   *a_sz = sz;
275:   PetscFunctionReturn(PETSC_SUCCESS);
276: }

278: /* PetscCDCount - count the total number of nodes in all lists
279:  */
280: PetscErrorCode PetscCDCount(const PetscCoarsenData *ail, PetscInt *a_sz)
281: {
282:   PetscInt sz = 0;

284:   PetscFunctionBegin;
285:   for (int ii = 0; ii < ail->size; ii++) {
286:     PetscCDIntNd *n1 = ail->array[ii];
287:     while (n1) {
288:       n1 = n1->next;
289:       sz++;
290:     }
291:   }
292:   *a_sz = sz;
293:   PetscFunctionReturn(PETSC_SUCCESS);
294: }

296: /* PetscCDIsEmptyAt - Is the list empty? (not used)
297:  */
298: PetscErrorCode PetscCDIsEmptyAt(const PetscCoarsenData *ail, PetscInt a_idx, PetscBool *a_e)
299: {
300:   PetscFunctionBegin;
301:   PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
302:   *a_e = (PetscBool)(ail->array[a_idx] == NULL);
303:   PetscFunctionReturn(PETSC_SUCCESS);
304: }

306: /* PetscCDGetNonemptyIS - used for C-F methods
307:  */
308: PetscErrorCode PetscCDGetNonemptyIS(PetscCoarsenData *ail, IS *a_mis)
309: {
310:   PetscCDIntNd *n;
311:   PetscInt      ii, kk;
312:   PetscInt     *permute;

314:   PetscFunctionBegin;
315:   for (ii = kk = 0; ii < ail->size; ii++) {
316:     n = ail->array[ii];
317:     if (n) kk++;
318:   }
319:   PetscCall(PetscMalloc1(kk, &permute));
320:   for (ii = kk = 0; ii < ail->size; ii++) {
321:     n = ail->array[ii];
322:     if (n) permute[kk++] = ii;
323:   }
324:   PetscCall(ISCreateGeneral(PETSC_COMM_SELF, kk, permute, PETSC_OWN_POINTER, a_mis));
325:   PetscFunctionReturn(PETSC_SUCCESS);
326: }
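
/*
   Editor's note: an illustrative sketch, not part of the PETSc source. The IS returned by
   PetscCDGetNonemptyIS() holds the local indices whose lists are nonempty (the aggregate
   roots); the caller owns the IS and must destroy it. ExampleSelectedList is hypothetical.
*/
static PetscErrorCode ExampleSelectedList(PetscCoarsenData *cd)
{
  IS       selected;
  PetscInt nselected;

  PetscFunctionBegin;
  PetscCall(PetscCDGetNonemptyIS(cd, &selected));
  PetscCall(ISGetLocalSize(selected, &nselected)); /* number of nonempty (root) lists */
  PetscCall(ISDestroy(&selected));
  PetscFunctionReturn(PETSC_SUCCESS);
}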

328: /* PetscCDGetMat
329:  */
330: PetscErrorCode PetscCDGetMat(PetscCoarsenData *ail, Mat *a_mat)
331: {
332:   PetscFunctionBegin;
333:   *a_mat = ail->mat;
334:   PetscFunctionReturn(PETSC_SUCCESS);
335: }

337: /* PetscCDSetMat
338:  */
339: PetscErrorCode PetscCDSetMat(PetscCoarsenData *ail, Mat a_mat)
340: {
341:   PetscFunctionBegin;
342:   if (ail->mat) {
343:     PetscCall(MatDestroy(&ail->mat)); //should not happen
344:   }
345:   ail->mat = a_mat;
346:   PetscFunctionReturn(PETSC_SUCCESS);
347: }

349: /* PetscCDClearMat
350:  */
351: PetscErrorCode PetscCDClearMat(PetscCoarsenData *ail)
352: {
353:   PetscFunctionBegin;
354:   ail->mat = NULL;
355:   PetscFunctionReturn(PETSC_SUCCESS);
356: }

358: /* PetscCDGetASMBlocks - get IS of aggregates for ASM smoothers
359:  */
360: PetscErrorCode PetscCDGetASMBlocks(const PetscCoarsenData *ail, const PetscInt a_bs, PetscInt *a_sz, IS **a_local_is)
361: {
362:   PetscCDIntNd *n;
363:   PetscInt      lsz, ii, kk, *idxs, jj, gid;
364:   IS           *is_loc = NULL;

366:   PetscFunctionBegin;
367:   for (ii = kk = 0; ii < ail->size; ii++) {
368:     if (ail->array[ii]) kk++;
369:   }
370:   *a_sz = kk;
371:   PetscCall(PetscMalloc1(kk, &is_loc));
372:   for (ii = kk = 0; ii < ail->size; ii++) {
373:     for (lsz = 0, n = ail->array[ii]; n; lsz++, n = n->next) /* void */
374:       ;
375:     if (lsz) {
376:       PetscCall(PetscMalloc1(a_bs * lsz, &idxs));
377:       for (lsz = 0, n = ail->array[ii]; n; n = n->next) {
378:         PetscCall(PetscCDIntNdGetID(n, &gid));
379:         for (jj = 0; jj < a_bs; lsz++, jj++) idxs[lsz] = a_bs * gid + jj;
380:       }
381:       PetscCall(ISCreateGeneral(PETSC_COMM_SELF, lsz, idxs, PETSC_OWN_POINTER, &is_loc[kk++]));
382:     }
383:   }
384:   PetscCheck(*a_sz == kk, PETSC_COMM_SELF, PETSC_ERR_PLIB, "*a_sz %" PetscInt_FMT " != kk %" PetscInt_FMT, *a_sz, kk);
385:   *a_local_is = is_loc; /* out */
386:   PetscFunctionReturn(PETSC_SUCCESS);
387: }
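
/*
   Editor's note: an illustrative sketch, not part of the PETSc source, of consuming the output
   of PetscCDGetASMBlocks(): one IS of block-size-expanded indices per nonempty aggregate, with
   the IS array and each IS owned by the caller. ExampleGetASMBlocks is hypothetical.
*/
static PetscErrorCode ExampleGetASMBlocks(PetscCoarsenData *cd, PetscInt bs)
{
  PetscInt nblocks;
  IS      *blocks;

  PetscFunctionBegin;
  PetscCall(PetscCDGetASMBlocks(cd, bs, &nblocks, &blocks));
  /* e.g., these could be handed to PCASMSetLocalSubdomains(pc, nblocks, blocks, NULL) */
  for (PetscInt b = 0; b < nblocks; b++) PetscCall(ISDestroy(&blocks[b]));
  PetscCall(PetscFree(blocks));
  PetscFunctionReturn(PETSC_SUCCESS);
}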

389: /* edge for priority queue */
390: typedef struct edge_tag {
391:   PetscReal weight;
392:   PetscInt  lid0, gid1, ghost1_idx;
393: } Edge;

395: #define MY_MEPS (PETSC_MACHINE_EPSILON * 100)
396: static int gamg_hem_compare(const void *a, const void *b)
397: {
398:   PetscReal va = ((Edge *)a)->weight, vb = ((Edge *)b)->weight;
399:   return (va <= vb - MY_MEPS) ? 1 : (va > vb + MY_MEPS) ? -1 : 0; /* 0 for equal */
400: }
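
/*
   Editor's note: an illustrative sketch, not part of the PETSc source. gamg_hem_compare()
   orders edges by decreasing weight, treating weights within MY_MEPS of each other as equal,
   so qsort() places the heaviest edges first -- the order the greedy matching below consumes.
   ExampleSortEdges is hypothetical.
*/
static PetscErrorCode ExampleSortEdges(void)
{
  Edge e[3] = {
    {0.5, 0, 1, -1},
    {2.0, 1, 2, -1},
    {1.0, 2, 3, -1}
  };

  PetscFunctionBegin;
  qsort(e, 3, sizeof(Edge), gamg_hem_compare);
  /* now e[0].weight == 2.0, e[1].weight == 1.0, e[2].weight == 0.5 */
  PetscFunctionReturn(PETSC_SUCCESS);
}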

402: /*
403:   MatCoarsenApply_HEM_private - parallel heavy edge matching

405:   Input Parameters:
406:    . a_Gmat - global matrix of the graph
407:    . n_iter - number of matching iterations
408:    . threshold - threshold for filtering graphs

410:   Output Parameter:
411:    . a_locals_llist - array of lists of local nodes rooted at each local node
412: */
413: static PetscErrorCode MatCoarsenApply_HEM_private(Mat a_Gmat, const PetscInt n_iter, const PetscReal threshold, PetscCoarsenData **a_locals_llist)
414: {
415: #define REQ_BF_SIZE 100
416:   PetscBool         isMPI;
417:   MPI_Comm          comm;
418:   PetscInt          ix, *ii, *aj, Iend, my0, ncomm_procs, bc_agg = -1, *rbuff = NULL, rbuff_sz = 0;
419:   PetscMPIInt       rank, size, comm_procs[REQ_BF_SIZE], *lid_max_pe;
420:   const PetscInt    nloc = a_Gmat->rmap->n, request_size = PetscCeilReal((PetscReal)sizeof(MPI_Request) / (PetscReal)sizeof(PetscInt));
421:   PetscInt         *lid_cprowID;
422:   PetscBool        *lid_matched;
423:   Mat_SeqAIJ       *matA, *matB = NULL;
424:   Mat_MPIAIJ       *mpimat     = NULL;
425:   PetscScalar       one        = 1.;
426:   PetscCoarsenData *agg_llists = NULL, *ghost_deleted_list = NULL, *bc_list = NULL;
427:   Mat               cMat, tMat, P;
428:   MatScalar        *ap;
429:   IS                info_is;

431:   PetscFunctionBegin;
432:   PetscCall(PetscObjectGetComm((PetscObject)a_Gmat, &comm));
433:   PetscCallMPI(MPI_Comm_rank(comm, &rank));
434:   PetscCallMPI(MPI_Comm_size(comm, &size));
435:   PetscCall(MatGetOwnershipRange(a_Gmat, &my0, &Iend));
436:   PetscCall(ISCreate(comm, &info_is));
437:   PetscCall(PetscInfo(info_is, "Start %" PetscInt_FMT " iterations of HEM.\n", n_iter));

439:   PetscCall(PetscMalloc1(nloc, &lid_matched));
440:   PetscCall(PetscMalloc1(nloc, &lid_cprowID));
441:   PetscCall(PetscMalloc1(nloc, &lid_max_pe));

443:   PetscCall(PetscCDCreate(nloc, &agg_llists));
444:   PetscCall(PetscCDSetChunkSize(agg_llists, nloc + 1));
445:   *a_locals_llist = agg_llists;
446:   /* add self to all lists */
447:   for (int kk = 0; kk < nloc; kk++) PetscCall(PetscCDAppendID(agg_llists, kk, my0 + kk));
448:   /* make a copy of the graph; this gets destroyed in the iterations */
449:   PetscCall(MatDuplicate(a_Gmat, MAT_COPY_VALUES, &cMat));
450:   PetscCall(MatConvert(cMat, MATAIJ, MAT_INPLACE_MATRIX, &cMat));
451:   isMPI = (PetscBool)(size > 1);
452:   if (isMPI) {
453:     /* list of deleted ghosts, should compress this */
454:     PetscCall(PetscCDCreate(size, &ghost_deleted_list));
455:     PetscCall(PetscCDSetChunkSize(ghost_deleted_list, 100));
456:   }
457:   for (int iter = 0; iter < n_iter; iter++) {
458:     const PetscScalar *lghost_max_ew, *lid_max_ew;
459:     PetscBool         *lghost_matched;
460:     PetscMPIInt       *lghost_pe, *lghost_max_pe;
461:     Vec                locMaxEdge, ghostMaxEdge, ghostMaxPE, locMaxPE;
462:     PetscInt          *lghost_gid, nEdges, nEdges0, num_ghosts = 0;
463:     Edge              *Edges;
464:     const int          n_sub_its = 1000; // in case of a bug, stop at some point
465:     /* get submatrices of cMat */
466:     for (int kk = 0; kk < nloc; kk++) lid_cprowID[kk] = -1;
467:     if (isMPI) {
468:       mpimat = (Mat_MPIAIJ *)cMat->data;
469:       matA   = (Mat_SeqAIJ *)mpimat->A->data;
470:       matB   = (Mat_SeqAIJ *)mpimat->B->data;
471:       if (!matB->compressedrow.use) {
472:         /* force construction of compressed row data structure since code below requires it */
473:         PetscCall(MatCheckCompressedRow(mpimat->B, matB->nonzerorowcnt, &matB->compressedrow, matB->i, mpimat->B->rmap->n, -1.0));
474:       }
475:       /* set index into compressed row 'lid_cprowID' */
476:       for (ix = 0; ix < matB->compressedrow.nrows; ix++) {
477:         PetscInt *ridx = matB->compressedrow.rindex, lid = ridx[ix];
478:         if (ridx[ix] >= 0) lid_cprowID[lid] = ix;
479:       }
480:     } else {
481:       matA = (Mat_SeqAIJ *)cMat->data;
482:     }
483:     /* set matched flags: true for empty list */
484:     for (int kk = 0; kk < nloc; kk++) {
485:       PetscCall(PetscCDCountAt(agg_llists, kk, &ix));
486:       if (ix > 0) lid_matched[kk] = PETSC_FALSE;
487:       else lid_matched[kk] = PETSC_TRUE; // treat deleted gids as matched
488:     }
489:     /* max edge and pe vecs */
490:     PetscCall(MatCreateVecs(cMat, &locMaxEdge, NULL));
491:     PetscCall(MatCreateVecs(cMat, &locMaxPE, NULL));
492:     /* get 'lghost_pe' & 'lghost_gid' & init. 'lghost_matched' using 'mpimat->lvec' */
493:     if (isMPI) {
494:       Vec                vec;
495:       PetscScalar        vval;
496:       const PetscScalar *buf;
497:       PetscCall(MatCreateVecs(cMat, &vec, NULL));
498:       PetscCall(VecGetLocalSize(mpimat->lvec, &num_ghosts));
499:       /* lghost_matched */
500:       for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) {
501:         PetscScalar vval = lid_matched[kk] ? 1.0 : 0.0;
502:         PetscCall(VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES));
503:       }
504:       PetscCall(VecAssemblyBegin(vec));
505:       PetscCall(VecAssemblyEnd(vec));
506:       PetscCall(VecScatterBegin(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
507:       PetscCall(VecScatterEnd(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
508:       PetscCall(VecGetArrayRead(mpimat->lvec, &buf)); /* get matched flags in 'buf' */
509:       PetscCall(PetscMalloc1(num_ghosts, &lghost_matched));
510:       for (int kk = 0; kk < num_ghosts; kk++) {
511:         lghost_matched[kk] = (PetscBool)(PetscRealPart(buf[kk]) != 0); // matched flag of the ghost
512:       }
513:       PetscCall(VecRestoreArrayRead(mpimat->lvec, &buf));
514:       /* lghost_pe */
515:       vval = (PetscScalar)(rank);
516:       for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) PetscCall(VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */
517:       PetscCall(VecAssemblyBegin(vec));
518:       PetscCall(VecAssemblyEnd(vec));
519:       PetscCall(VecScatterBegin(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
520:       PetscCall(VecScatterEnd(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
521:       PetscCall(VecGetArrayRead(mpimat->lvec, &buf)); /* get proc ID in 'buf' */
522:       PetscCall(PetscMalloc1(num_ghosts, &lghost_pe));
523:       for (int kk = 0; kk < num_ghosts; kk++) lghost_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the proc of the ghost for now
524:       PetscCall(VecRestoreArrayRead(mpimat->lvec, &buf));
525:       /* lghost_gid */
526:       for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) {
527:         vval = (PetscScalar)(gid);
528:         PetscCall(VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */
529:       }
530:       PetscCall(VecAssemblyBegin(vec));
531:       PetscCall(VecAssemblyEnd(vec));
532:       PetscCall(VecScatterBegin(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
533:       PetscCall(VecScatterEnd(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
534:       PetscCall(VecDestroy(&vec));
535:       PetscCall(VecGetArrayRead(mpimat->lvec, &buf)); /* get GIDs in 'buf' */
536:       PetscCall(PetscMalloc1(num_ghosts, &lghost_gid));
537:       for (int kk = 0; kk < num_ghosts; kk++) lghost_gid[kk] = (PetscInt)PetscRealPart(buf[kk]);
538:       PetscCall(VecRestoreArrayRead(mpimat->lvec, &buf));
539:     }
540:     // get 'comm_procs' (could hoist)
541:     for (int kk = 0; kk < REQ_BF_SIZE; kk++) comm_procs[kk] = -1;
542:     for (ix = 0, ncomm_procs = 0; ix < num_ghosts; ix++) {
543:       PetscMPIInt proc = lghost_pe[ix], idx = -1;
544:       for (int k = 0; k < ncomm_procs && idx == -1; k++)
545:         if (comm_procs[k] == proc) idx = k;
546:       if (idx == -1) { comm_procs[ncomm_procs++] = proc; }
547:       PetscCheck(ncomm_procs != REQ_BF_SIZE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Receive request array too small: %d", (int)ncomm_procs);
548:     }
549:     /* count edges, compute initial 'locMaxEdge', 'locMaxPE' */
550:     nEdges0 = 0;
551:     for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) {
552:       PetscReal   max_e = 0., tt;
553:       PetscScalar vval;
554:       PetscInt    lid = kk, max_pe = rank, pe, n;
555:       ii = matA->i;
556:       n  = ii[lid + 1] - ii[lid];
557:       aj = PetscSafePointerPlusOffset(matA->j, ii[lid]);
558:       ap = PetscSafePointerPlusOffset(matA->a, ii[lid]);
559:       for (int jj = 0; jj < n; jj++) {
560:         PetscInt lidj = aj[jj];
561:         if ((tt = PetscRealPart(ap[jj])) > threshold && lidj != lid) {
562:           if (tt > max_e) max_e = tt;
563:           if (lidj > lid) nEdges0++;
564:         }
565:       }
566:       if ((ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
567:         ii = matB->compressedrow.i;
568:         n  = ii[ix + 1] - ii[ix];
569:         ap = matB->a + ii[ix];
570:         aj = matB->j + ii[ix];
571:         for (int jj = 0; jj < n; jj++) {
572:           if ((tt = PetscRealPart(ap[jj])) > threshold) {
573:             if (tt > max_e) max_e = tt;
574:             nEdges0++;
575:             if ((pe = lghost_pe[aj[jj]]) > max_pe) max_pe = pe;
576:           }
577:         }
578:       }
579:       vval = max_e;
580:       PetscCall(VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES));
581:       vval = (PetscScalar)max_pe;
582:       PetscCall(VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES));
583:       if (iter == 0 && max_e <= MY_MEPS) { // add BCs to fake aggregate
584:         lid_matched[lid] = PETSC_TRUE;
585:         if (bc_agg == -1) {
586:           bc_agg = lid;
587:           PetscCall(PetscCDCreate(1, &bc_list));
588:         }
589:         PetscCall(PetscCDRemoveAllAt(agg_llists, lid));
590:         PetscCall(PetscCDAppendID(bc_list, 0, my0 + lid));
591:       }
592:     }
593:     PetscCall(VecAssemblyBegin(locMaxEdge));
594:     PetscCall(VecAssemblyEnd(locMaxEdge));
595:     PetscCall(VecAssemblyBegin(locMaxPE));
596:     PetscCall(VecAssemblyEnd(locMaxPE));
597:     /* make 'ghostMaxEdge', 'ghostMaxPE', and 'lghost_max_pe' */
598:     if (mpimat) {
599:       const PetscScalar *buf;
600:       PetscCall(VecDuplicate(mpimat->lvec, &ghostMaxEdge));
601:       PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
602:       PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));

604:       PetscCall(VecDuplicate(mpimat->lvec, &ghostMaxPE));
605:       PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD));
606:       PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD));
607:       PetscCall(VecGetArrayRead(ghostMaxPE, &buf));
608:       PetscCall(PetscMalloc1(num_ghosts, &lghost_max_pe));
609:       for (int kk = 0; kk < num_ghosts; kk++) lghost_max_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the MAX proc of the ghost now
610:       PetscCall(VecRestoreArrayRead(ghostMaxPE, &buf));
611:     }
612:     { // make lid_max_pe
613:       const PetscScalar *buf;
614:       PetscCall(VecGetArrayRead(locMaxPE, &buf));
615:       for (int kk = 0; kk < nloc; kk++) lid_max_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the MAX proc of each local node
616:       PetscCall(VecRestoreArrayRead(locMaxPE, &buf));
617:     }
618:     /* setup sorted list of edges, and make 'Edges' */
619:     PetscCall(PetscMalloc1(nEdges0, &Edges));
620:     nEdges = 0;
621:     for (int kk = 0, n; kk < nloc; kk++) {
622:       const PetscInt lid = kk;
623:       PetscReal      tt;
624:       ii = matA->i;
625:       n  = ii[lid + 1] - ii[lid];
626:       aj = PetscSafePointerPlusOffset(matA->j, ii[lid]);
627:       ap = PetscSafePointerPlusOffset(matA->a, ii[lid]);
628:       for (int jj = 0; jj < n; jj++) {
629:         PetscInt lidj = aj[jj];
630:         if ((tt = PetscRealPart(ap[jj])) > threshold && lidj != lid) {
631:           if (lidj > lid) {
632:             Edges[nEdges].lid0       = lid;
633:             Edges[nEdges].gid1       = lidj + my0;
634:             Edges[nEdges].ghost1_idx = -1;
635:             Edges[nEdges].weight     = tt;
636:             nEdges++;
637:           }
638:         }
639:       }
640:       if ((ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbor */
641:         ii = matB->compressedrow.i;
642:         n  = ii[ix + 1] - ii[ix];
643:         ap = matB->a + ii[ix];
644:         aj = matB->j + ii[ix];
645:         for (int jj = 0; jj < n; jj++) {
646:           if ((tt = PetscRealPart(ap[jj])) > threshold) {
647:             Edges[nEdges].lid0       = lid;
648:             Edges[nEdges].gid1       = lghost_gid[aj[jj]];
649:             Edges[nEdges].ghost1_idx = aj[jj];
650:             Edges[nEdges].weight     = tt;
651:             nEdges++;
652:           }
653:         }
654:       }
655:     }
656:     PetscCheck(nEdges == nEdges0, PETSC_COMM_SELF, PETSC_ERR_SUP, "nEdges != nEdges0: %d %d", (int)nEdges0, (int)nEdges);
657:     if (Edges) qsort(Edges, nEdges, sizeof(Edge), gamg_hem_compare);

659:     PetscCall(PetscInfo(info_is, "[%d] HEM iteration %d with %d edges\n", rank, iter, (int)nEdges));

661:     /* projection matrix */
662:     PetscCall(MatCreate(comm, &P));
663:     PetscCall(MatSetType(P, MATAIJ));
664:     PetscCall(MatSetSizes(P, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
665:     PetscCall(MatMPIAIJSetPreallocation(P, 1, NULL, 1, NULL));
666:     PetscCall(MatSeqAIJSetPreallocation(P, 1, NULL));
667:     PetscCall(MatSetUp(P));
668:     /* process - communicate - process */
669:     for (int sub_it = 0, old_num_edge = 0; /* sub_it < n_sub_its */; /* sub_it++ */) {
670:       PetscInt    nactive_edges = 0, n_act_n[3], gn_act_n[3];
671:       PetscMPIInt tag1, tag2;
672:       PetscCall(VecGetArrayRead(locMaxEdge, &lid_max_ew));
673:       if (isMPI) {
674:         PetscCall(VecGetArrayRead(ghostMaxEdge, &lghost_max_ew));
675:         PetscCall(PetscCommGetNewTag(comm, &tag1));
676:         PetscCall(PetscCommGetNewTag(comm, &tag2));
677:       }
678:       for (int kk = 0; kk < nEdges; kk++) {
679:         /* HEM */
680:         const Edge    *e    = &Edges[kk];
681:         const PetscInt lid0 = e->lid0, gid1 = e->gid1, ghost1_idx = e->ghost1_idx, gid0 = lid0 + my0, lid1 = gid1 - my0;
682:         PetscBool      isOK = PETSC_TRUE, print = PETSC_FALSE;
683:         if (print) PetscCall(PetscSynchronizedPrintf(comm, "\t[%d] edge (%d %d), %d %d %d\n", rank, (int)gid0, (int)gid1, lid_matched[lid0], (ghost1_idx != -1 && lghost_matched[ghost1_idx]), (ghost1_idx == -1 && lid_matched[lid1])));
684:         /* skip if either vertex is matched already */
685:         if (lid_matched[lid0] || (ghost1_idx != -1 && lghost_matched[ghost1_idx]) || (ghost1_idx == -1 && lid_matched[lid1])) continue;

687:         nactive_edges++;
688:         PetscCheck(PetscRealPart(lid_max_ew[lid0]) >= e->weight - MY_MEPS, PETSC_COMM_SELF, PETSC_ERR_SUP, "edge weight %e > max %e", (double)e->weight, (double)PetscRealPart(lid_max_ew[lid0]));
689:         if (print) PetscCall(PetscSynchronizedPrintf(comm, "\t[%d] active edge (%d %d), diff0 = %10.4e\n", rank, (int)gid0, (int)gid1, (double)(PetscRealPart(lid_max_ew[lid0]) - (double)e->weight)));
690:         // smaller edge, lid_max_ew gets updated - e0
691:         if (PetscRealPart(lid_max_ew[lid0]) > e->weight + MY_MEPS) {
692:           if (print)
693:             PetscCall(PetscSynchronizedPrintf(comm, "\t\t[%d] 1) e0 SKIPPING small edge %20.14e edge (%d %d), diff = %10.4e to proc %d. max = %20.14e, w = %20.14e\n", rank, (double)e->weight, (int)gid0, (int)gid1, (double)(PetscRealPart(lid_max_ew[lid0]) - e->weight), ghost1_idx != -1 ? (int)lghost_pe[ghost1_idx] : rank, (double)PetscRealPart(lid_max_ew[lid0]),
694:                                               (double)e->weight));
695:           continue; // we are basically filtering edges here
696:         }
697:         // e1 - local
698:         if (ghost1_idx == -1) {
699:           if (PetscRealPart(lid_max_ew[lid1]) > e->weight + MY_MEPS) {
700:             if (print)
701:               PetscCall(PetscSynchronizedPrintf(comm, "\t\t%c[%d] 2) e1 SKIPPING small local edge %20.14e edge (%d %d), diff = %10.4e\n", ghost1_idx != -1 ? '\t' : ' ', rank, (double)e->weight, (int)gid0, (int)gid1, (double)(PetscRealPart(lid_max_ew[lid1]) - e->weight)));
702:             continue; // we are basically filtering edges here
703:           }
704:         } else { // e1 - ghost
705:           /* see if edge might get matched on other proc */
706:           PetscReal g_max_e1 = PetscRealPart(lghost_max_ew[ghost1_idx]);
707:           if (print)
708:             PetscCall(PetscSynchronizedPrintf(comm, "\t\t\t[%d] CHECK GHOST e1, edge (%d %d), E0 MAX EDGE WEIGHT = %10.4e, EDGE WEIGHT = %10.4e, diff1 = %10.4e, ghost proc %d with max pe %d on e0 and %d on e1\n", rank, (int)gid0, (int)gid1, (double)PetscRealPart(lid_max_ew[lid0]),
709:                                               (double)e->weight, (double)(PetscRealPart(lghost_max_ew[ghost1_idx]) - e->weight), (int)lghost_pe[ghost1_idx], lid_max_pe[lid0], lghost_max_pe[ghost1_idx]));
710:           if (g_max_e1 > e->weight + MY_MEPS) {
711:             /* PetscCall(PetscSynchronizedPrintf(comm,"\t\t\t\t[%d] 3) ghost e1 SKIPPING small edge (%d %d), diff = %10.4e from proc %d with max pe %d. max = %20.14e, w = %20.14e\n", rank, (int)gid0, (int)gid1, g_max_e1 - e->weight, (int)lghost_pe[ghost1_idx], lghost_max_pe[ghost1_idx], g_max_e1, e->weight )); */
712:             continue;
713:           } else if (g_max_e1 >= e->weight - MY_MEPS && lghost_pe[ghost1_idx] > rank) { // is 'lghost_max_pe[ghost1_idx] > rank' needed?
714:             /* check for max_e1 equal to this edge and a larger processor that will deal with this */
715:             if (print)
716:               PetscCall(PetscSynchronizedPrintf(comm, "\t\t\t[%d] ghost e1 SKIPPING EQUAL (%d %d), diff = %10.4e from larger proc %d with max pe %d. max = %20.14e, w = %20.14e\n", rank, (int)gid0, (int)gid1,
717:                                                 (double)(PetscRealPart(lid_max_ew[lid0]) - (double)e->weight), (int)lghost_pe[ghost1_idx], (int)lghost_max_pe[ghost1_idx], (double)g_max_e1, (double)e->weight));
718:             isOK = PETSC_FALSE; // this guy could delete me
719:             continue;
720:           } else {
721:             /* PetscCall(PetscSynchronizedPrintf(comm,"\t[%d] Edge (%d %d) passes gid0 tests, diff = %10.4e from proc %d with max pe %d. max = %20.14e, w = %20.14e\n", rank, (int)gid0, (int)gid1, g_max_e1 - e->weight, (int)lghost_pe[ghost1_idx], lghost_max_pe[ghost1_idx], g_max_e1, e->weight )); */
722:           }
723:         }
724:         /* check ghost for v0 */
725:         if (isOK) {
726:           PetscReal max_e, ew;
727:           if ((ix = lid_cprowID[lid0]) != -1) { /* if I have any ghost neighbors */
728:             int n;
729:             ii = matB->compressedrow.i;
730:             n  = ii[ix + 1] - ii[ix];
731:             ap = matB->a + ii[ix];
732:             aj = matB->j + ii[ix];
733:             for (int jj = 0; jj < n && isOK; jj++) {
734:               PetscInt lidj = aj[jj];
735:               if (lghost_matched[lidj]) continue;
736:               ew = PetscRealPart(ap[jj]);
737:               if (ew <= threshold) continue;
738:               max_e = PetscRealPart(lghost_max_ew[lidj]);
739:               /* check for max_e equal to this edge and a larger processor that will deal with this */
740:               if (ew >= PetscRealPart(lid_max_ew[lid0]) - MY_MEPS && lghost_max_pe[lidj] > rank) isOK = PETSC_FALSE;
741:               PetscCheck(ew <= max_e + MY_MEPS, PETSC_COMM_SELF, PETSC_ERR_SUP, "edge weight %e > max %e. ncols = %d, gid0 = %d, gid1 = %d", (double)PetscRealPart(ew), (double)PetscRealPart(max_e), (int)n, (int)(lid0 + my0), (int)lghost_gid[lidj]);
742:               if (print)
743:                 PetscCall(PetscSynchronizedPrintf(comm, "\t\t\t\t[%d] e0: looked at ghost adj (%d %d), diff = %10.4e, ghost on proc %d (max %d). isOK = %d, %d %d %d; ew = %e, lid0 max ew = %e, diff = %e, eps = %e\n", rank, (int)gid0, (int)lghost_gid[lidj], (double)(max_e - ew), lghost_pe[lidj], lghost_max_pe[lidj], isOK, (double)(ew) >= (double)(max_e - MY_MEPS), ew >= PetscRealPart(lid_max_ew[lid0]) - MY_MEPS, lghost_pe[lidj] > rank, (double)ew, (double)PetscRealPart(lid_max_ew[lid0]), (double)(ew - PetscRealPart(lid_max_ew[lid0])), (double)MY_MEPS));
744:             }
745:             if (!isOK && print) PetscCall(PetscSynchronizedPrintf(comm, "\t\t[%d] skip edge (%d %d) from ghost inspection\n", rank, (int)gid0, (int)gid1));
746:           }
747:           /* check local v1 */
748:           if (ghost1_idx == -1) {
749:             if ((ix = lid_cprowID[lid1]) != -1) { /* if I have any ghost neighbors */
750:               int n;
751:               ii = matB->compressedrow.i;
752:               n  = ii[ix + 1] - ii[ix];
753:               ap = matB->a + ii[ix];
754:               aj = matB->j + ii[ix];
755:               for (int jj = 0; jj < n && isOK; jj++) {
756:                 PetscInt lidj = aj[jj];
757:                 if (lghost_matched[lidj]) continue;
758:                 ew = PetscRealPart(ap[jj]);
759:                 if (ew <= threshold) continue;
760:                 max_e = PetscRealPart(lghost_max_ew[lidj]);
761:                 /* check for max_e equal to this edge and a larger processor that will deal with this */
762:                 if (ew >= PetscRealPart(lid_max_ew[lid1]) - MY_MEPS && lghost_max_pe[lidj] > rank) isOK = PETSC_FALSE;
763:                 PetscCheck(ew <= max_e + MY_MEPS, PETSC_COMM_SELF, PETSC_ERR_SUP, "edge weight %e > max %e", (double)PetscRealPart(ew), (double)PetscRealPart(max_e));
764:                 if (print)
765:                   PetscCall(PetscSynchronizedPrintf(comm, "\t\t\t\t\t[%d] e1: looked at ghost adj (%d %d), diff = %10.4e, ghost on proc %d (max %d)\n", rank, (int)gid0, (int)lghost_gid[lidj], (double)(max_e - ew), lghost_pe[lidj], lghost_max_pe[lidj]));
766:               }
767:             }
768:             if (!isOK && print) PetscCall(PetscSynchronizedPrintf(comm, "\t\t[%d] skip edge (%d %d) from ghost inspection\n", rank, (int)gid0, (int)gid1));
769:           }
770:         }
771:         PetscReal e1_max_w = (ghost1_idx == -1 ? PetscRealPart(lid_max_ew[lid0]) : PetscRealPart(lghost_max_ew[ghost1_idx]));
772:         if (print)
773:           PetscCall(PetscSynchronizedPrintf(comm, "\t[%d] MATCHING (%d %d) e1 max weight = %e, e1 weight diff %e, %s. isOK = %d\n", rank, (int)gid0, (int)gid1, (double)e1_max_w, (double)(e1_max_w - e->weight), ghost1_idx == -1 ? "local" : "ghost", isOK));
774:         /* do it */
775:         if (isOK) {
776:           if (ghost1_idx == -1) {
777:             PetscCheck(!lid_matched[lid1], PETSC_COMM_SELF, PETSC_ERR_SUP, "local %d is matched", (int)gid1);
778:             lid_matched[lid1] = PETSC_TRUE;                       /* keep track of what we've done this round */
779:             PetscCall(PetscCDMoveAppend(agg_llists, lid0, lid1)); // takes lid1's list and appends to lid0's
780:           } else {
781:             /* add gid1 to list of ghosts deleted by me -- I need their children */
782:             PetscMPIInt proc = lghost_pe[ghost1_idx];
783:             PetscCheck(!lghost_matched[ghost1_idx], PETSC_COMM_SELF, PETSC_ERR_SUP, "ghost %d is matched", (int)lghost_gid[ghost1_idx]);
784:             lghost_matched[ghost1_idx] = PETSC_TRUE;
785:             PetscCall(PetscCDAppendID(ghost_deleted_list, proc, ghost1_idx)); /* cache to send messages */
786:             PetscCall(PetscCDAppendID(ghost_deleted_list, proc, lid0));
787:           }
788:           lid_matched[lid0] = PETSC_TRUE; /* keep track of what we've done this round */
789:           /* set projection */
790:           PetscCall(MatSetValues(P, 1, &gid0, 1, &gid0, &one, INSERT_VALUES));
791:           PetscCall(MatSetValues(P, 1, &gid1, 1, &gid0, &one, INSERT_VALUES));
792:           //PetscCall(PetscPrintf(comm,"\t %d.%d) match active EDGE %d : (%d %d)\n",iter,sub_it, (int)nactive_edges, (int)gid0, (int)gid1));
793:         } /* matched */
794:       } /* edge loop */
795:       PetscCall(PetscSynchronizedFlush(comm, PETSC_STDOUT));
796:       if (isMPI) PetscCall(VecRestoreArrayRead(ghostMaxEdge, &lghost_max_ew));
797:       PetscCall(VecRestoreArrayRead(locMaxEdge, &lid_max_ew));
798:       // count active edges for the stopping test; later, update deleted ghosts
799:       n_act_n[0] = nactive_edges;
800:       if (ghost_deleted_list) PetscCall(PetscCDCount(ghost_deleted_list, &n_act_n[2]));
801:       else n_act_n[2] = 0;
802:       PetscCall(PetscCDCount(agg_llists, &n_act_n[1]));
803:       PetscCall(MPIU_Allreduce(n_act_n, gn_act_n, 3, MPIU_INT, MPI_SUM, comm));
804:       PetscCall(PetscInfo(info_is, "[%d] %d.%d) nactive edges=%" PetscInt_FMT ", ncomm_procs=%d, nEdges=%d, %" PetscInt_FMT " deleted ghosts, N=%" PetscInt_FMT "\n", rank, iter, sub_it, gn_act_n[0], (int)ncomm_procs, (int)nEdges, gn_act_n[2], gn_act_n[1]));
805:       /* deal with deleted ghost */
806:       if (isMPI) {
807:         PetscCDIntNd *pos;
808:         PetscInt     *sbuffs1[REQ_BF_SIZE], ndel;
809:         PetscInt     *sbuffs2[REQ_BF_SIZE];
810:         MPI_Status    status;

812:         /* send deleted ghosts */
813:         for (int proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) {
814:           const PetscMPIInt proc = comm_procs[proc_idx];
815:           PetscInt         *sbuff, *pt, scount;
816:           MPI_Request      *request;
817:           /* count ghosts */
818:           PetscCall(PetscCDCountAt(ghost_deleted_list, proc, &ndel));
819:           ndel /= 2; // two entries for each deleted ghost
820:           scount = 2 + 2 * ndel;
821:           PetscCall(PetscMalloc1(scount + request_size, &sbuff));
822:           /* save requests */
823:           sbuffs1[proc_idx] = sbuff;
824:           request           = (MPI_Request *)sbuff;
825:           sbuff = pt = (PetscInt *)(sbuff + request_size);
826:           /* write [ndel, proc, ndel*[gid1,gid0]] */
827:           *pt++ = ndel; // number of deleted to send
828:           *pt++ = rank; // proc (not used)
829:           PetscCall(PetscCDGetHeadPos(ghost_deleted_list, proc, &pos));
830:           while (pos) {
831:             PetscInt lid0, ghost_idx, gid1;
832:             PetscCall(PetscCDIntNdGetID(pos, &ghost_idx));
833:             gid1 = lghost_gid[ghost_idx];
834:             PetscCall(PetscCDGetNextPos(ghost_deleted_list, proc, &pos));
835:             PetscCall(PetscCDIntNdGetID(pos, &lid0));
836:             PetscCall(PetscCDGetNextPos(ghost_deleted_list, proc, &pos));
837:             *pt++ = gid1;
838:             *pt++ = lid0 + my0; // gid0
839:           }
840:           PetscCheck(pt - sbuff == scount, PETSC_COMM_SELF, PETSC_ERR_SUP, "pt-sbuff != scount: %d", (int)(pt - sbuff));
841:           /* MPI_Isend:  tag1 [ndel, proc, n*[gid1,gid0] ] */
842:           PetscCallMPI(MPI_Isend(sbuff, scount, MPIU_INT, proc, tag1, comm, request));
843:           PetscCall(PetscCDRemoveAllAt(ghost_deleted_list, proc)); // done with this list
844:         }
845:         /* receive deleted, send back partial aggregates, clear lists */
846:         for (int proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) {
847:           PetscCallMPI(MPI_Probe(comm_procs[proc_idx] /* MPI_ANY_SOURCE */, tag1, comm, &status));
848:           {
849:             PetscInt         *pt, *pt2, *pt3, *sbuff, tmp;
850:             MPI_Request      *request;
851:             int               rcount, scount, ndel;
852:             const PetscMPIInt proc = status.MPI_SOURCE;
853:             PetscCallMPI(MPI_Get_count(&status, MPIU_INT, &rcount));
854:             if (rcount > rbuff_sz) {
855:               if (rbuff) PetscCall(PetscFree(rbuff));
856:               PetscCall(PetscMalloc1(rcount, &rbuff));
857:               rbuff_sz = rcount;
858:             }
859:             /* MPI_Recv: tag1 [ndel, proc, ndel*[gid1,gid0] ] */
860:             PetscCallMPI(MPI_Recv(rbuff, rcount, MPIU_INT, proc, tag1, comm, &status));
861:             /* read and count sends: *[gid0, n, n*[gid]] */
862:             pt     = rbuff;
863:             scount = 0;
864:             ndel   = *pt++; // number of deleted to recv
865:             tmp    = *pt++; // proc (not used)
866:             while (ndel--) {
867:               PetscInt gid1 = *pt++, lid1 = gid1 - my0;
868:               int      gh_gid0 = *pt++; // gid on other proc (not used here to count)
869:               PetscCheck(lid1 >= 0 && lid1 < nloc, PETSC_COMM_SELF, PETSC_ERR_SUP, "received ghost deleted %d", (int)gid1);
870:               PetscCheck(!lid_matched[lid1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "%d) received matched local gid %" PetscInt_FMT ",%d, with ghost (lid) %" PetscInt_FMT " from proc %d", sub_it, gid1, gh_gid0, tmp, proc);
871:               lid_matched[lid1] = PETSC_TRUE;                    /* keep track of what we've done this round */
872:               PetscCall(PetscCDCountAt(agg_llists, lid1, &tmp)); // n
873:               /* PetscCheck(tmp == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "sending %d (!= 1) size aggregate. gid-0 %d, from %d (gid-1 %d)", (int)tmp, (int) gid, proc, gh_gid0); */
874:               scount += tmp + 2; // gid0, n, n*[gid]
875:             }
876:             PetscCheck((pt - rbuff) == rcount, PETSC_COMM_SELF, PETSC_ERR_SUP, "receive buffer size != num read: %d; rcount: %d", (int)(pt - rbuff), rcount);
877:             /* send tag2: *[gid0, n, n*[gid] ] */
878:             PetscCall(PetscMalloc1(scount + request_size, &sbuff));
879:             sbuffs2[proc_idx] = sbuff; /* cache request */
880:             request           = (MPI_Request *)sbuff;
881:             pt2 = sbuff = sbuff + request_size;
882:             // read again: n, proc, n*[gid1,gid0]
883:             pt   = rbuff;
884:             ndel = *pt++;
885:             tmp  = *pt++; // proc (not used)
886:             while (ndel--) {
887:               PetscInt gid1 = *pt++, lid1 = gid1 - my0, gh_gid0 = *pt++;
888:               /* write [gid0, aggSz, aggSz[gid] ] */
889:               *pt2++ = gh_gid0;
890:               pt3    = pt2++; /* save pointer for later */
891:               PetscCall(PetscCDGetHeadPos(agg_llists, lid1, &pos));
892:               while (pos) {
893:                 PetscInt gid;
894:                 PetscCall(PetscCDIntNdGetID(pos, &gid));
895:                 PetscCall(PetscCDGetNextPos(agg_llists, lid1, &pos));
896:                 *pt2++ = gid;
897:               }
898:               *pt3 = (pt2 - pt3) - 1;
899:               /* clear list */
900:               PetscCall(PetscCDRemoveAllAt(agg_llists, lid1));
901:             }
902:             PetscCheck((pt2 - sbuff) == scount, PETSC_COMM_SELF, PETSC_ERR_SUP, "buffer size != num write: %d %d", (int)(pt2 - sbuff), (int)scount);
903:             /* MPI_Isend: requested data, tag2: *[gid0, n, n*[gid]] */
904:             PetscCallMPI(MPI_Isend(sbuff, scount, MPIU_INT, proc, tag2, comm, request));
905:           }
906:         } // proc_idx
907:         /* receive tag2 *[gid0, n, n*[gid] ] */
908:         for (int proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) {
909:           PetscMPIInt proc;
910:           PetscInt   *pt;
911:           int         rcount;
912:           PetscCallMPI(MPI_Probe(comm_procs[proc_idx] /* MPI_ANY_SOURCE */, tag2, comm, &status));
913:           PetscCallMPI(MPI_Get_count(&status, MPIU_INT, &rcount));
914:           if (rcount > rbuff_sz) {
915:             if (rbuff) PetscCall(PetscFree(rbuff));
916:             PetscCall(PetscMalloc1(rcount, &rbuff));
917:             rbuff_sz = rcount;
918:           }
919:           proc = status.MPI_SOURCE;
920:           /* MPI_Recv: tag2 *[gid0, n, n*[gid]] */
921:           PetscCallMPI(MPI_Recv(rbuff, rcount, MPIU_INT, proc, tag2, comm, &status));
922:           pt = rbuff;
923:           while (pt - rbuff < rcount) {
924:             PetscInt gid0 = *pt++, n = *pt++;
925:             while (n--) {
926:               PetscInt gid1 = *pt++;
927:               PetscCall(PetscCDAppendID(agg_llists, gid0 - my0, gid1));
928:             }
929:           }
930:           PetscCheck((pt - rbuff) == rcount, PETSC_COMM_SELF, PETSC_ERR_SUP, "recv buffer size != num read: %d %d", (int)(pt - rbuff), (int)rcount);
931:         }
932:         /* wait for tag1 isends */
933:         for (int proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) {
934:           MPI_Request *request;
935:           request = (MPI_Request *)sbuffs1[proc_idx];
936:           PetscCallMPI(MPI_Wait(request, &status));
937:           PetscCall(PetscFree(sbuffs1[proc_idx]));
938:         }
939:         /* wait for tag2 isends */
940:         for (int proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) {
941:           MPI_Request *request = (MPI_Request *)sbuffs2[proc_idx];
942:           PetscCallMPI(MPI_Wait(request, &status));
943:           PetscCall(PetscFree(sbuffs2[proc_idx]));
944:         }
945:       } /* MPI */
946:       /* set 'lghost_matched' - use locMaxEdge, ghostMaxEdge (recomputed next) */
947:       if (isMPI) {
948:         const PetscScalar *sbuff;
949:         for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) {
950:           PetscScalar vval = lid_matched[kk] ? 1.0 : 0.0;
951:           PetscCall(VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */
952:         }
953:         PetscCall(VecAssemblyBegin(locMaxEdge));
954:         PetscCall(VecAssemblyEnd(locMaxEdge));
955:         PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
956:         PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
957:         PetscCall(VecGetArrayRead(ghostMaxEdge, &sbuff));
958:         for (int kk = 0; kk < num_ghosts; kk++) { lghost_matched[kk] = (PetscBool)(PetscRealPart(sbuff[kk]) != 0.0); }
959:         PetscCall(VecRestoreArrayRead(ghostMaxEdge, &sbuff));
960:       }
961:       /* compute 'locMaxEdge' inside sub iteration b/c max weight can drop as neighbors are matched */
962:       for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) {
963:         PetscReal      max_e = 0., tt;
964:         PetscScalar    vval;
965:         const PetscInt lid    = kk;
966:         int            max_pe = rank, pe, n;
967:         ii                    = matA->i;
968:         n                     = ii[lid + 1] - ii[lid];
969:         aj                    = PetscSafePointerPlusOffset(matA->j, ii[lid]);
970:         ap                    = PetscSafePointerPlusOffset(matA->a, ii[lid]);
971:         for (int jj = 0; jj < n; jj++) {
972:           PetscInt lidj = aj[jj];
973:           if (lid_matched[lidj]) continue; /* this is new - can change local max */
974:           if (lidj != lid && PetscRealPart(ap[jj]) > max_e) max_e = PetscRealPart(ap[jj]);
975:         }
976:         if (lid_cprowID && (ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
977:           ii = matB->compressedrow.i;
978:           n  = ii[ix + 1] - ii[ix];
979:           ap = matB->a + ii[ix];
980:           aj = matB->j + ii[ix];
981:           for (int jj = 0; jj < n; jj++) {
982:             PetscInt lidj = aj[jj];
983:             if (lghost_matched[lidj]) continue;
984:             if ((tt = PetscRealPart(ap[jj])) > max_e) max_e = tt;
985:           }
986:         }
987:         vval = (PetscScalar)max_e;
988:         PetscCall(VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */
989:         // max PE with max edge
990:         if (lid_cprowID && (ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
991:           ii = matB->compressedrow.i;
992:           n  = ii[ix + 1] - ii[ix];
993:           ap = matB->a + ii[ix];
994:           aj = matB->j + ii[ix];
995:           for (int jj = 0; jj < n; jj++) {
996:             PetscInt lidj = aj[jj];
997:             if (lghost_matched[lidj]) continue;
998:             if ((pe = lghost_pe[aj[jj]]) > max_pe && PetscRealPart(ap[jj]) >= max_e - MY_MEPS) { max_pe = pe; }
999:           }
1000:         }
1001:         vval = (PetscScalar)max_pe;
1002:         PetscCall(VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES));
1003:       }
1004:       PetscCall(VecAssemblyBegin(locMaxEdge));
1005:       PetscCall(VecAssemblyEnd(locMaxEdge));
1006:       PetscCall(VecAssemblyBegin(locMaxPE));
1007:       PetscCall(VecAssemblyEnd(locMaxPE));
1008:       /* compute 'lghost_max_ew' and 'lghost_max_pe' to get ready for the next iteration */
1009:       if (isMPI) {
1010:         const PetscScalar *buf;
1011:         PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
1012:         PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
1013:         PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD));
1014:         PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD));
1015:         PetscCall(VecGetArrayRead(ghostMaxPE, &buf));
1016:         for (int kk = 0; kk < num_ghosts; kk++) {
1017:           lghost_max_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the MAX proc of the ghost now
1018:         }
1019:         PetscCall(VecRestoreArrayRead(ghostMaxPE, &buf));
1020:       }
1021:       // if no active edges, stop
1022:       if (gn_act_n[0] < 1) break;
1023:       // increment and check (self-stopping iteration)
1024:       PetscCheck(old_num_edge != gn_act_n[0], PETSC_COMM_SELF, PETSC_ERR_SUP, "HEM stalled step %d/%d", sub_it + 1, n_sub_its);
1025:       sub_it++;
1026:       PetscCheck(sub_it < n_sub_its, PETSC_COMM_SELF, PETSC_ERR_SUP, "failed to finish HEM step %d/%d", sub_it + 1, n_sub_its);
1027:       old_num_edge = gn_act_n[0];
1028:     } /* sub_it loop */
1029:     /* clean up iteration */
1030:     PetscCall(PetscFree(Edges));
1031:     if (mpimat) { // can be hoisted
1032:       PetscCall(VecRestoreArrayRead(ghostMaxEdge, &lghost_max_ew));
1033:       PetscCall(VecDestroy(&ghostMaxEdge));
1034:       PetscCall(VecDestroy(&ghostMaxPE));
1035:       PetscCall(PetscFree(lghost_pe));
1036:       PetscCall(PetscFree(lghost_gid));
1037:       PetscCall(PetscFree(lghost_matched));
1038:       PetscCall(PetscFree(lghost_max_pe));
1039:     }
1040:     PetscCall(VecDestroy(&locMaxEdge));
1041:     PetscCall(VecDestroy(&locMaxPE));
1042:     /* create next graph */
1043:     {
1044:       Vec diag;
1045:       /* add identity for unmatched vertices so they stay alive */
1046:       for (PetscInt kk = 0, gid1, gid = my0; kk < nloc; kk++, gid++) {
1047:         if (!lid_matched[kk]) {
1048:           const PetscInt lid = kk;
1049:           PetscCDIntNd  *pos;
1050:           PetscCall(PetscCDGetHeadPos(agg_llists, lid, &pos));
1051:           PetscCheck(pos, PETSC_COMM_SELF, PETSC_ERR_PLIB, "empty list in singleton: %d", (int)gid);
1052:           PetscCall(PetscCDIntNdGetID(pos, &gid1));
1053:           PetscCheck(gid1 == gid, PETSC_COMM_SELF, PETSC_ERR_PLIB, "first in list (%d) in singleton not %d", (int)gid1, (int)gid);
1054:           PetscCall(MatSetValues(P, 1, &gid, 1, &gid, &one, INSERT_VALUES));
1055:         }
1056:       }
1057:       PetscCall(MatAssemblyBegin(P, MAT_FINAL_ASSEMBLY));
1058:       PetscCall(MatAssemblyEnd(P, MAT_FINAL_ASSEMBLY));

1060:       /* project to make new graph with collapsed edges */
1061:       PetscCall(MatPtAP(cMat, P, MAT_INITIAL_MATRIX, 1.0, &tMat));
1062:       PetscCall(MatDestroy(&P));
1063:       PetscCall(MatDestroy(&cMat));
1064:       cMat = tMat;
1065:       PetscCall(MatCreateVecs(cMat, &diag, NULL));
1066:       PetscCall(MatGetDiagonal(cMat, diag));
1067:       PetscCall(VecReciprocal(diag));
1068:       PetscCall(VecSqrtAbs(diag));
1069:       PetscCall(MatDiagonalScale(cMat, diag, diag));
1070:       PetscCall(VecDestroy(&diag));
1071:     }
1072:   } /* coarsen iterator */

1074:   /* make a fake matrix with Mat->B only, for smoothed aggregation QR. Needed if we make an aux graph (i.e., PtAP) with k > 1 */
1075:   if (size > 1) {
1076:     Mat           mat;
1077:     PetscCDIntNd *pos;
1078:     PetscInt      NN, MM, jj = 0, mxsz = 0;

1080:     for (int kk = 0; kk < nloc; kk++) {
1081:       PetscCall(PetscCDCountAt(agg_llists, kk, &jj));
1082:       if (jj > mxsz) mxsz = jj;
1083:     }
1084:     PetscCall(MatGetSize(a_Gmat, &MM, &NN));
1085:     if (mxsz > MM - nloc) mxsz = MM - nloc;
1086:     /* matrix of ghost adj for square graph */
1087:     PetscCall(MatCreateAIJ(comm, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE, 0, NULL, mxsz, NULL, &mat));
1088:     for (PetscInt lid = 0, gid = my0; lid < nloc; lid++, gid++) {
1089:       PetscCall(PetscCDGetHeadPos(agg_llists, lid, &pos));
1090:       while (pos) {
1091:         PetscInt gid1;
1092:         PetscCall(PetscCDIntNdGetID(pos, &gid1));
1093:         PetscCall(PetscCDGetNextPos(agg_llists, lid, &pos));
1094:         if (gid1 < my0 || gid1 >= my0 + nloc) PetscCall(MatSetValues(mat, 1, &gid, 1, &gid1, &one, ADD_VALUES));
1095:       }
1096:     }
1097:     PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
1098:     PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
1099:     PetscCall(PetscCDSetMat(agg_llists, mat));
1100:     PetscCall(PetscCDDestroy(ghost_deleted_list));
1101:     if (rbuff_sz) PetscCall(PetscFree(rbuff)); // always true
1102:   }
1103:   // move BCs into some node
1104:   if (bc_list) {
1105:     PetscCDIntNd *pos;
1106:     PetscCall(PetscCDGetHeadPos(bc_list, 0, &pos));
1107:     while (pos) {
1108:       PetscInt gid1;
1109:       PetscCall(PetscCDIntNdGetID(pos, &gid1));
1110:       PetscCall(PetscCDGetNextPos(bc_list, 0, &pos));
1111:       PetscCall(PetscCDAppendID(agg_llists, bc_agg, gid1));
1112:     }
1113:     PetscCall(PetscCDRemoveAllAt(bc_list, 0));
1114:     PetscCall(PetscCDDestroy(bc_list));
1115:   }
1116:   {
1117:     // check sizes -- every vertex must end up in some aggregate
1118:     PetscInt sz, globalsz, MM;
1119:     PetscCall(MatGetSize(a_Gmat, &MM, NULL));
1120:     PetscCall(PetscCDCount(agg_llists, &sz));
1121:     PetscCall(MPIU_Allreduce(&sz, &globalsz, 1, MPIU_INT, MPI_SUM, comm));
1122:     PetscCheck(MM == globalsz, comm, PETSC_ERR_SUP, "lost %d equations ?", (int)(MM - globalsz));
1123:   }
1124:   // cleanup
1125:   PetscCall(MatDestroy(&cMat));
1126:   PetscCall(PetscFree(lid_cprowID));
1127:   PetscCall(PetscFree(lid_max_pe));
1128:   PetscCall(PetscFree(lid_matched));
1129:   PetscCall(ISDestroy(&info_is));
1130:   PetscFunctionReturn(PETSC_SUCCESS);
1131: }
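
/*
   Editor's note: an illustrative sketch, not part of the PETSc source, of the serial core of
   the greedy matching implemented above, with the MPI communication, threshold filtering, and
   boundary-condition handling stripped out. In serial, my0 == 0, so the gid1 field of an Edge
   is also a local index. ExampleSerialHEMMatch and its arguments are hypothetical.
*/
static PetscErrorCode ExampleSerialHEMMatch(PetscInt nEdges, Edge *edges, PetscInt nloc, PetscBool *matched)
{
  PetscFunctionBegin;
  for (PetscInt k = 0; k < nloc; k++) matched[k] = PETSC_FALSE;
  if (nEdges) qsort(edges, nEdges, sizeof(Edge), gamg_hem_compare); /* heaviest edges first */
  for (PetscInt k = 0; k < nEdges; k++) {
    const PetscInt lid0 = edges[k].lid0, lid1 = edges[k].gid1; /* serial: gid == lid */
    if (matched[lid0] || matched[lid1]) continue;              /* both endpoints must still be free */
    matched[lid0] = PETSC_TRUE;                                /* match on the heaviest edge available */
    matched[lid1] = PETSC_TRUE;
  }
  PetscFunctionReturn(PETSC_SUCCESS);
}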

1133: /*
1134:    HEM coarsen, simple greedy.
1135: */
1136: static PetscErrorCode MatCoarsenApply_HEM(MatCoarsen coarse)
1137: {
1138:   Mat mat = coarse->graph;

1140:   PetscFunctionBegin;
1141:   PetscCall(MatCoarsenApply_HEM_private(mat, coarse->max_it, coarse->threshold, &coarse->agg_lists));
1142:   PetscFunctionReturn(PETSC_SUCCESS);
1143: }

1145: static PetscErrorCode MatCoarsenView_HEM(MatCoarsen coarse, PetscViewer viewer)
1146: {
1147:   PetscMPIInt rank;
1148:   PetscBool   iascii;

1150:   PetscFunctionBegin;
1151:   PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)coarse), &rank));
1152:   PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1153:   if (iascii) {
1154:     PetscCDIntNd *pos, *pos2;
1155:     PetscCall(PetscViewerASCIIPrintf(viewer, "%d matching steps with threshold = %g\n", (int)coarse->max_it, (double)coarse->threshold));
1156:     PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1157:     for (PetscInt kk = 0; kk < coarse->agg_lists->size; kk++) {
1158:       PetscCall(PetscCDGetHeadPos(coarse->agg_lists, kk, &pos));
1159:       if ((pos2 = pos)) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "selected local %d: ", (int)kk));
1160:       while (pos) {
1161:         PetscInt gid1;
1162:         PetscCall(PetscCDIntNdGetID(pos, &gid1));
1163:         PetscCall(PetscCDGetNextPos(coarse->agg_lists, kk, &pos));
1164:         PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " %d ", (int)gid1));
1165:       }
1166:       if (pos2) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "\n"));
1167:     }
1168:     PetscCall(PetscViewerFlush(viewer));
1169:     PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1170:   }
1171:   PetscFunctionReturn(PETSC_SUCCESS);
1172: }

1174: /*
1175:   MatCoarsenCreate_HEM - A coarsener that uses HEM, a simple greedy heavy-edge-matching coarsener
1176: */
1177: PETSC_EXTERN PetscErrorCode MatCoarsenCreate_HEM(MatCoarsen coarse)
1178: {
1179:   PetscFunctionBegin;
1180:   coarse->ops->apply = MatCoarsenApply_HEM;
1181:   coarse->ops->view  = MatCoarsenView_HEM;
1182:   coarse->max_it     = 4;
1183:   PetscFunctionReturn(PETSC_SUCCESS);
1184: }
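
/*
   Editor's note: an illustrative sketch, not part of the PETSc source, of how a user would
   drive this coarsener through the public MatCoarsen interface on a symmetric graph matrix A.
   ExampleApplyHEM is hypothetical; the MatCoarsen calls are standard PETSc API.
*/
static PetscErrorCode ExampleApplyHEM(Mat A)
{
  MatCoarsen crs;

  PetscFunctionBegin;
  PetscCall(MatCoarsenCreate(PetscObjectComm((PetscObject)A), &crs));
  PetscCall(MatCoarsenSetType(crs, MATCOARSENHEM));
  PetscCall(MatCoarsenSetAdjacency(crs, A)); /* graph whose edge weights drive the matching */
  PetscCall(MatCoarsenSetFromOptions(crs));
  PetscCall(MatCoarsenApply(crs));           /* dispatches to MatCoarsenApply_HEM() above */
  PetscCall(MatCoarsenView(crs, PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)A))));
  PetscCall(MatCoarsenDestroy(&crs));
  PetscFunctionReturn(PETSC_SUCCESS);
}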