Actual source code: hem.c
1: #include <petsc/private/matimpl.h>
2: #include <../src/mat/impls/aij/seq/aij.h>
3: #include <../src/mat/impls/aij/mpi/mpiaij.h>
4: #include <petscdm.h>
6: /* linked list methods
7: *
8: * PetscCDCreate
9: */
10: PetscErrorCode PetscCDCreate(PetscInt a_size, PetscCoarsenData **a_out)
11: {
12: PetscCoarsenData *ail;
14: PetscFunctionBegin;
15: /* allocate pool, partially */
16: PetscCall(PetscNew(&ail));
17: *a_out = ail;
18: ail->pool_list.next = NULL;
19: ail->pool_list.array = NULL;
20: ail->chk_sz = 0;
21: /* allocate array */
22: ail->size = a_size;
23: PetscCall(PetscCalloc1(a_size, &ail->array));
24: ail->extra_nodes = NULL;
25: ail->mat = NULL;
26: PetscFunctionReturn(PETSC_SUCCESS);
27: }
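/* A minimal lifecycle sketch (illustrative only; ExampleCDLifecycle is not part of PETSc and is
   never called). It assumes the PetscCD* prototypes from <petsc/private/matimpl.h>, included above,
   are in scope: create a container with 4 local lists, pick a pool chunk size, append a couple of
   global ids, and free everything. */
static PetscErrorCode ExampleCDLifecycle(void)
{
  PetscCoarsenData *cd;
  PetscInt          len;

  PetscFunctionBegin;
  PetscCall(PetscCDCreate(4, &cd));       /* 4 local lists, all empty */
  PetscCall(PetscCDSetChunkSize(cd, 16)); /* list nodes are pool-allocated 16 at a time */
  PetscCall(PetscCDAppendID(cd, 0, 10));  /* list 0: 10 */
  PetscCall(PetscCDAppendID(cd, 0, 11));  /* list 0: 10 -> 11 */
  PetscCall(PetscCDCountAt(cd, 0, &len)); /* len == 2 */
  PetscCall(PetscCDDestroy(cd));          /* frees the pool, the list array, and the container */
  PetscFunctionReturn(PETSC_SUCCESS);
}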
29: /* PetscCDDestroy
30: */
31: PetscErrorCode PetscCDDestroy(PetscCoarsenData *ail)
32: {
33: PetscCDArrNd *n = &ail->pool_list;
35: PetscFunctionBegin;
36: n = n->next;
37: while (n) {
38: PetscCDArrNd *lstn = n;
39: n = n->next;
40: PetscCall(PetscFree(lstn));
41: }
42: if (ail->pool_list.array) PetscCall(PetscFree(ail->pool_list.array));
43: PetscCall(PetscFree(ail->array));
44: if (ail->mat) PetscCall(MatDestroy(&ail->mat));
45: /* delete this (+agg+pool array) */
46: PetscCall(PetscFree(ail));
47: PetscFunctionReturn(PETSC_SUCCESS);
48: }
50: /* PetscCDSetChunkSize
51: */
52: PetscErrorCode PetscCDSetChunkSize(PetscCoarsenData *ail, PetscInt a_sz)
53: {
54: PetscFunctionBegin;
55: ail->chk_sz = a_sz;
56: PetscFunctionReturn(PETSC_SUCCESS);
57: }
59: /* PetscCDGetNewNode - hand out a new list node: reuse one from the 'extra_nodes' free list if available, otherwise
60:    take the next slot from the chunk-allocated pool, allocating a new chunk of 'chk_sz' nodes when the pool runs out */
61: static PetscErrorCode PetscCDGetNewNode(PetscCoarsenData *ail, PetscCDIntNd **a_out, PetscInt a_id)
62: {
63: PetscFunctionBegin;
64: *a_out = NULL; /* squelch -Wmaybe-uninitialized */
65: if (ail->extra_nodes) {
66: PetscCDIntNd *node = ail->extra_nodes;
67: ail->extra_nodes = node->next;
68: node->gid = a_id;
69: node->next = NULL;
70: *a_out = node;
71: } else {
72: if (!ail->pool_list.array) {
73: if (!ail->chk_sz) ail->chk_sz = 10; /* use a chunk size of ail->size? */
74: PetscCall(PetscMalloc1(ail->chk_sz, &ail->pool_list.array));
75: ail->new_node = ail->pool_list.array;
76: ail->new_left = ail->chk_sz;
77: ail->new_node->next = NULL;
78: } else if (!ail->new_left) {
79: PetscCDArrNd *node;
80: PetscCall(PetscMalloc(ail->chk_sz * sizeof(PetscCDIntNd) + sizeof(PetscCDArrNd), &node));
81: node->array = (PetscCDIntNd *)(node + 1);
82: node->next = ail->pool_list.next;
83: ail->pool_list.next = node;
84: ail->new_left = ail->chk_sz;
85: ail->new_node = node->array;
86: }
87: ail->new_node->gid = a_id;
88: ail->new_node->next = NULL;
89: *a_out = ail->new_node++;
90: ail->new_left--;
91: }
92: PetscFunctionReturn(PETSC_SUCCESS);
93: }
95: /* PetscCDIntNdSetID
96: */
97: PetscErrorCode PetscCDIntNdSetID(PetscCDIntNd *a_this, PetscInt a_id)
98: {
99: PetscFunctionBegin;
100: a_this->gid = a_id;
101: PetscFunctionReturn(PETSC_SUCCESS);
102: }
104: /* PetscCDIntNdGetID
105: */
106: PetscErrorCode PetscCDIntNdGetID(const PetscCDIntNd *a_this, PetscInt *a_gid)
107: {
108: PetscFunctionBegin;
109: *a_gid = a_this->gid;
110: PetscFunctionReturn(PETSC_SUCCESS);
111: }
113: /* PetscCDGetHeadPos
114: */
115: PetscErrorCode PetscCDGetHeadPos(const PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd **pos)
116: {
117: PetscFunctionBegin;
118: PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "a_idx >= ail->size: a_idx=%" PetscInt_FMT ".", a_idx);
119: *pos = ail->array[a_idx];
120: PetscFunctionReturn(PETSC_SUCCESS);
121: }
123: /* PetscCDGetNextPos
124: */
125: PetscErrorCode PetscCDGetNextPos(const PetscCoarsenData *ail, PetscInt l_idx, PetscCDIntNd **pos)
126: {
127: PetscFunctionBegin;
128: PetscCheck(*pos, PETSC_COMM_SELF, PETSC_ERR_PLIB, "NULL input position.");
129: *pos = (*pos)->next;
130: PetscFunctionReturn(PETSC_SUCCESS);
131: }
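/* Iteration sketch (illustrative only; ExampleCDPrintListAt is not part of PETSc and is never
   called): walk one list with the head/next position API and read each stored global id, in the
   same pattern the matching code below uses. */
static PetscErrorCode ExampleCDPrintListAt(PetscCoarsenData *cd, PetscInt idx)
{
  PetscCDIntNd *pos;
  PetscInt      gid;

  PetscFunctionBegin;
  PetscCall(PetscCDGetHeadPos(cd, idx, &pos));
  while (pos) {
    PetscCall(PetscCDIntNdGetID(pos, &gid));
    PetscCall(PetscCDGetNextPos(cd, idx, &pos)); /* advance; 'pos' becomes NULL at the end of the list */
    PetscCall(PetscPrintf(PETSC_COMM_SELF, "%" PetscInt_FMT " ", gid));
  }
  PetscCall(PetscPrintf(PETSC_COMM_SELF, "\n"));
  PetscFunctionReturn(PETSC_SUCCESS);
}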
133: /* PetscCDAppendID
134: */
135: PetscErrorCode PetscCDAppendID(PetscCoarsenData *ail, PetscInt a_idx, PetscInt a_id)
136: {
137: PetscCDIntNd *n, *n2;
139: PetscFunctionBegin;
140: PetscCall(PetscCDGetNewNode(ail, &n, a_id));
141: PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
142: if (!(n2 = ail->array[a_idx])) ail->array[a_idx] = n;
143: else {
144: do {
145: if (!n2->next) {
146: n2->next = n;
147: PetscCheck(!n->next, PETSC_COMM_SELF, PETSC_ERR_PLIB, "n should not have a next");
148: break;
149: }
150: n2 = n2->next;
151: } while (n2);
152: PetscCheck(n2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "n2 should be non-null");
153: }
154: PetscFunctionReturn(PETSC_SUCCESS);
155: }
157: /* PetscCDAppendNode
158: */
159: PetscErrorCode PetscCDAppendNode(PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd *a_n)
160: {
161: PetscCDIntNd *n2;
163: PetscFunctionBegin;
164: PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
165: if (!(n2 = ail->array[a_idx])) ail->array[a_idx] = a_n;
166: else {
167: do {
168: if (!n2->next) {
169: n2->next = a_n;
170: a_n->next = NULL;
171: break;
172: }
173: n2 = n2->next;
174: } while (n2);
175: PetscCheck(n2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "n2 should be non-null");
176: }
177: PetscFunctionReturn(PETSC_SUCCESS);
178: }
180: /* PetscCDRemoveNextNode - remove a_last->next; this exposes the singly linked list structure to the API (not used)
181: */
182: PetscErrorCode PetscCDRemoveNextNode(PetscCoarsenData *ail, PetscInt a_idx, PetscCDIntNd *a_last)
183: {
184: PetscCDIntNd *del;
186: PetscFunctionBegin;
187: PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
188: PetscCheck(a_last->next, PETSC_COMM_SELF, PETSC_ERR_PLIB, "a_last should have a next");
189: del = a_last->next;
190: a_last->next = del->next;
191: /* del->next = NULL; -- this is still used in an iterator so keep it intact -- need to fix this with a doubly linked list */
192: /* could reuse n2 but PetscCDAppendNode sometimes uses it */
193: PetscFunctionReturn(PETSC_SUCCESS);
194: }
196: /* PetscCDPrint
197: */
198: PetscErrorCode PetscCDPrint(const PetscCoarsenData *ail, PetscInt my0, MPI_Comm comm)
199: {
200: PetscCDIntNd *n, *n2;
201: PetscInt ii;
203: PetscFunctionBegin;
204: for (ii = 0; ii < ail->size; ii++) {
205: n2 = n = ail->array[ii];
206: if (n) PetscCall(PetscSynchronizedPrintf(comm, "list %" PetscInt_FMT ":", ii + my0));
207: while (n) {
208: PetscCall(PetscSynchronizedPrintf(comm, " %" PetscInt_FMT, n->gid));
209: n = n->next;
210: }
211: if (n2) PetscCall(PetscSynchronizedPrintf(comm, "\n"));
212: }
213: PetscCall(PetscSynchronizedFlush(comm, PETSC_STDOUT));
214: PetscFunctionReturn(PETSC_SUCCESS);
215: }
217: /* PetscCDMoveAppend - take the list at a_srcidx and append it to the list at a_destidx
218: */
219: PetscErrorCode PetscCDMoveAppend(PetscCoarsenData *ail, PetscInt a_destidx, PetscInt a_srcidx)
220: {
221: PetscCDIntNd *n;
223: PetscFunctionBegin;
224: PetscCheck(a_srcidx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_srcidx);
225: PetscCheck(a_destidx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_destidx);
226: PetscCheck(a_destidx != a_srcidx, PETSC_COMM_SELF, PETSC_ERR_PLIB, "a_destidx==a_srcidx %" PetscInt_FMT ".", a_destidx);
227: n = ail->array[a_destidx];
228: if (!n) ail->array[a_destidx] = ail->array[a_srcidx];
229: else {
230: do {
231: if (!n->next) {
232: n->next = ail->array[a_srcidx]; // append
233: break;
234: }
235: n = n->next;
236: } while (1);
237: }
238: ail->array[a_srcidx] = NULL; // empty
239: PetscFunctionReturn(PETSC_SUCCESS);
240: }
242: /* PetscCDRemoveAllAt - empty one list and move its nodes to the extra-node cache
243: */
244: PetscErrorCode PetscCDRemoveAllAt(PetscCoarsenData *ail, PetscInt a_idx)
245: {
246: PetscCDIntNd *rem, *n1;
248: PetscFunctionBegin;
249: PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
250: rem = ail->array[a_idx];
251: ail->array[a_idx] = NULL;
252: if (!(n1 = ail->extra_nodes)) ail->extra_nodes = rem;
253: else {
254: while (n1->next) n1 = n1->next;
255: n1->next = rem;
256: }
257: PetscFunctionReturn(PETSC_SUCCESS);
258: }
260: /* PetscCDCountAt
261: */
262: PetscErrorCode PetscCDCountAt(const PetscCoarsenData *ail, PetscInt a_idx, PetscInt *a_sz)
263: {
264: PetscCDIntNd *n1;
265: PetscInt sz = 0;
267: PetscFunctionBegin;
268: PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
269: n1 = ail->array[a_idx];
270: while (n1) {
271: n1 = n1->next;
272: sz++;
273: }
274: *a_sz = sz;
275: PetscFunctionReturn(PETSC_SUCCESS);
276: }
278: /* PetscCDCount - count the nodes in all lists
279: */
280: PetscErrorCode PetscCDCount(const PetscCoarsenData *ail, PetscInt *a_sz)
281: {
282: PetscInt sz = 0;
284: PetscFunctionBegin;
285: for (int ii = 0; ii < ail->size; ii++) {
286: PetscCDIntNd *n1 = ail->array[ii];
287: while (n1) {
288: n1 = n1->next;
289: sz++;
290: }
291: }
292: *a_sz = sz;
293: PetscFunctionReturn(PETSC_SUCCESS);
294: }
296: /* PetscCDIsEmptyAt - Is the list empty? (not used)
297: */
298: PetscErrorCode PetscCDIsEmptyAt(const PetscCoarsenData *ail, PetscInt a_idx, PetscBool *a_e)
299: {
300: PetscFunctionBegin;
301: PetscCheck(a_idx < ail->size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Index %" PetscInt_FMT " out of range.", a_idx);
302: *a_e = (PetscBool)(ail->array[a_idx] == NULL);
303: PetscFunctionReturn(PETSC_SUCCESS);
304: }
306: /* PetscCDGetNonemptyIS - used for C-F methods
307: */
308: PetscErrorCode PetscCDGetNonemptyIS(PetscCoarsenData *ail, IS *a_mis)
309: {
310: PetscCDIntNd *n;
311: PetscInt ii, kk;
312: PetscInt *permute;
314: PetscFunctionBegin;
315: for (ii = kk = 0; ii < ail->size; ii++) {
316: n = ail->array[ii];
317: if (n) kk++;
318: }
319: PetscCall(PetscMalloc1(kk, &permute));
320: for (ii = kk = 0; ii < ail->size; ii++) {
321: n = ail->array[ii];
322: if (n) permute[kk++] = ii;
323: }
324: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, kk, permute, PETSC_OWN_POINTER, a_mis));
325: PetscFunctionReturn(PETSC_SUCCESS);
326: }
328: /* PetscCDGetMat
329: */
330: PetscErrorCode PetscCDGetMat(PetscCoarsenData *ail, Mat *a_mat)
331: {
332: PetscFunctionBegin;
333: *a_mat = ail->mat;
334: PetscFunctionReturn(PETSC_SUCCESS);
335: }
337: /* PetscCDSetMat
338: */
339: PetscErrorCode PetscCDSetMat(PetscCoarsenData *ail, Mat a_mat)
340: {
341: PetscFunctionBegin;
342: if (ail->mat) {
343: PetscCall(MatDestroy(&ail->mat)); // should not happen
344: }
345: ail->mat = a_mat;
346: PetscFunctionReturn(PETSC_SUCCESS);
347: }
349: /* PetscCDClearMat
350: */
351: PetscErrorCode PetscCDClearMat(PetscCoarsenData *ail)
352: {
353: PetscFunctionBegin;
354: ail->mat = NULL;
355: PetscFunctionReturn(PETSC_SUCCESS);
356: }
358: /* PetscCDGetASMBlocks - get IS of aggregates for ASM smoothers
359: */
360: PetscErrorCode PetscCDGetASMBlocks(const PetscCoarsenData *ail, const PetscInt a_bs, PetscInt *a_sz, IS **a_local_is)
361: {
362: PetscCDIntNd *n;
363: PetscInt lsz, ii, kk, *idxs, jj, gid;
364: IS *is_loc = NULL;
366: PetscFunctionBegin;
367: for (ii = kk = 0; ii < ail->size; ii++) {
368: if (ail->array[ii]) kk++;
369: }
370: *a_sz = kk;
371: PetscCall(PetscMalloc1(kk, &is_loc));
372: for (ii = kk = 0; ii < ail->size; ii++) {
373: for (lsz = 0, n = ail->array[ii]; n; lsz++, n = n->next) /* void */
374: ;
375: if (lsz) {
376: PetscCall(PetscMalloc1(a_bs * lsz, &idxs));
377: for (lsz = 0, n = ail->array[ii]; n; n = n->next) {
378: PetscCall(PetscCDIntNdGetID(n, &gid));
379: for (jj = 0; jj < a_bs; lsz++, jj++) idxs[lsz] = a_bs * gid + jj;
380: }
381: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, lsz, idxs, PETSC_OWN_POINTER, &is_loc[kk++]));
382: }
383: }
384: PetscCheck(*a_sz == kk, PETSC_COMM_SELF, PETSC_ERR_PLIB, "*a_sz %" PetscInt_FMT " != kk %" PetscInt_FMT, *a_sz, kk);
385: *a_local_is = is_loc; /* out */
386: PetscFunctionReturn(PETSC_SUCCESS);
387: }
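/* Caller-side sketch (illustrative only; ExampleUseASMBlocks is not part of PETSc and is never
   called), assuming the caller keeps ownership of the returned blocks: view each aggregate's IS,
   then destroy the ISs and free the array that holds them. */
static PetscErrorCode ExampleUseASMBlocks(PetscCoarsenData *cd, PetscInt bs)
{
  PetscInt nblk;
  IS      *blocks;

  PetscFunctionBegin;
  PetscCall(PetscCDGetASMBlocks(cd, bs, &nblk, &blocks));
  for (PetscInt i = 0; i < nblk; i++) {
    PetscCall(ISView(blocks[i], PETSC_VIEWER_STDOUT_SELF));
    PetscCall(ISDestroy(&blocks[i]));
  }
  PetscCall(PetscFree(blocks));
  PetscFunctionReturn(PETSC_SUCCESS);
}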
389: /* edge for the matching priority queue: lid0 is the local id of vertex 0, gid1 the global id of vertex 1, and ghost1_idx the index of vertex 1 in the ghost arrays (-1 if vertex 1 is local) */
390: typedef struct edge_tag {
391: PetscReal weight;
392: PetscInt lid0, gid1, ghost1_idx;
393: } Edge;
395: #define MY_MEPS (PETSC_MACHINE_EPSILON * 100)
396: static int gamg_hem_compare(const void *a, const void *b)
397: {
398: PetscReal va = ((Edge *)a)->weight, vb = ((Edge *)b)->weight;
399: return (va <= vb - MY_MEPS) ? 1 : (va > vb + MY_MEPS) ? -1 : 0; /* 0 for equal */
400: }
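/* Sketch of the comparator's semantics (illustrative only; ExampleSortEdges is not part of PETSc
   and is never called): qsort with gamg_hem_compare puts the heaviest edges first, and weights
   within MY_MEPS of each other are treated as equal. */
static void ExampleSortEdges(void)
{
  Edge e[3] = {{.weight = 0.1}, {.weight = 0.9}, {.weight = 0.5}};

  qsort(e, 3, sizeof(Edge), gamg_hem_compare); /* now e[0].weight == 0.9, e[1].weight == 0.5, e[2].weight == 0.1 */
}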
402: /*
403: MatCoarsenApply_HEM_private - parallel heavy edge matching
405: Input Parameters:
406: + a_Gmat - global matrix of the graph
407: . n_iter - number of matching iterations
408: - threshold - threshold for filtering weak edges out of the graph
410: Output Parameter:
411: . a_locals_llist - array of lists of local nodes, each rooted at a local node
412: */
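/* Outline (one outer iteration): sort the graph edges by weight, greedily match the heaviest
   still-unmatched edges (exchanging lists of deleted ghost vertices with neighboring processes),
   merge the aggregate lists of matched vertices, then project the graph with PtAP so the next
   iteration works on the collapsed graph. */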
413: static PetscErrorCode MatCoarsenApply_HEM_private(Mat a_Gmat, const PetscInt n_iter, const PetscReal threshold, PetscCoarsenData **a_locals_llist)
414: {
415: #define REQ_BF_SIZE 100
416: PetscBool isMPI;
417: MPI_Comm comm;
418: PetscInt ix, *ii, *aj, Iend, my0, ncomm_procs, bc_agg = -1, *rbuff = NULL, rbuff_sz = 0;
419: PetscMPIInt rank, size, comm_procs[REQ_BF_SIZE], *lid_max_pe;
420: const PetscInt nloc = a_Gmat->rmap->n, request_size = PetscCeilReal((PetscReal)sizeof(MPI_Request) / (PetscReal)sizeof(PetscInt));
421: PetscInt *lid_cprowID;
422: PetscBool *lid_matched;
423: Mat_SeqAIJ *matA, *matB = NULL;
424: Mat_MPIAIJ *mpimat = NULL;
425: PetscScalar one = 1.;
426: PetscCoarsenData *agg_llists = NULL, *ghost_deleted_list = NULL, *bc_list = NULL;
427: Mat cMat, tMat, P;
428: MatScalar *ap;
429: IS info_is;
431: PetscFunctionBegin;
432: PetscCall(PetscObjectGetComm((PetscObject)a_Gmat, &comm));
433: PetscCallMPI(MPI_Comm_rank(comm, &rank));
434: PetscCallMPI(MPI_Comm_size(comm, &size));
435: PetscCall(MatGetOwnershipRange(a_Gmat, &my0, &Iend));
436: PetscCall(ISCreate(comm, &info_is));
437: PetscCall(PetscInfo(info_is, "Start %" PetscInt_FMT " iterations of HEM.\n", n_iter));
439: PetscCall(PetscMalloc1(nloc, &lid_matched));
440: PetscCall(PetscMalloc1(nloc, &lid_cprowID));
441: PetscCall(PetscMalloc1(nloc, &lid_max_pe));
443: PetscCall(PetscCDCreate(nloc, &agg_llists));
444: PetscCall(PetscCDSetChunkSize(agg_llists, nloc + 1));
445: *a_locals_llist = agg_llists;
446: /* add self to all lists */
447: for (int kk = 0; kk < nloc; kk++) PetscCall(PetscCDAppendID(agg_llists, kk, my0 + kk));
448: /* make a copy of the graph; this copy gets destroyed in the iterations */
449: PetscCall(MatDuplicate(a_Gmat, MAT_COPY_VALUES, &cMat));
450: PetscCall(MatConvert(cMat, MATAIJ, MAT_INPLACE_MATRIX, &cMat));
451: isMPI = (PetscBool)(size > 1);
452: if (isMPI) {
453: /* list of deleted ghosts, should compress this */
454: PetscCall(PetscCDCreate(size, &ghost_deleted_list));
455: PetscCall(PetscCDSetChunkSize(ghost_deleted_list, 100));
456: }
457: for (int iter = 0; iter < n_iter; iter++) {
458: const PetscScalar *lghost_max_ew, *lid_max_ew;
459: PetscBool *lghost_matched;
460: PetscMPIInt *lghost_pe, *lghost_max_pe;
461: Vec locMaxEdge, ghostMaxEdge, ghostMaxPE, locMaxPE;
462: PetscInt *lghost_gid, nEdges, nEdges0, num_ghosts = 0;
463: Edge *Edges;
464: const int n_sub_its = 1000; // in case of a bug, stop at some point
465: /* get submatrices of cMat */
466: for (int kk = 0; kk < nloc; kk++) lid_cprowID[kk] = -1;
467: if (isMPI) {
468: mpimat = (Mat_MPIAIJ *)cMat->data;
469: matA = (Mat_SeqAIJ *)mpimat->A->data;
470: matB = (Mat_SeqAIJ *)mpimat->B->data;
471: if (!matB->compressedrow.use) {
472: /* force construction of compressed row data structure since code below requires it */
473: PetscCall(MatCheckCompressedRow(mpimat->B, matB->nonzerorowcnt, &matB->compressedrow, matB->i, mpimat->B->rmap->n, -1.0));
474: }
475: /* set index into compressed row 'lid_cprowID' */
476: for (ix = 0; ix < matB->compressedrow.nrows; ix++) {
477: PetscInt *ridx = matB->compressedrow.rindex, lid = ridx[ix];
478: if (ridx[ix] >= 0) lid_cprowID[lid] = ix;
479: }
480: } else {
481: matA = (Mat_SeqAIJ *)cMat->data;
482: }
483: /* set matched flags: true for empty list */
484: for (int kk = 0; kk < nloc; kk++) {
485: PetscCall(PetscCDCountAt(agg_llists, kk, &ix));
486: if (ix > 0) lid_matched[kk] = PETSC_FALSE;
487: else lid_matched[kk] = PETSC_TRUE; // treat deleted gids as matched
488: }
489: /* max edge and pe vecs */
490: PetscCall(MatCreateVecs(cMat, &locMaxEdge, NULL));
491: PetscCall(MatCreateVecs(cMat, &locMaxPE, NULL));
492: /* get 'lghost_pe' & 'lghost_gid' & init. 'lghost_matched' using 'mpimat->lvec' */
493: if (isMPI) {
494: Vec vec;
495: PetscScalar vval;
496: const PetscScalar *buf;
497: PetscCall(MatCreateVecs(cMat, &vec, NULL));
498: PetscCall(VecGetLocalSize(mpimat->lvec, &num_ghosts));
499: /* lghost_matched */
500: for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) {
501: PetscScalar vval = lid_matched[kk] ? 1.0 : 0.0;
502: PetscCall(VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES));
503: }
504: PetscCall(VecAssemblyBegin(vec));
505: PetscCall(VecAssemblyEnd(vec));
506: PetscCall(VecScatterBegin(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
507: PetscCall(VecScatterEnd(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
508: PetscCall(VecGetArrayRead(mpimat->lvec, &buf)); /* get matched flags in 'buf' */
509: PetscCall(PetscMalloc1(num_ghosts, &lghost_matched));
510: for (int kk = 0; kk < num_ghosts; kk++) {
511: lghost_matched[kk] = (PetscBool)(PetscRealPart(buf[kk]) != 0); // matched flag of the ghost
512: }
513: PetscCall(VecRestoreArrayRead(mpimat->lvec, &buf));
514: /* lghost_pe */
515: vval = (PetscScalar)(rank);
516: for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) PetscCall(VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */
517: PetscCall(VecAssemblyBegin(vec));
518: PetscCall(VecAssemblyEnd(vec));
519: PetscCall(VecScatterBegin(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
520: PetscCall(VecScatterEnd(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
521: PetscCall(VecGetArrayRead(mpimat->lvec, &buf)); /* get proc ID in 'buf' */
522: PetscCall(PetscMalloc1(num_ghosts, &lghost_pe));
523: for (int kk = 0; kk < num_ghosts; kk++) lghost_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the proc of the ghost for now
524: PetscCall(VecRestoreArrayRead(mpimat->lvec, &buf));
525: /* lghost_gid */
526: for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) {
527: vval = (PetscScalar)(gid);
528: PetscCall(VecSetValues(vec, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */
529: }
530: PetscCall(VecAssemblyBegin(vec));
531: PetscCall(VecAssemblyEnd(vec));
532: PetscCall(VecScatterBegin(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
533: PetscCall(VecScatterEnd(mpimat->Mvctx, vec, mpimat->lvec, INSERT_VALUES, SCATTER_FORWARD));
534: PetscCall(VecDestroy(&vec));
535: PetscCall(VecGetArrayRead(mpimat->lvec, &buf)); /* get global IDs in 'buf' */
536: PetscCall(PetscMalloc1(num_ghosts, &lghost_gid));
537: for (int kk = 0; kk < num_ghosts; kk++) lghost_gid[kk] = (PetscInt)PetscRealPart(buf[kk]);
538: PetscCall(VecRestoreArrayRead(mpimat->lvec, &buf));
539: }
540: // get 'comm_procs' (could hoist)
541: for (int kk = 0; kk < REQ_BF_SIZE; kk++) comm_procs[kk] = -1;
542: for (ix = 0, ncomm_procs = 0; ix < num_ghosts; ix++) {
543: PetscMPIInt proc = lghost_pe[ix], idx = -1;
544: for (int k = 0; k < ncomm_procs && idx == -1; k++)
545: if (comm_procs[k] == proc) idx = k;
546: if (idx == -1) { comm_procs[ncomm_procs++] = proc; }
547: PetscCheck(ncomm_procs != REQ_BF_SIZE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Receive request array too small: %d", (int)ncomm_procs);
548: }
549: /* count edges, compute initial 'locMaxEdge', 'locMaxPE' */
550: nEdges0 = 0;
551: for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) {
552: PetscReal max_e = 0., tt;
553: PetscScalar vval;
554: PetscInt lid = kk, max_pe = rank, pe, n;
555: ii = matA->i;
556: n = ii[lid + 1] - ii[lid];
557: aj = PetscSafePointerPlusOffset(matA->j, ii[lid]);
558: ap = PetscSafePointerPlusOffset(matA->a, ii[lid]);
559: for (int jj = 0; jj < n; jj++) {
560: PetscInt lidj = aj[jj];
561: if ((tt = PetscRealPart(ap[jj])) > threshold && lidj != lid) {
562: if (tt > max_e) max_e = tt;
563: if (lidj > lid) nEdges0++;
564: }
565: }
566: if ((ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
567: ii = matB->compressedrow.i;
568: n = ii[ix + 1] - ii[ix];
569: ap = matB->a + ii[ix];
570: aj = matB->j + ii[ix];
571: for (int jj = 0; jj < n; jj++) {
572: if ((tt = PetscRealPart(ap[jj])) > threshold) {
573: if (tt > max_e) max_e = tt;
574: nEdges0++;
575: if ((pe = lghost_pe[aj[jj]]) > max_pe) max_pe = pe;
576: }
577: }
578: }
579: vval = max_e;
580: PetscCall(VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES));
581: vval = (PetscScalar)max_pe;
582: PetscCall(VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES));
583: if (iter == 0 && max_e <= MY_MEPS) { // add BCs to fake aggregate
584: lid_matched[lid] = PETSC_TRUE;
585: if (bc_agg == -1) {
586: bc_agg = lid;
587: PetscCall(PetscCDCreate(1, &bc_list));
588: }
589: PetscCall(PetscCDRemoveAllAt(agg_llists, lid));
590: PetscCall(PetscCDAppendID(bc_list, 0, my0 + lid));
591: }
592: }
593: PetscCall(VecAssemblyBegin(locMaxEdge));
594: PetscCall(VecAssemblyEnd(locMaxEdge));
595: PetscCall(VecAssemblyBegin(locMaxPE));
596: PetscCall(VecAssemblyEnd(locMaxPE));
597: /* make 'ghostMaxEdge', 'ghostMaxPE', and 'lghost_max_pe' */
598: if (mpimat) {
599: const PetscScalar *buf;
600: PetscCall(VecDuplicate(mpimat->lvec, &ghostMaxEdge));
601: PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
602: PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
604: PetscCall(VecDuplicate(mpimat->lvec, &ghostMaxPE));
605: PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD));
606: PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD));
607: PetscCall(VecGetArrayRead(ghostMaxPE, &buf));
608: PetscCall(PetscMalloc1(num_ghosts, &lghost_max_pe));
609: for (int kk = 0; kk < num_ghosts; kk++) lghost_max_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the MAX proc of the ghost now
610: PetscCall(VecRestoreArrayRead(ghostMaxPE, &buf));
611: }
612: { // make lid_max_pe
613: const PetscScalar *buf;
614: PetscCall(VecGetArrayRead(locMaxPE, &buf));
615: for (int kk = 0; kk < nloc; kk++) lid_max_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the MAX neighbor proc of each local vertex
616: PetscCall(VecRestoreArrayRead(locMaxPE, &buf));
617: }
618: /* set up the sorted list of edges: allocate 'Edges', fill it, and sort it below */
619: PetscCall(PetscMalloc1(nEdges0, &Edges));
620: nEdges = 0;
621: for (int kk = 0, n; kk < nloc; kk++) {
622: const PetscInt lid = kk;
623: PetscReal tt;
624: ii = matA->i;
625: n = ii[lid + 1] - ii[lid];
626: aj = PetscSafePointerPlusOffset(matA->j, ii[lid]);
627: ap = PetscSafePointerPlusOffset(matA->a, ii[lid]);
628: for (int jj = 0; jj < n; jj++) {
629: PetscInt lidj = aj[jj];
630: if ((tt = PetscRealPart(ap[jj])) > threshold && lidj != lid) {
631: if (lidj > lid) {
632: Edges[nEdges].lid0 = lid;
633: Edges[nEdges].gid1 = lidj + my0;
634: Edges[nEdges].ghost1_idx = -1;
635: Edges[nEdges].weight = tt;
636: nEdges++;
637: }
638: }
639: }
640: if ((ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbor */
641: ii = matB->compressedrow.i;
642: n = ii[ix + 1] - ii[ix];
643: ap = matB->a + ii[ix];
644: aj = matB->j + ii[ix];
645: for (int jj = 0; jj < n; jj++) {
646: if ((tt = PetscRealPart(ap[jj])) > threshold) {
647: Edges[nEdges].lid0 = lid;
648: Edges[nEdges].gid1 = lghost_gid[aj[jj]];
649: Edges[nEdges].ghost1_idx = aj[jj];
650: Edges[nEdges].weight = tt;
651: nEdges++;
652: }
653: }
654: }
655: }
656: PetscCheck(nEdges == nEdges0, PETSC_COMM_SELF, PETSC_ERR_SUP, "nEdges != nEdges0: %d %d", (int)nEdges0, (int)nEdges);
657: if (Edges) qsort(Edges, nEdges, sizeof(Edge), gamg_hem_compare);
659: PetscCall(PetscInfo(info_is, "[%d] HEM iteration %d with %d edges\n", rank, iter, (int)nEdges));
661: /* projection matrix */
662: PetscCall(MatCreate(comm, &P));
663: PetscCall(MatSetType(P, MATAIJ));
664: PetscCall(MatSetSizes(P, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE));
665: PetscCall(MatMPIAIJSetPreallocation(P, 1, NULL, 1, NULL));
666: PetscCall(MatSeqAIJSetPreallocation(P, 1, NULL));
667: PetscCall(MatSetUp(P));
668: /* process - communicate - process */
669: for (int sub_it = 0, old_num_edge = 0; /* sub_it < n_sub_its */; /* sub_it++ */) {
670: PetscInt nactive_edges = 0, n_act_n[3], gn_act_n[3];
671: PetscMPIInt tag1, tag2;
672: PetscCall(VecGetArrayRead(locMaxEdge, &lid_max_ew));
673: if (isMPI) {
674: PetscCall(VecGetArrayRead(ghostMaxEdge, &lghost_max_ew));
675: PetscCall(PetscCommGetNewTag(comm, &tag1));
676: PetscCall(PetscCommGetNewTag(comm, &tag2));
677: }
678: for (int kk = 0; kk < nEdges; kk++) {
679: /* HEM */
680: const Edge *e = &Edges[kk];
681: const PetscInt lid0 = e->lid0, gid1 = e->gid1, ghost1_idx = e->ghost1_idx, gid0 = lid0 + my0, lid1 = gid1 - my0;
682: PetscBool isOK = PETSC_TRUE, print = PETSC_FALSE;
683: if (print) PetscCall(PetscSynchronizedPrintf(comm, "\t[%d] edge (%d %d), %d %d %d\n", rank, (int)gid0, (int)gid1, lid_matched[lid0], (ghost1_idx != -1 && lghost_matched[ghost1_idx]), (ghost1_idx == -1 && lid_matched[lid1])));
684: /* skip if either vertex is matched already */
685: if (lid_matched[lid0] || (ghost1_idx != -1 && lghost_matched[ghost1_idx]) || (ghost1_idx == -1 && lid_matched[lid1])) continue;
687: nactive_edges++;
688: PetscCheck(PetscRealPart(lid_max_ew[lid0]) >= e->weight - MY_MEPS, PETSC_COMM_SELF, PETSC_ERR_SUP, "edge weight %e > max %e", (double)e->weight, (double)PetscRealPart(lid_max_ew[lid0]));
689: if (print) PetscCall(PetscSynchronizedPrintf(comm, "\t[%d] active edge (%d %d), diff0 = %10.4e\n", rank, (int)gid0, (int)gid1, (double)(PetscRealPart(lid_max_ew[lid0]) - (double)e->weight)));
690: // smaller edge, lid_max_ew gets updated - e0
691: if (PetscRealPart(lid_max_ew[lid0]) > e->weight + MY_MEPS) {
692: if (print)
693: PetscCall(PetscSynchronizedPrintf(comm, "\t\t[%d] 1) e0 SKIPPING small edge %20.14e edge (%d %d), diff = %10.4e to proc %d. max = %20.14e, w = %20.14e\n", rank, (double)e->weight, (int)gid0, (int)gid1, (double)(PetscRealPart(lid_max_ew[lid0]) - e->weight), ghost1_idx != -1 ? (int)lghost_pe[ghost1_idx] : rank, (double)PetscRealPart(lid_max_ew[lid0]),
694: (double)e->weight));
695: continue; // we are basically filtering edges here
696: }
697: // e1 - local
698: if (ghost1_idx == -1) {
699: if (PetscRealPart(lid_max_ew[lid1]) > e->weight + MY_MEPS) {
700: if (print)
701: PetscCall(PetscSynchronizedPrintf(comm, "\t\t%c[%d] 2) e1 SKIPPING small local edge %20.14e edge (%d %d), diff = %10.4e\n", ghost1_idx != -1 ? '\t' : ' ', rank, (double)e->weight, (int)gid0, (int)gid1, (double)(PetscRealPart(lid_max_ew[lid1]) - e->weight)));
702: continue; // we are basically filtering edges here
703: }
704: } else { // e1 - ghost
705: /* see if edge might get matched on other proc */
706: PetscReal g_max_e1 = PetscRealPart(lghost_max_ew[ghost1_idx]);
707: if (print)
708: PetscCall(PetscSynchronizedPrintf(comm, "\t\t\t[%d] CHECK GHOST e1, edge (%d %d), E0 MAX EDGE WEIGHT = %10.4e, EDGE WEIGHT = %10.4e, diff1 = %10.4e, ghost proc %d with max pe %d on e0 and %d on e1\n", rank, (int)gid0, (int)gid1, (double)PetscRealPart(lid_max_ew[lid0]),
709: (double)e->weight, (double)(PetscRealPart(lghost_max_ew[ghost1_idx]) - e->weight), (int)lghost_pe[ghost1_idx], lid_max_pe[lid0], lghost_max_pe[ghost1_idx]));
710: if (g_max_e1 > e->weight + MY_MEPS) {
711: /* PetscCall(PetscSynchronizedPrintf(comm,"\t\t\t\t[%d] 3) ghost e1 SKIPPING small edge (%d %d), diff = %10.4e from proc %d with max pe %d. max = %20.14e, w = %20.14e\n", rank, (int)gid0, (int)gid1, g_max_e1 - e->weight, (int)lghost_pe[ghost1_idx], lghost_max_pe[ghost1_idx], g_max_e1, e->weight )); */
712: continue;
713: } else if (g_max_e1 >= e->weight - MY_MEPS && lghost_pe[ghost1_idx] > rank) { // is 'lghost_max_pe[ghost1_idx] > rank' needed?
714: /* check for max_e1 equal to this edge and a larger processor that will deal with this */
715: if (print)
716: PetscCall(PetscSynchronizedPrintf(comm, "\t\t\t[%d] ghost e1 SKIPPING EQUAL (%d %d), diff = %10.4e from larger proc %d with max pe %d. max = %20.14e, w = %20.14e\n", rank, (int)gid0, (int)gid1,
717: (double)(PetscRealPart(lid_max_ew[lid0]) - (double)e->weight), (int)lghost_pe[ghost1_idx], (int)lghost_max_pe[ghost1_idx], (double)g_max_e1, (double)e->weight));
718: isOK = PETSC_FALSE; // this guy could delete me
719: continue;
720: } else {
721: /* PetscCall(PetscSynchronizedPrintf(comm,"\t[%d] Edge (%d %d) passes gid0 tests, diff = %10.4e from proc %d with max pe %d. max = %20.14e, w = %20.14e\n", rank, (int)gid0, (int)gid1, g_max_e1 - e->weight, (int)lghost_pe[ghost1_idx], lghost_max_pe[ghost1_idx], g_max_e1, e->weight )); */
722: }
723: }
724: /* check ghost for v0 */
725: if (isOK) {
726: PetscReal max_e, ew;
727: if ((ix = lid_cprowID[lid0]) != -1) { /* if I have any ghost neighbors */
728: int n;
729: ii = matB->compressedrow.i;
730: n = ii[ix + 1] - ii[ix];
731: ap = matB->a + ii[ix];
732: aj = matB->j + ii[ix];
733: for (int jj = 0; jj < n && isOK; jj++) {
734: PetscInt lidj = aj[jj];
735: if (lghost_matched[lidj]) continue;
736: ew = PetscRealPart(ap[jj]);
737: if (ew <= threshold) continue;
738: max_e = PetscRealPart(lghost_max_ew[lidj]);
739: /* check for max_e equal to this edge and a larger processor that will deal with this */
740: if (ew >= PetscRealPart(lid_max_ew[lid0]) - MY_MEPS && lghost_max_pe[lidj] > rank) isOK = PETSC_FALSE;
741: PetscCheck(ew <= max_e + MY_MEPS, PETSC_COMM_SELF, PETSC_ERR_SUP, "edge weight %e > max %e. ncols = %d, gid0 = %d, gid1 = %d", (double)PetscRealPart(ew), (double)PetscRealPart(max_e), (int)n, (int)(lid0 + my0), (int)lghost_gid[lidj]);
742: if (print)
743: PetscCall(PetscSynchronizedPrintf(comm, "\t\t\t\t[%d] e0: looked at ghost adj (%d %d), diff = %10.4e, ghost on proc %d (max %d). isOK = %d, %d %d %d; ew = %e, lid0 max ew = %e, diff = %e, eps = %e\n", rank, (int)gid0, (int)lghost_gid[lidj], (double)(max_e - ew), lghost_pe[lidj], lghost_max_pe[lidj], isOK, (double)(ew) >= (double)(max_e - MY_MEPS), ew >= PetscRealPart(lid_max_ew[lid0]) - MY_MEPS, lghost_pe[lidj] > rank, (double)ew, (double)PetscRealPart(lid_max_ew[lid0]), (double)(ew - PetscRealPart(lid_max_ew[lid0])), (double)MY_MEPS));
744: }
745: if (!isOK && print) PetscCall(PetscSynchronizedPrintf(comm, "\t\t[%d] skip edge (%d %d) from ghost inspection\n", rank, (int)gid0, (int)gid1));
746: }
747: /* check local v1 */
748: if (ghost1_idx == -1) {
749: if ((ix = lid_cprowID[lid1]) != -1) { /* if I have any ghost neighbors */
750: int n;
751: ii = matB->compressedrow.i;
752: n = ii[ix + 1] - ii[ix];
753: ap = matB->a + ii[ix];
754: aj = matB->j + ii[ix];
755: for (int jj = 0; jj < n && isOK; jj++) {
756: PetscInt lidj = aj[jj];
757: if (lghost_matched[lidj]) continue;
758: ew = PetscRealPart(ap[jj]);
759: if (ew <= threshold) continue;
760: max_e = PetscRealPart(lghost_max_ew[lidj]);
761: /* check for max_e equal to this edge and a larger processor that will deal with this */
762: if (ew >= PetscRealPart(lid_max_ew[lid1]) - MY_MEPS && lghost_max_pe[lidj] > rank) isOK = PETSC_FALSE;
763: PetscCheck(ew <= max_e + MY_MEPS, PETSC_COMM_SELF, PETSC_ERR_SUP, "edge weight %e > max %e", (double)PetscRealPart(ew), (double)PetscRealPart(max_e));
764: if (print)
765: PetscCall(PetscSynchronizedPrintf(comm, "\t\t\t\t\t[%d] e1: looked at ghost adj (%d %d), diff = %10.4e, ghost on proc %d (max %d)\n", rank, (int)gid0, (int)lghost_gid[lidj], (double)(max_e - ew), lghost_pe[lidj], lghost_max_pe[lidj]));
766: }
767: }
768: if (!isOK && print) PetscCall(PetscSynchronizedPrintf(comm, "\t\t[%d] skip edge (%d %d) from ghost inspection\n", rank, (int)gid0, (int)gid1));
769: }
770: }
771: PetscReal e1_max_w = (ghost1_idx == -1 ? PetscRealPart(lid_max_ew[lid0]) : PetscRealPart(lghost_max_ew[ghost1_idx]));
772: if (print)
773: PetscCall(PetscSynchronizedPrintf(comm, "\t[%d] MATCHING (%d %d) e1 max weight = %e, e1 weight diff %e, %s. isOK = %d\n", rank, (int)gid0, (int)gid1, (double)e1_max_w, (double)(e1_max_w - e->weight), ghost1_idx == -1 ? "local" : "ghost", isOK));
774: /* do it */
775: if (isOK) {
776: if (ghost1_idx == -1) {
777: PetscCheck(!lid_matched[lid1], PETSC_COMM_SELF, PETSC_ERR_SUP, "local %d is matched", (int)gid1);
778: lid_matched[lid1] = PETSC_TRUE; /* keep track of what we've done this round */
779: PetscCall(PetscCDMoveAppend(agg_llists, lid0, lid1)); // takes lid1's list and appends to lid0's
780: } else {
781: /* add gid1 to the list of ghosts deleted by me -- I need their children */
782: PetscMPIInt proc = lghost_pe[ghost1_idx];
783: PetscCheck(!lghost_matched[ghost1_idx], PETSC_COMM_SELF, PETSC_ERR_SUP, "ghost %d is matched", (int)lghost_gid[ghost1_idx]);
784: lghost_matched[ghost1_idx] = PETSC_TRUE;
785: PetscCall(PetscCDAppendID(ghost_deleted_list, proc, ghost1_idx)); /* cache to send messages */
786: PetscCall(PetscCDAppendID(ghost_deleted_list, proc, lid0));
787: }
788: lid_matched[lid0] = PETSC_TRUE; /* keep track of what we've done this round */
789: /* set projection */
790: PetscCall(MatSetValues(P, 1, &gid0, 1, &gid0, &one, INSERT_VALUES));
791: PetscCall(MatSetValues(P, 1, &gid1, 1, &gid0, &one, INSERT_VALUES));
792: //PetscCall(PetscPrintf(comm,"\t %d.%d) match active EDGE %d : (%d %d)\n",iter,sub_it, (int)nactive_edges, (int)gid0, (int)gid1));
793: } /* matched */
794: } /* edge loop */
795: PetscCall(PetscSynchronizedFlush(comm, PETSC_STDOUT));
796: if (isMPI) PetscCall(VecRestoreArrayRead(ghostMaxEdge, &lghost_max_ew));
797: PetscCall(VecRestoreArrayRead(locMaxEdge, &lid_max_ew));
798: // count active edges for the stopping test; later, update deleted ghosts
799: n_act_n[0] = nactive_edges;
800: if (ghost_deleted_list) PetscCall(PetscCDCount(ghost_deleted_list, &n_act_n[2]));
801: else n_act_n[2] = 0;
802: PetscCall(PetscCDCount(agg_llists, &n_act_n[1]));
803: PetscCall(MPIU_Allreduce(n_act_n, gn_act_n, 3, MPIU_INT, MPI_SUM, comm));
804: PetscCall(PetscInfo(info_is, "[%d] %d.%d) nactive edges=%" PetscInt_FMT ", ncomm_procs=%d, nEdges=%d, %" PetscInt_FMT " deleted ghosts, N=%" PetscInt_FMT "\n", rank, iter, sub_it, gn_act_n[0], (int)ncomm_procs, (int)nEdges, gn_act_n[2], gn_act_n[1]));
805: /* deal with deleted ghosts */
806: if (isMPI) {
807: PetscCDIntNd *pos;
808: PetscInt *sbuffs1[REQ_BF_SIZE], ndel;
809: PetscInt *sbuffs2[REQ_BF_SIZE];
810: MPI_Status status;
812: /* send deleted ghosts */
813: for (int proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) {
814: const PetscMPIInt proc = comm_procs[proc_idx];
815: PetscInt *sbuff, *pt, scount;
816: MPI_Request *request;
817: /* count ghosts */
818: PetscCall(PetscCDCountAt(ghost_deleted_list, proc, &ndel));
819: ndel /= 2; // two entries per deleted ghost
820: scount = 2 + 2 * ndel;
821: PetscCall(PetscMalloc1(scount + request_size, &sbuff));
822: /* save requests */
823: sbuffs1[proc_idx] = sbuff;
824: request = (MPI_Request *)sbuff;
825: sbuff = pt = (PetscInt *)(sbuff + request_size);
826: /* write [ndel, proc, ndel*[gid1,gid0]] */
827: *pt++ = ndel; // number of deleted to send
828: *pt++ = rank; // proc (not used)
829: PetscCall(PetscCDGetHeadPos(ghost_deleted_list, proc, &pos));
830: while (pos) {
831: PetscInt lid0, ghost_idx, gid1;
832: PetscCall(PetscCDIntNdGetID(pos, &ghost_idx));
833: gid1 = lghost_gid[ghost_idx];
834: PetscCall(PetscCDGetNextPos(ghost_deleted_list, proc, &pos));
835: PetscCall(PetscCDIntNdGetID(pos, &lid0));
836: PetscCall(PetscCDGetNextPos(ghost_deleted_list, proc, &pos));
837: *pt++ = gid1;
838: *pt++ = lid0 + my0; // gid0
839: }
840: PetscCheck(pt - sbuff == scount, PETSC_COMM_SELF, PETSC_ERR_SUP, "pt - sbuff != scount: %d", (int)(pt - sbuff));
841: /* MPI_Isend: tag1 [ndel, proc, ndel*[gid1,gid0]] */
842: PetscCallMPI(MPI_Isend(sbuff, scount, MPIU_INT, proc, tag1, comm, request));
843: PetscCall(PetscCDRemoveAllAt(ghost_deleted_list, proc)); // done with this list
844: }
845: /* receive deleted, send back partial aggregates, clear lists */
846: for (int proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) {
847: PetscCallMPI(MPI_Probe(comm_procs[proc_idx] /* MPI_ANY_SOURCE */, tag1, comm, &status));
848: {
849: PetscInt *pt, *pt2, *pt3, *sbuff, tmp;
850: MPI_Request *request;
851: int rcount, scount, ndel;
852: const PetscMPIInt proc = status.MPI_SOURCE;
853: PetscCallMPI(MPI_Get_count(&status, MPIU_INT, &rcount));
854: if (rcount > rbuff_sz) {
855: if (rbuff) PetscCall(PetscFree(rbuff));
856: PetscCall(PetscMalloc1(rcount, &rbuff));
857: rbuff_sz = rcount;
858: }
859: /* MPI_Recv: tag1 [ndel, proc, ndel*[gid1,gid0] ] */
860: PetscCallMPI(MPI_Recv(rbuff, rcount, MPIU_INT, proc, tag1, comm, &status));
861: /* read and count sends: *[gid0, n, n*[gid]] */
862: pt = rbuff;
863: scount = 0;
864: ndel = *pt++; // number of deleted to recv
865: tmp = *pt++; // proc (not used)
866: while (ndel--) {
867: PetscInt gid1 = *pt++, lid1 = gid1 - my0;
868: int gh_gid0 = *pt++; // gid on other proc (not used here to count)
869: PetscCheck(lid1 >= 0 && lid1 < nloc, PETSC_COMM_SELF, PETSC_ERR_SUP, "received ghost deleted %d", (int)gid1);
870: PetscCheck(!lid_matched[lid1], PETSC_COMM_SELF, PETSC_ERR_PLIB, "%d) received matched local gid %" PetscInt_FMT ",%d, with ghost (lid) %" PetscInt_FMT " from proc %d", sub_it, gid1, gh_gid0, tmp, proc);
871: lid_matched[lid1] = PETSC_TRUE; /* keep track of what we've done this round */
872: PetscCall(PetscCDCountAt(agg_llists, lid1, &tmp)); // n
873: /* PetscCheck(tmp == 1, PETSC_COMM_SELF, PETSC_ERR_SUP, "sending %d (!= 1) size aggregate. gid-0 %d, from %d (gid-1 %d)", (int)tmp, (int) gid, proc, gh_gid0); */
874: scount += tmp + 2; // gid0, n, n*[gid]
875: }
876: PetscCheck((pt - rbuff) == rcount, PETSC_COMM_SELF, PETSC_ERR_SUP, "receive buffer size != num read: %d; rcount: %d", (int)(pt - rbuff), rcount);
877: /* send tag2: *[gid0, n, n*[gid] ] */
878: PetscCall(PetscMalloc1(scount + request_size, &sbuff));
879: sbuffs2[proc_idx] = sbuff; /* cache request */
880: request = (MPI_Request *)sbuff;
881: pt2 = sbuff = sbuff + request_size;
882: // read again: n, proc, n*[gid1,gid0]
883: pt = rbuff;
884: ndel = *pt++;
885: tmp = *pt++; // proc (not used)
886: while (ndel--) {
887: PetscInt gid1 = *pt++, lid1 = gid1 - my0, gh_gid0 = *pt++;
888: /* write [gid0, aggSz, aggSz[gid] ] */
889: *pt2++ = gh_gid0;
890: pt3 = pt2++; /* save pointer for later */
891: PetscCall(PetscCDGetHeadPos(agg_llists, lid1, &pos));
892: while (pos) {
893: PetscInt gid;
894: PetscCall(PetscCDIntNdGetID(pos, &gid));
895: PetscCall(PetscCDGetNextPos(agg_llists, lid1, &pos));
896: *pt2++ = gid;
897: }
898: *pt3 = (pt2 - pt3) - 1;
899: /* clear list */
900: PetscCall(PetscCDRemoveAllAt(agg_llists, lid1));
901: }
902: PetscCheck((pt2 - sbuff) == scount, PETSC_COMM_SELF, PETSC_ERR_SUP, "buffer size != num write: %d %d", (int)(pt2 - sbuff), (int)scount);
903: /* MPI_Isend: requested data tag2 *[gid0, n, n*[gid]] */
904: PetscCallMPI(MPI_Isend(sbuff, scount, MPIU_INT, proc, tag2, comm, request));
905: }
906: } // proc_idx
907: /* receive tag2 *[gid0, n, n*[gid] ] */
908: for (int proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) {
909: PetscMPIInt proc;
910: PetscInt *pt;
911: int rcount;
912: PetscCallMPI(MPI_Probe(comm_procs[proc_idx] /* MPI_ANY_SOURCE */, tag2, comm, &status));
913: PetscCallMPI(MPI_Get_count(&status, MPIU_INT, &rcount));
914: if (rcount > rbuff_sz) {
915: if (rbuff) PetscCall(PetscFree(rbuff));
916: PetscCall(PetscMalloc1(rcount, &rbuff));
917: rbuff_sz = rcount;
918: }
919: proc = status.MPI_SOURCE;
920: /* MPI_Recv: tag2 *[gid0, n, n*[gid]] */
921: PetscCallMPI(MPI_Recv(rbuff, rcount, MPIU_INT, proc, tag2, comm, &status));
922: pt = rbuff;
923: while (pt - rbuff < rcount) {
924: PetscInt gid0 = *pt++, n = *pt++;
925: while (n--) {
926: PetscInt gid1 = *pt++;
927: PetscCall(PetscCDAppendID(agg_llists, gid0 - my0, gid1));
928: }
929: }
930: PetscCheck((pt - rbuff) == rcount, PETSC_COMM_SELF, PETSC_ERR_SUP, "recv buffer size != num read: %d %d", (int)(pt - rbuff), (int)rcount);
931: }
932: /* wait for tag1 isends */
933: for (int proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) {
934: MPI_Request *request;
935: request = (MPI_Request *)sbuffs1[proc_idx];
936: PetscCallMPI(MPI_Wait(request, &status));
937: PetscCall(PetscFree(sbuffs1[proc_idx]));
938: }
939: /* wait for tag2 isends */
940: for (int proc_idx = 0; proc_idx < ncomm_procs; proc_idx++) {
941: MPI_Request *request = (MPI_Request *)sbuffs2[proc_idx];
942: PetscCallMPI(MPI_Wait(request, &status));
943: PetscCall(PetscFree(sbuffs2[proc_idx]));
944: }
945: } /* MPI */
946: /* set 'lghost_matched' - use locMaxEdge, ghostMaxEdge (recomputed next) */
947: if (isMPI) {
948: const PetscScalar *sbuff;
949: for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) {
950: PetscScalar vval = lid_matched[kk] ? 1.0 : 0.0;
951: PetscCall(VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */
952: }
953: PetscCall(VecAssemblyBegin(locMaxEdge));
954: PetscCall(VecAssemblyEnd(locMaxEdge));
955: PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
956: PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
957: PetscCall(VecGetArrayRead(ghostMaxEdge, &sbuff));
958: for (int kk = 0; kk < num_ghosts; kk++) { lghost_matched[kk] = (PetscBool)(PetscRealPart(sbuff[kk]) != 0.0); }
959: PetscCall(VecRestoreArrayRead(ghostMaxEdge, &sbuff));
960: }
961: /* compute 'locMaxEdge' inside sub iteration b/c max weight can drop as neighbors are matched */
962: for (PetscInt kk = 0, gid = my0; kk < nloc; kk++, gid++) {
963: PetscReal max_e = 0., tt;
964: PetscScalar vval;
965: const PetscInt lid = kk;
966: int max_pe = rank, pe, n;
967: ii = matA->i;
968: n = ii[lid + 1] - ii[lid];
969: aj = PetscSafePointerPlusOffset(matA->j, ii[lid]);
970: ap = PetscSafePointerPlusOffset(matA->a, ii[lid]);
971: for (int jj = 0; jj < n; jj++) {
972: PetscInt lidj = aj[jj];
973: if (lid_matched[lidj]) continue; /* this is new - can change local max */
974: if (lidj != lid && PetscRealPart(ap[jj]) > max_e) max_e = PetscRealPart(ap[jj]);
975: }
976: if (lid_cprowID && (ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
977: ii = matB->compressedrow.i;
978: n = ii[ix + 1] - ii[ix];
979: ap = matB->a + ii[ix];
980: aj = matB->j + ii[ix];
981: for (int jj = 0; jj < n; jj++) {
982: PetscInt lidj = aj[jj];
983: if (lghost_matched[lidj]) continue;
984: if ((tt = PetscRealPart(ap[jj])) > max_e) max_e = tt;
985: }
986: }
987: vval = (PetscScalar)max_e;
988: PetscCall(VecSetValues(locMaxEdge, 1, &gid, &vval, INSERT_VALUES)); /* set with GID */
989: // max PE with max edge
990: if (lid_cprowID && (ix = lid_cprowID[lid]) != -1) { /* if I have any ghost neighbors */
991: ii = matB->compressedrow.i;
992: n = ii[ix + 1] - ii[ix];
993: ap = matB->a + ii[ix];
994: aj = matB->j + ii[ix];
995: for (int jj = 0; jj < n; jj++) {
996: PetscInt lidj = aj[jj];
997: if (lghost_matched[lidj]) continue;
998: if ((pe = lghost_pe[aj[jj]]) > max_pe && PetscRealPart(ap[jj]) >= max_e - MY_MEPS) { max_pe = pe; }
999: }
1000: }
1001: vval = (PetscScalar)max_pe;
1002: PetscCall(VecSetValues(locMaxPE, 1, &gid, &vval, INSERT_VALUES));
1003: }
1004: PetscCall(VecAssemblyBegin(locMaxEdge));
1005: PetscCall(VecAssemblyEnd(locMaxEdge));
1006: PetscCall(VecAssemblyBegin(locMaxPE));
1007: PetscCall(VecAssemblyEnd(locMaxPE));
1008: /* compute 'lghost_max_ew' and 'lghost_max_pe' to get ready for the next iteration */
1009: if (isMPI) {
1010: const PetscScalar *buf;
1011: PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
1012: PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxEdge, ghostMaxEdge, INSERT_VALUES, SCATTER_FORWARD));
1013: PetscCall(VecScatterBegin(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD));
1014: PetscCall(VecScatterEnd(mpimat->Mvctx, locMaxPE, ghostMaxPE, INSERT_VALUES, SCATTER_FORWARD));
1015: PetscCall(VecGetArrayRead(ghostMaxPE, &buf));
1016: for (int kk = 0; kk < num_ghosts; kk++) {
1017: lghost_max_pe[kk] = (PetscMPIInt)PetscRealPart(buf[kk]); // the MAX proc of the ghost now
1018: }
1019: PetscCall(VecRestoreArrayRead(ghostMaxPE, &buf));
1020: }
1021: // if no active edges, stop
1022: if (gn_act_n[0] < 1) break;
1023: // increment and check (self-stopping iteration)
1024: PetscCheck(old_num_edge != gn_act_n[0], PETSC_COMM_SELF, PETSC_ERR_SUP, "HEM stalled step %d/%d", sub_it + 1, n_sub_its);
1025: sub_it++;
1026: PetscCheck(sub_it < n_sub_its, PETSC_COMM_SELF, PETSC_ERR_SUP, "failed to finish HEM step %d/%d", sub_it + 1, n_sub_its);
1027: old_num_edge = gn_act_n[0];
1028: } /* sub_it loop */
1029: /* clean up iteration */
1030: PetscCall(PetscFree(Edges));
1031: if (mpimat) { // can be hoisted
1032: PetscCall(VecRestoreArrayRead(ghostMaxEdge, &lghost_max_ew));
1033: PetscCall(VecDestroy(&ghostMaxEdge));
1034: PetscCall(VecDestroy(&ghostMaxPE));
1035: PetscCall(PetscFree(lghost_pe));
1036: PetscCall(PetscFree(lghost_gid));
1037: PetscCall(PetscFree(lghost_matched));
1038: PetscCall(PetscFree(lghost_max_pe));
1039: }
1040: PetscCall(VecDestroy(&locMaxEdge));
1041: PetscCall(VecDestroy(&locMaxPE));
1042: /* create next graph */
1043: {
1044: Vec diag;
1045: /* add identity for unmatched vertices so they stay alive */
1046: for (PetscInt kk = 0, gid1, gid = my0; kk < nloc; kk++, gid++) {
1047: if (!lid_matched[kk]) {
1048: const PetscInt lid = kk;
1049: PetscCDIntNd *pos;
1050: PetscCall(PetscCDGetHeadPos(agg_llists, lid, &pos));
1051: PetscCheck(pos, PETSC_COMM_SELF, PETSC_ERR_PLIB, "empty list in singleton: %d", (int)gid);
1052: PetscCall(PetscCDIntNdGetID(pos, &gid1));
1053: PetscCheck(gid1 == gid, PETSC_COMM_SELF, PETSC_ERR_PLIB, "first in list (%d) in singleton not %d", (int)gid1, (int)gid);
1054: PetscCall(MatSetValues(P, 1, &gid, 1, &gid, &one, INSERT_VALUES));
1055: }
1056: }
1057: PetscCall(MatAssemblyBegin(P, MAT_FINAL_ASSEMBLY));
1058: PetscCall(MatAssemblyEnd(P, MAT_FINAL_ASSEMBLY));
1060: /* project to make new graph with collapsed edges */
1061: PetscCall(MatPtAP(cMat, P, MAT_INITIAL_MATRIX, 1.0, &tMat));
1062: PetscCall(MatDestroy(&P));
1063: PetscCall(MatDestroy(&cMat));
1064: cMat = tMat;
1065: PetscCall(MatCreateVecs(cMat, &diag, NULL));
1066: PetscCall(MatGetDiagonal(cMat, diag));
1067: PetscCall(VecReciprocal(diag));
1068: PetscCall(VecSqrtAbs(diag));
1069: PetscCall(MatDiagonalScale(cMat, diag, diag));
1070: PetscCall(VecDestroy(&diag));
1071: }
1072: } /* coarsen iterator */
1074: /* make a fake matrix with only the off-process (Mat->B) couplings, for smoothed aggregation QR. Needed if we make an aux graph (i.e., PtAP) with k > 1 */
1075: if (size > 1) {
1076: Mat mat;
1077: PetscCDIntNd *pos;
1078: PetscInt NN, MM, jj = 0, mxsz = 0;
1080: for (int kk = 0; kk < nloc; kk++) {
1081: PetscCall(PetscCDCountAt(agg_llists, kk, &jj));
1082: if (jj > mxsz) mxsz = jj;
1083: }
1084: PetscCall(MatGetSize(a_Gmat, &MM, &NN));
1085: if (mxsz > MM - nloc) mxsz = MM - nloc;
1086: /* matrix of ghost adj for square graph */
1087: PetscCall(MatCreateAIJ(comm, nloc, nloc, PETSC_DETERMINE, PETSC_DETERMINE, 0, NULL, mxsz, NULL, &mat));
1088: for (PetscInt lid = 0, gid = my0; lid < nloc; lid++, gid++) {
1089: PetscCall(PetscCDGetHeadPos(agg_llists, lid, &pos));
1090: while (pos) {
1091: PetscInt gid1;
1092: PetscCall(PetscCDIntNdGetID(pos, &gid1));
1093: PetscCall(PetscCDGetNextPos(agg_llists, lid, &pos));
1094: if (gid1 < my0 || gid1 >= my0 + nloc) PetscCall(MatSetValues(mat, 1, &gid, 1, &gid1, &one, ADD_VALUES));
1095: }
1096: }
1097: PetscCall(MatAssemblyBegin(mat, MAT_FINAL_ASSEMBLY));
1098: PetscCall(MatAssemblyEnd(mat, MAT_FINAL_ASSEMBLY));
1099: PetscCall(PetscCDSetMat(agg_llists, mat));
1100: PetscCall(PetscCDDestroy(ghost_deleted_list));
1101: if (rbuff_sz) PetscCall(PetscFree(rbuff)); // always true
1102: }
1103: // move BCs into some node
1104: if (bc_list) {
1105: PetscCDIntNd *pos;
1106: PetscCall(PetscCDGetHeadPos(bc_list, 0, &pos));
1107: while (pos) {
1108: PetscInt gid1;
1109: PetscCall(PetscCDIntNdGetID(pos, &gid1));
1110: PetscCall(PetscCDGetNextPos(bc_list, 0, &pos));
1111: PetscCall(PetscCDAppendID(agg_llists, bc_agg, gid1));
1112: }
1113: PetscCall(PetscCDRemoveAllAt(bc_list, 0));
1114: PetscCall(PetscCDDestroy(bc_list));
1115: }
1116: {
1117: // check sizes -- every vertex must end up in the graph
1118: PetscInt sz, globalsz, MM;
1119: PetscCall(MatGetSize(a_Gmat, &MM, NULL));
1120: PetscCall(PetscCDCount(agg_llists, &sz));
1121: PetscCall(MPIU_Allreduce(&sz, &globalsz, 1, MPIU_INT, MPI_SUM, comm));
1122: PetscCheck(MM == globalsz, comm, PETSC_ERR_SUP, "lost %d equations ?", (int)(MM - globalsz));
1123: }
1124: // cleanup
1125: PetscCall(MatDestroy(&cMat));
1126: PetscCall(PetscFree(lid_cprowID));
1127: PetscCall(PetscFree(lid_max_pe));
1128: PetscCall(PetscFree(lid_matched));
1129: PetscCall(ISDestroy(&info_is));
1130: PetscFunctionReturn(PETSC_SUCCESS);
1131: }
1133: /*
1134: HEM coarsen, simple greedy.
1135: */
1136: static PetscErrorCode MatCoarsenApply_HEM(MatCoarsen coarse)
1137: {
1138: Mat mat = coarse->graph;
1140: PetscFunctionBegin;
1141: PetscCall(MatCoarsenApply_HEM_private(mat, coarse->max_it, coarse->threshold, &coarse->agg_lists));
1142: PetscFunctionReturn(PETSC_SUCCESS);
1143: }
1145: static PetscErrorCode MatCoarsenView_HEM(MatCoarsen coarse, PetscViewer viewer)
1146: {
1147: PetscMPIInt rank;
1148: PetscBool iascii;
1150: PetscFunctionBegin;
1151: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)coarse), &rank));
1152: PetscCall(PetscObjectTypeCompare((PetscObject)viewer, PETSCVIEWERASCII, &iascii));
1153: if (iascii) {
1154: PetscCDIntNd *pos, *pos2;
1155: PetscCall(PetscViewerASCIIPrintf(viewer, "%d matching steps with threshold = %g\n", (int)coarse->max_it, (double)coarse->threshold));
1156: PetscCall(PetscViewerASCIIPushSynchronized(viewer));
1157: for (PetscInt kk = 0; kk < coarse->agg_lists->size; kk++) {
1158: PetscCall(PetscCDGetHeadPos(coarse->agg_lists, kk, &pos));
1159: if ((pos2 = pos)) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "selected local %d: ", (int)kk));
1160: while (pos) {
1161: PetscInt gid1;
1162: PetscCall(PetscCDIntNdGetID(pos, &gid1));
1163: PetscCall(PetscCDGetNextPos(coarse->agg_lists, kk, &pos));
1164: PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, " %d ", (int)gid1));
1165: }
1166: if (pos2) PetscCall(PetscViewerASCIISynchronizedPrintf(viewer, "\n"));
1167: }
1168: PetscCall(PetscViewerFlush(viewer));
1169: PetscCall(PetscViewerASCIIPopSynchronized(viewer));
1170: }
1171: PetscFunctionReturn(PETSC_SUCCESS);
1172: }
1174: /*
1175: MatCoarsenCreate_HEM - A coarsener that uses HEM, a simple greedy heavy-edge-matching coarsener
1176: */
1177: PETSC_EXTERN PetscErrorCode MatCoarsenCreate_HEM(MatCoarsen coarse)
1178: {
1179: PetscFunctionBegin;
1180: coarse->ops->apply = MatCoarsenApply_HEM;
1181: coarse->ops->view = MatCoarsenView_HEM;
1182: coarse->max_it = 4;
1183: PetscFunctionReturn(PETSC_SUCCESS);
1184: }
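/* Driver sketch (illustrative only; ExampleHEMCoarsen is not part of PETSc and is never called):
   the HEM coarsener is normally reached through the public MatCoarsen interface, so, given a
   symmetric adjacency/graph matrix A, something like the following ends up in MatCoarsenApply_HEM()
   above. Only public MatCoarsen API calls are used here. */
static PetscErrorCode ExampleHEMCoarsen(Mat A)
{
  MatCoarsen crs;

  PetscFunctionBegin;
  PetscCall(MatCoarsenCreate(PetscObjectComm((PetscObject)A), &crs));
  PetscCall(MatCoarsenSetType(crs, MATCOARSENHEM));
  PetscCall(MatCoarsenSetAdjacency(crs, A));
  PetscCall(MatCoarsenSetFromOptions(crs)); /* let -mat_coarsen_* options override the defaults set in MatCoarsenCreate_HEM() */
  PetscCall(MatCoarsenApply(crs));
  PetscCall(MatCoarsenDestroy(&crs));
  PetscFunctionReturn(PETSC_SUCCESS);
}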