Actual source code: bddcprivate.c
1: #include <../src/mat/impls/aij/seq/aij.h>
2: #include <petsc/private/pcbddcimpl.h>
3: #include <petsc/private/pcbddcprivateimpl.h>
4: #include <petsc/private/kernels/blockinvert.h>
5: #include <../src/mat/impls/dense/seq/dense.h>
6: #include <petscdmplex.h>
7: #include <petscblaslapack.h>
8: #include <petsc/private/sfimpl.h>
9: #include <petsc/private/dmpleximpl.h>
10: #include <petscdmda.h>
12: static PetscErrorCode MatMPIAIJRestrict(Mat, MPI_Comm, Mat *);
14: /* if range is true, it returns B s.t. span{B} = range(A)
15: if range is false, it returns B s.t. range(B) _|_ range(A) */
16: static PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B)
17: {
18: PetscScalar *uwork, *data, *U, ds = 0.;
19: PetscReal *sing;
20: PetscBLASInt bM, bN, lwork, lierr, di = 1;
21: PetscInt ulw, i, nr, nc, n;
22: #if defined(PETSC_USE_COMPLEX)
23: PetscReal *rwork2;
24: #endif
26: PetscFunctionBegin;
27: PetscCall(MatGetSize(A, &nr, &nc));
28: if (!nr || !nc) PetscFunctionReturn(PETSC_SUCCESS);
30: /* workspace */
31: if (!work) {
32: ulw = PetscMax(PetscMax(1, 5 * PetscMin(nr, nc)), 3 * PetscMin(nr, nc) + PetscMax(nr, nc));
33: PetscCall(PetscMalloc1(ulw, &uwork));
34: } else {
35: ulw = lw;
36: uwork = work;
37: }
38: n = PetscMin(nr, nc);
39: if (!rwork) {
40: PetscCall(PetscMalloc1(n, &sing));
41: } else {
42: sing = rwork;
43: }
45: /* SVD */
46: PetscCall(PetscMalloc1(nr * nr, &U));
47: PetscCall(PetscBLASIntCast(nr, &bM));
48: PetscCall(PetscBLASIntCast(nc, &bN));
49: PetscCall(PetscBLASIntCast(ulw, &lwork));
50: PetscCall(MatDenseGetArray(A, &data));
51: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
52: #if !defined(PETSC_USE_COMPLEX)
53: PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, &lierr));
54: #else
55: PetscCall(PetscMalloc1(5 * n, &rwork2));
56: PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("A", "N", &bM, &bN, data, &bM, sing, U, &bM, &ds, &di, uwork, &lwork, rwork2, &lierr));
57: PetscCall(PetscFree(rwork2));
58: #endif
59: PetscCall(PetscFPTrapPop());
60: PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %" PetscBLASInt_FMT, lierr);
61: PetscCall(MatDenseRestoreArray(A, &data));
62: for (i = 0; i < n; i++)
63: if (sing[i] < PETSC_SMALL) break;
64: if (!rwork) PetscCall(PetscFree(sing));
65: if (!work) PetscCall(PetscFree(uwork));
66: /* create B */
67: if (!range) {
68: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, nr - i, NULL, B));
69: PetscCall(MatDenseGetArray(*B, &data));
70: PetscCall(PetscArraycpy(data, U + nr * i, (nr - i) * nr));
71: } else {
72: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, nr, i, NULL, B));
73: PetscCall(MatDenseGetArray(*B, &data));
74: PetscCall(PetscArraycpy(data, U, i * nr));
75: }
76: PetscCall(MatDenseRestoreArray(*B, &data));
77: PetscCall(PetscFree(U));
78: PetscFunctionReturn(PETSC_SUCCESS);
79: }
81: /* TODO REMOVE */
82: #if defined(PRINT_GDET)
83: static int inc = 0;
84: static int lev = 0;
85: #endif
87: static PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat *Gins, Mat *GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork)
88: {
89: Mat GE, GEd;
90: PetscInt rsize, csize, esize;
91: PetscScalar *ptr;
93: PetscFunctionBegin;
94: PetscCall(ISGetSize(edge, &esize));
95: if (!esize) PetscFunctionReturn(PETSC_SUCCESS);
96: PetscCall(ISGetSize(extrow, &rsize));
97: PetscCall(ISGetSize(extcol, &csize));
99: /* gradients */
100: ptr = work + 5 * esize;
101: PetscCall(MatCreateSubMatrix(lG, extrow, extcol, MAT_INITIAL_MATRIX, &GE));
102: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, rsize, csize, ptr, Gins));
103: PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, Gins));
104: PetscCall(MatDestroy(&GE));
106: /* constants */
107: ptr += rsize * csize;
108: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, esize, csize, ptr, &GEd));
109: PetscCall(MatCreateSubMatrix(lG, edge, extcol, MAT_INITIAL_MATRIX, &GE));
110: PetscCall(MatConvert(GE, MATSEQDENSE, MAT_REUSE_MATRIX, &GEd));
111: PetscCall(MatDestroy(&GE));
112: PetscCall(MatDenseOrthogonalRangeOrComplement(GEd, PETSC_FALSE, 5 * esize, work, rwork, GKins));
113: PetscCall(MatDestroy(&GEd));
115: if (corners) {
116: Mat GEc;
117: const PetscScalar *vals;
118: PetscScalar v;
120: PetscCall(MatCreateSubMatrix(lG, edge, corners, MAT_INITIAL_MATRIX, &GEc));
121: PetscCall(MatTransposeMatMult(GEc, *GKins, MAT_INITIAL_MATRIX, 1.0, &GEd));
122: PetscCall(MatDenseGetArrayRead(GEd, &vals));
123: /* v = PetscAbsScalar(vals[0]); */
124: v = 1.;
125: cvals[0] = vals[0] / v;
126: cvals[1] = vals[1] / v;
127: PetscCall(MatDenseRestoreArrayRead(GEd, &vals));
128: PetscCall(MatScale(*GKins, 1. / v));
129: #if defined(PRINT_GDET)
130: {
131: PetscViewer viewer;
132: char filename[256];
133: PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "Gdet_l%d_r%d_cc%d.m", lev, PetscGlobalRank, inc++));
134: PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF, filename, &viewer));
135: PetscCall(PetscViewerPushFormat(viewer, PETSC_VIEWER_ASCII_MATLAB));
136: PetscCall(PetscObjectSetName((PetscObject)GEc, "GEc"));
137: PetscCall(MatView(GEc, viewer));
138: PetscCall(PetscObjectSetName((PetscObject)*GKins, "GK"));
139: PetscCall(MatView(*GKins, viewer));
140: PetscCall(PetscObjectSetName((PetscObject)GEd, "Gproj"));
141: PetscCall(MatView(GEd, viewer));
142: PetscCall(PetscViewerDestroy(&viewer));
143: }
144: #endif
145: PetscCall(MatDestroy(&GEd));
146: PetscCall(MatDestroy(&GEc));
147: }
148: PetscFunctionReturn(PETSC_SUCCESS);
149: }
151: static PetscErrorCode MatAIJExtractRows(Mat, IS, Mat *);
153: PetscErrorCode PCBDDCNedelecSupport(PC pc)
154: {
155: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
156: Mat_IS *matis = (Mat_IS *)pc->pmat->data;
157: Mat G, T, conn, lG, lGt, lGis, lGall, lGe, lGinit;
158: PetscSF sfv;
159: ISLocalToGlobalMapping el2g, vl2g, fl2g, al2g;
160: MPI_Comm comm;
161: IS lned, primals, allprimals, nedfieldlocal, elements_corners = NULL;
162: IS *eedges, *extrows, *extcols, *alleedges;
163: PetscBT btv, bte, btvc, btb, btbd, btvcand, btvi, btee, bter;
164: PetscScalar *vals, *work;
165: PetscReal *rwork;
166: const PetscInt *idxs, *ii, *jj, *iit, *jjt;
167: PetscInt ne, nv, Lv, order, n, field;
168: PetscInt i, j, extmem, cum, maxsize, nee;
169: PetscInt *extrow, *extrowcum, *marks, *vmarks, *gidxs;
170: PetscInt *sfvleaves, *sfvroots;
171: PetscInt *corners, *cedges;
172: PetscInt *ecount, **eneighs, *vcount, **vneighs;
173: PetscInt *emarks;
174: PetscBool print, eerr, done, lrc[2], conforming, global, setprimal;
176: PetscFunctionBegin;
177: /* If the discrete gradient is defined for a subset of dofs and global is true,
178: it assumes G is given in global ordering for all the dofs.
179: Otherwise, the ordering is global for the Nedelec field */
180: order = pcbddc->nedorder;
181: conforming = pcbddc->conforming;
182: field = pcbddc->nedfield;
183: global = pcbddc->nedglobal;
184: setprimal = PETSC_FALSE;
185: print = PETSC_FALSE;
187: /* Command line customization */
188: PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC Nedelec options", "PC");
189: PetscCall(PetscOptionsBool("-pc_bddc_nedelec_field_primal", "All edge dofs set as primals: Toselli's algorithm C", NULL, setprimal, &setprimal, NULL));
190: /* print debug info and adaptive order TODO: to be removed */
191: PetscCall(PetscOptionsInt("-pc_bddc_nedelec_order", "Test variable order code (to be removed)", NULL, order, &order, NULL));
192: PetscCall(PetscOptionsBool("-pc_bddc_nedelec_print", "Print debug info", NULL, print, &print, NULL));
193: PetscOptionsEnd();
195: /* Return if there are no edges in the decomposition */
196: PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &al2g, NULL));
197: PetscCall(ISLocalToGlobalMappingGetSize(al2g, &n));
198: PetscCall(PetscObjectGetComm((PetscObject)pc, &comm));
199: PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals));
200: lrc[0] = PETSC_FALSE;
201: for (i = 0; i < n; i++) {
202: if (PetscRealPart(vals[i]) > 2.) {
203: lrc[0] = PETSC_TRUE;
204: break;
205: }
206: }
207: PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals));
208: PetscCallMPI(MPIU_Allreduce(&lrc[0], &lrc[1], 1, MPI_C_BOOL, MPI_LOR, comm));
209: if (!lrc[1]) PetscFunctionReturn(PETSC_SUCCESS);
211: /* Get Nedelec field */
212: PetscCheck(!pcbddc->n_ISForDofsLocal || field < pcbddc->n_ISForDofsLocal, comm, PETSC_ERR_USER, "Invalid field for Nedelec %" PetscInt_FMT ": number of fields is %" PetscInt_FMT, field, pcbddc->n_ISForDofsLocal);
213: if (pcbddc->n_ISForDofsLocal && field >= 0) {
214: PetscCall(PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]));
215: nedfieldlocal = pcbddc->ISForDofsLocal[field];
216: PetscCall(ISGetLocalSize(nedfieldlocal, &ne));
217: } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) {
218: ne = n;
219: nedfieldlocal = NULL;
220: global = PETSC_TRUE;
221: } else if (field == PETSC_DECIDE) {
222: PetscInt rst, ren, *idx;
224: PetscCall(PetscArrayzero(matis->sf_leafdata, n));
225: PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
226: PetscCall(MatGetOwnershipRange(pcbddc->discretegradient, &rst, &ren));
227: for (i = rst; i < ren; i++) {
228: PetscInt nc;
230: PetscCall(MatGetRow(pcbddc->discretegradient, i, &nc, NULL, NULL));
231: if (nc > 1) matis->sf_rootdata[i - rst] = 1;
232: PetscCall(MatRestoreRow(pcbddc->discretegradient, i, &nc, NULL, NULL));
233: }
234: PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
235: PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
236: PetscCall(PetscMalloc1(n, &idx));
237: for (i = 0, ne = 0; i < n; i++)
238: if (matis->sf_leafdata[i]) idx[ne++] = i;
239: PetscCall(ISCreateGeneral(comm, ne, idx, PETSC_OWN_POINTER, &nedfieldlocal));
240: } else {
241: SETERRQ(comm, PETSC_ERR_USER, "When multiple fields are present, the Nedelec field has to be specified");
242: }
244: /* Sanity checks */
245: PetscCheck(order || conforming, comm, PETSC_ERR_SUP, "Variable order and non-conforming spaces are not supported at the same time");
246: PetscCheck(!pcbddc->user_ChangeOfBasisMatrix, comm, PETSC_ERR_SUP, "Cannot generate Nedelec support with user defined change of basis");
247: PetscCheck(!order || (ne % order == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "The number of local edge dofs %" PetscInt_FMT " is not a multiple of the order %" PetscInt_FMT, ne, order);
249: /* Just set primal dofs and return */
250: if (setprimal) {
251: IS enedfieldlocal;
252: PetscInt *eidxs;
254: PetscCall(PetscMalloc1(ne, &eidxs));
255: PetscCall(VecGetArrayRead(matis->counter, (const PetscScalar **)&vals));
256: if (nedfieldlocal) {
257: PetscCall(ISGetIndices(nedfieldlocal, &idxs));
258: for (i = 0, cum = 0; i < ne; i++) {
259: if (PetscRealPart(vals[idxs[i]]) > 2.) eidxs[cum++] = idxs[i];
260: }
261: PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
262: } else {
263: for (i = 0, cum = 0; i < ne; i++) {
264: if (PetscRealPart(vals[i]) > 2.) eidxs[cum++] = i;
265: }
266: }
267: PetscCall(VecRestoreArrayRead(matis->counter, (const PetscScalar **)&vals));
268: PetscCall(ISCreateGeneral(comm, cum, eidxs, PETSC_COPY_VALUES, &enedfieldlocal));
269: PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, enedfieldlocal));
270: PetscCall(PetscFree(eidxs));
271: PetscCall(ISDestroy(&nedfieldlocal));
272: PetscCall(ISDestroy(&enedfieldlocal));
273: PetscFunctionReturn(PETSC_SUCCESS);
274: }
276: /* Compute some l2g maps */
277: if (nedfieldlocal) {
278: IS is;
280: /* need to map from the local Nedelec field to local numbering */
281: PetscCall(ISLocalToGlobalMappingCreateIS(nedfieldlocal, &fl2g));
282: /* need to map from the local Nedelec field to global numbering for the whole dofs*/
283: PetscCall(ISLocalToGlobalMappingApplyIS(al2g, nedfieldlocal, &is));
284: PetscCall(ISLocalToGlobalMappingCreateIS(is, &al2g));
285: /* need to map from the local Nedelec field to global numbering (for Nedelec only) */
286: if (global) {
287: PetscCall(PetscObjectReference((PetscObject)al2g));
288: el2g = al2g;
289: } else {
290: IS gis;
292: PetscCall(ISRenumber(is, NULL, NULL, &gis));
293: PetscCall(ISLocalToGlobalMappingCreateIS(gis, &el2g));
294: PetscCall(ISDestroy(&gis));
295: }
296: PetscCall(ISDestroy(&is));
297: } else {
298: /* one ref for the destruction of al2g, one for el2g */
299: PetscCall(PetscObjectReference((PetscObject)al2g));
300: PetscCall(PetscObjectReference((PetscObject)al2g));
301: el2g = al2g;
302: fl2g = NULL;
303: }
305: /* Start communication to drop connections for interior edges (for cc analysis only) */
306: PetscCall(PetscArrayzero(matis->sf_leafdata, n));
307: PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
308: if (nedfieldlocal) {
309: PetscCall(ISGetIndices(nedfieldlocal, &idxs));
310: for (i = 0; i < ne; i++) matis->sf_leafdata[idxs[i]] = 1;
311: PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
312: } else {
313: for (i = 0; i < ne; i++) matis->sf_leafdata[i] = 1;
314: }
315: PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
316: PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
318: /* There's no way to detect all possible corner candidates in a element-by-element case in a pure algebraic setting
319: Firedrake attaches a index set to identify them upfront. If it is present, we assume we are in such a case */
320: if (matis->allow_repeated) PetscCall(PetscObjectQuery((PetscObject)pcbddc->discretegradient, "_elements_corners", (PetscObject *)&elements_corners));
322: /* drop connections with interior edges to avoid unneeded communications and memory movements */
323: PetscCall(MatViewFromOptions(pcbddc->discretegradient, (PetscObject)pc, "-pc_bddc_discrete_gradient_view"));
324: PetscCall(MatDuplicate(pcbddc->discretegradient, MAT_COPY_VALUES, &G));
325: PetscCall(MatSetOption(G, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
326: if (global) {
327: PetscInt rst;
329: PetscCall(MatGetOwnershipRange(G, &rst, NULL));
330: for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++) {
331: if (matis->sf_rootdata[i] < 2) matis->sf_rootdata[cum++] = i + rst;
332: }
333: PetscCall(MatSetOption(G, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE));
334: PetscCall(MatZeroRows(G, cum, matis->sf_rootdata, 0., NULL, NULL));
335: } else {
336: PetscInt *tbz;
338: PetscCall(PetscMalloc1(ne, &tbz));
339: PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
340: PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
341: PetscCall(ISGetIndices(nedfieldlocal, &idxs));
342: for (i = 0, cum = 0; i < ne; i++)
343: if (matis->sf_leafdata[idxs[i]] == 1) tbz[cum++] = i;
344: PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
345: PetscCall(ISLocalToGlobalMappingApply(el2g, cum, tbz, tbz));
346: PetscCall(MatZeroRows(G, cum, tbz, 0., NULL, NULL));
347: PetscCall(PetscFree(tbz));
348: }
350: /* Extract subdomain relevant rows of G */
351: PetscCall(ISLocalToGlobalMappingGetIndices(el2g, &idxs));
352: PetscCall(ISCreateGeneral(comm, ne, idxs, PETSC_USE_POINTER, &lned));
353: PetscCall(MatAIJExtractRows(G, lned, &lGall));
354: /* PetscCall(MatCreateSubMatrix(G, lned, NULL, MAT_INITIAL_MATRIX, &lGall)); */
355: PetscCall(ISLocalToGlobalMappingRestoreIndices(el2g, &idxs));
356: PetscCall(ISDestroy(&lned));
357: PetscCall(MatConvert(lGall, MATIS, MAT_INITIAL_MATRIX, &lGis));
358: PetscCall(MatDestroy(&lGall));
359: PetscCall(MatISGetLocalMat(lGis, &lG));
360: if (matis->allow_repeated) { /* multi-element support */
361: Mat *lGn, B;
362: IS *is_rows, *tcols, tmap, nmap;
363: PetscInt subnv;
364: const PetscInt *subvidxs;
365: ISLocalToGlobalMapping mapn;
367: PetscCall(PetscCalloc1(pcbddc->n_local_subs * pcbddc->n_local_subs, &lGn));
368: PetscCall(PetscMalloc1(pcbddc->n_local_subs, &is_rows));
369: PetscCall(PetscMalloc1(pcbddc->n_local_subs, &tcols));
370: for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) {
371: if (fl2g) {
372: PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->local_subs[i], &is_rows[i]));
373: } else {
374: PetscCall(PetscObjectReference((PetscObject)pcbddc->local_subs[i]));
375: is_rows[i] = pcbddc->local_subs[i];
376: }
377: PetscCall(MatCreateSubMatrix(lG, is_rows[i], NULL, MAT_INITIAL_MATRIX, &lGn[i * (1 + pcbddc->n_local_subs)]));
378: PetscCall(MatSeqAIJCompactOutExtraColumns_SeqAIJ(lGn[i * (1 + pcbddc->n_local_subs)], &mapn));
379: PetscCall(ISLocalToGlobalMappingGetSize(mapn, &subnv));
380: PetscCall(ISLocalToGlobalMappingGetIndices(mapn, &subvidxs));
381: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, subnv, subvidxs, PETSC_COPY_VALUES, &tcols[i]));
382: PetscCall(ISLocalToGlobalMappingRestoreIndices(mapn, &subvidxs));
383: PetscCall(ISLocalToGlobalMappingDestroy(&mapn));
384: }
386: /* Create new MATIS with repeated vertices */
387: PetscCall(MatCreate(comm, &B));
388: PetscCall(MatSetSizes(B, lGis->rmap->n, lGis->cmap->n, lGis->rmap->N, lGis->cmap->N));
389: PetscCall(MatSetType(B, MATIS));
390: PetscCall(MatISSetAllowRepeated(B, PETSC_TRUE));
391: PetscCall(ISConcatenate(PETSC_COMM_SELF, pcbddc->n_local_subs, tcols, &tmap));
392: PetscCall(ISLocalToGlobalMappingApplyIS(lGis->cmap->mapping, tmap, &nmap));
393: PetscCall(ISDestroy(&tmap));
394: PetscCall(ISGetLocalSize(nmap, &subnv));
395: PetscCall(ISGetIndices(nmap, &subvidxs));
396: PetscCall(ISCreateGeneral(comm, subnv, subvidxs, PETSC_USE_POINTER, &tmap));
397: PetscCall(ISRestoreIndices(nmap, &subvidxs));
398: PetscCall(ISLocalToGlobalMappingCreateIS(tmap, &mapn));
399: PetscCall(ISDestroy(&tmap));
400: PetscCall(ISDestroy(&nmap));
401: PetscCall(MatSetLocalToGlobalMapping(B, lGis->rmap->mapping, mapn));
402: PetscCall(ISLocalToGlobalMappingDestroy(&mapn));
403: PetscCall(MatCreateNest(PETSC_COMM_SELF, pcbddc->n_local_subs, is_rows, pcbddc->n_local_subs, NULL, lGn, &lG));
404: for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) {
405: PetscCall(MatDestroy(&lGn[i * (1 + pcbddc->n_local_subs)]));
406: PetscCall(ISDestroy(&is_rows[i]));
407: PetscCall(ISDestroy(&tcols[i]));
408: }
409: PetscCall(MatConvert(lG, MATSEQAIJ, MAT_INPLACE_MATRIX, &lG));
410: PetscCall(PetscFree(lGn));
411: PetscCall(PetscFree(is_rows));
412: PetscCall(PetscFree(tcols));
413: PetscCall(MatISSetLocalMat(B, lG));
414: PetscCall(MatDestroy(&lG));
416: PetscCall(MatDestroy(&lGis));
417: lGis = B;
419: lGis->assembled = PETSC_TRUE;
420: }
421: PetscCall(MatViewFromOptions(lGis, (PetscObject)pc, "-pc_bddc_nedelec_init_G_view"));
423: /* SF for nodal dofs communications */
424: PetscCall(MatGetLocalSize(G, NULL, &Lv));
425: PetscCall(MatISGetLocalToGlobalMapping(lGis, NULL, &vl2g));
426: PetscCall(PetscObjectReference((PetscObject)vl2g));
427: PetscCall(ISLocalToGlobalMappingGetSize(vl2g, &nv));
428: PetscCall(PetscSFCreate(comm, &sfv));
429: PetscCall(ISLocalToGlobalMappingGetIndices(vl2g, &idxs));
430: PetscCall(PetscSFSetGraphLayout(sfv, lGis->cmap, nv, NULL, PETSC_OWN_POINTER, idxs));
431: PetscCall(ISLocalToGlobalMappingRestoreIndices(vl2g, &idxs));
433: if (elements_corners) {
434: IS tmp;
435: Vec global, local;
436: Mat_IS *tGis = (Mat_IS *)lGis->data;
438: PetscCall(MatCreateVecs(lGis, &global, NULL));
439: PetscCall(MatCreateVecs(tGis->A, &local, NULL));
440: PetscCall(PCBDDCGlobalToLocal(tGis->cctx, global, local, elements_corners, &tmp));
441: PetscCall(VecDestroy(&global));
442: PetscCall(VecDestroy(&local));
443: elements_corners = tmp;
444: }
446: /* Destroy temporary G */
447: PetscCall(MatISGetLocalMat(lGis, &lG));
448: PetscCall(PetscObjectReference((PetscObject)lG));
449: PetscCall(MatDestroy(&G));
450: PetscCall(MatDestroy(&lGis));
452: if (print) {
453: PetscCall(PetscObjectSetName((PetscObject)lG, "initial_lG"));
454: PetscCall(MatView(lG, NULL));
455: }
457: /* Save lG for values insertion in change of basis */
458: PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGinit));
460: /* Analyze the edge-nodes connections (duplicate lG) */
461: PetscCall(MatDuplicate(lG, MAT_COPY_VALUES, &lGe));
462: PetscCall(MatSetOption(lGe, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
463: PetscCall(PetscBTCreate(nv, &btv));
464: PetscCall(PetscBTCreate(ne, &bte));
465: PetscCall(PetscBTCreate(ne, &btb));
466: PetscCall(PetscBTCreate(ne, &btbd));
467: /* need to import the boundary specification to ensure the
468: proper detection of coarse edges' endpoints */
469: if (pcbddc->DirichletBoundariesLocal) {
470: IS is;
472: if (fl2g) {
473: PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->DirichletBoundariesLocal, &is));
474: } else {
475: is = pcbddc->DirichletBoundariesLocal;
476: }
477: PetscCall(ISGetLocalSize(is, &cum));
478: PetscCall(ISGetIndices(is, &idxs));
479: for (i = 0; i < cum; i++) {
480: if (idxs[i] >= 0 && idxs[i] < ne) {
481: PetscCall(PetscBTSet(btb, idxs[i]));
482: PetscCall(PetscBTSet(btbd, idxs[i]));
483: }
484: }
485: PetscCall(ISRestoreIndices(is, &idxs));
486: if (fl2g) PetscCall(ISDestroy(&is));
487: }
488: if (pcbddc->NeumannBoundariesLocal) {
489: IS is;
491: if (fl2g) {
492: PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_MASK, pcbddc->NeumannBoundariesLocal, &is));
493: } else {
494: is = pcbddc->NeumannBoundariesLocal;
495: }
496: PetscCall(ISGetLocalSize(is, &cum));
497: PetscCall(ISGetIndices(is, &idxs));
498: for (i = 0; i < cum; i++) {
499: if (idxs[i] >= 0 && idxs[i] < ne) PetscCall(PetscBTSet(btb, idxs[i]));
500: }
501: PetscCall(ISRestoreIndices(is, &idxs));
502: if (fl2g) PetscCall(ISDestroy(&is));
503: }
505: /* Count neighs per dof */
506: PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, &ecount, NULL));
507: PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, &vcount, NULL));
509: /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs
510: for proper detection of coarse edges' endpoints */
511: PetscCall(PetscBTCreate(ne, &btee));
512: for (i = 0; i < ne; i++) {
513: if ((ecount[i] > 2 && !PetscBTLookup(btbd, i)) || (ecount[i] == 2 && PetscBTLookup(btb, i))) PetscCall(PetscBTSet(btee, i));
514: }
515: PetscCall(PetscMalloc1(ne, &marks));
516: if (!conforming) {
517: PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt));
518: PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
519: }
520: PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
521: PetscCall(MatSeqAIJGetArray(lGe, &vals));
522: cum = 0;
523: for (i = 0; i < ne; i++) {
524: /* eliminate rows corresponding to edge dofs belonging to coarse faces */
525: if (!PetscBTLookup(btee, i)) {
526: marks[cum++] = i;
527: continue;
528: }
529: /* set badly connected edge dofs as primal */
530: if (!conforming) {
531: if (ii[i + 1] - ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */
532: marks[cum++] = i;
533: PetscCall(PetscBTSet(bte, i));
534: for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j]));
535: } else {
536: /* every edge dofs should be connected through a certain number of nodal dofs
537: to other edge dofs belonging to coarse edges
538: - at most 2 endpoints
539: - order-1 interior nodal dofs
540: - no undefined nodal dofs (nconn < order)
541: */
542: PetscInt ends = 0, ints = 0, undef = 0;
543: for (j = ii[i]; j < ii[i + 1]; j++) {
544: PetscInt v = jj[j], k;
545: PetscInt nconn = iit[v + 1] - iit[v];
546: for (k = iit[v]; k < iit[v + 1]; k++)
547: if (!PetscBTLookup(btee, jjt[k])) nconn--;
548: if (nconn > order) ends++;
549: else if (nconn == order) ints++;
550: else undef++;
551: }
552: if (undef || ends > 2 || ints != order - 1) {
553: marks[cum++] = i;
554: PetscCall(PetscBTSet(bte, i));
555: for (j = ii[i]; j < ii[i + 1]; j++) PetscCall(PetscBTSet(btv, jj[j]));
556: }
557: }
558: }
559: /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */
560: if (!order && ii[i + 1] != ii[i]) {
561: PetscScalar val = 1. / (ii[i + 1] - ii[i] - 1);
562: for (j = ii[i]; j < ii[i + 1]; j++) vals[j] = val;
563: }
564: }
565: PetscCall(PetscBTDestroy(&btee));
566: PetscCall(MatSeqAIJRestoreArray(lGe, &vals));
567: PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
568: if (!conforming) {
569: PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
570: PetscCall(MatDestroy(&lGt));
571: }
572: PetscCall(MatZeroRows(lGe, cum, marks, 0., NULL, NULL));
574: /* identify splitpoints and corner candidates */
575: PetscCall(PetscMalloc2(nv, &sfvleaves, Lv, &sfvroots));
576: PetscCall(PetscBTCreate(nv, &btvcand));
577: if (elements_corners) {
578: PetscCall(ISGetLocalSize(elements_corners, &cum));
579: PetscCall(ISGetIndices(elements_corners, &idxs));
580: for (i = 0; i < cum; i++) PetscCall(PetscBTSet(btvcand, idxs[i]));
581: PetscCall(ISRestoreIndices(elements_corners, &idxs));
582: }
584: if (matis->allow_repeated) { /* assign a uniq global id to edge local subsets and communicate it with nodal space */
585: PetscSF emlsf, vmlsf;
586: PetscInt *eleaves, *vleaves, *meleaves, *mvleaves;
587: PetscInt cum_subs = 0, n_subs = pcbddc->n_local_subs, bs, emnr, emnl, vmnr, vmnl;
589: PetscCall(ISLocalToGlobalMappingGetBlockSize(el2g, &bs));
590: PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded");
591: PetscCall(ISLocalToGlobalMappingGetBlockSize(vl2g, &bs));
592: PetscCheck(bs == 1, comm, PETSC_ERR_SUP, "Not coded");
594: PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(el2g, &emlsf));
595: PetscCall(ISLocalToGlobalMappingGetBlockMultiLeavesSF(vl2g, &vmlsf));
597: PetscCall(PetscSFGetGraph(emlsf, &emnr, &emnl, NULL, NULL));
598: for (i = 0, j = 0; i < ne; i++) j += ecount[i];
599: PetscCheck(emnr == ne, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnr, ne);
600: PetscCheck(emnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in edge multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, emnl, j);
602: PetscCall(PetscSFGetGraph(vmlsf, &vmnr, &vmnl, NULL, NULL));
603: for (i = 0, j = 0; i < nv; i++) j += vcount[i];
604: PetscCheck(vmnr == nv, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of roots in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnr, nv);
605: PetscCheck(vmnl == j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of leaves in nodal multi-leaves SF %" PetscInt_FMT " != %" PetscInt_FMT, vmnl, j);
607: PetscCall(PetscMalloc1(ne, &eleaves));
608: PetscCall(PetscMalloc1(nv, &vleaves));
609: for (i = 0; i < ne; i++) eleaves[i] = PETSC_INT_MAX;
610: for (i = 0; i < nv; i++) vleaves[i] = PETSC_INT_MAX;
611: PetscCall(PetscMalloc1(emnl, &meleaves));
612: PetscCall(PetscMalloc1(vmnl, &mvleaves));
614: PetscCallMPI(MPI_Exscan(&n_subs, &cum_subs, 1, MPIU_INT, MPI_SUM, comm));
615: PetscCall(MatGetRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
616: for (i = 0; i < n_subs; i++) {
617: const PetscInt *idxs;
618: const PetscInt subid = cum_subs + i;
619: PetscInt ns;
621: PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &ns));
622: PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs));
623: for (j = 0; j < ns; j++) {
624: const PetscInt e = idxs[j];
626: eleaves[e] = subid;
627: for (PetscInt k = ii[e]; k < ii[e + 1]; k++) vleaves[jj[k]] = subid;
628: }
629: PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs));
630: }
631: PetscCall(MatRestoreRowIJ(lGinit, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
632: PetscCall(PetscSFBcastBegin(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE));
633: PetscCall(PetscSFBcastEnd(emlsf, MPIU_INT, eleaves, meleaves, MPI_REPLACE));
634: PetscCall(PetscSFBcastBegin(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE));
635: PetscCall(PetscSFBcastEnd(vmlsf, MPIU_INT, vleaves, mvleaves, MPI_REPLACE));
636: PetscCall(PetscFree(eleaves));
637: PetscCall(PetscFree(vleaves));
639: PetscCall(PetscMalloc1(ne + 1, &eneighs));
640: eneighs[0] = meleaves;
641: for (i = 0; i < ne; i++) {
642: PetscCall(PetscSortInt(ecount[i], eneighs[i]));
643: eneighs[i + 1] = eneighs[i] + ecount[i];
644: }
645: PetscCall(PetscMalloc1(nv + 1, &vneighs));
646: vneighs[0] = mvleaves;
647: for (i = 0; i < nv; i++) {
648: PetscCall(PetscSortInt(vcount[i], vneighs[i]));
649: vneighs[i + 1] = vneighs[i] + vcount[i];
650: }
651: } else {
652: PetscCall(ISLocalToGlobalMappingGetNodeInfo(el2g, NULL, NULL, &eneighs));
653: PetscCall(ISLocalToGlobalMappingGetNodeInfo(vl2g, NULL, NULL, &vneighs));
654: }
656: PetscCall(MatTranspose(lGe, MAT_INITIAL_MATRIX, &lGt));
657: if (print) {
658: PetscCall(PetscObjectSetName((PetscObject)lGe, "edgerestr_lG"));
659: PetscCall(MatView(lGe, NULL));
660: PetscCall(PetscObjectSetName((PetscObject)lGt, "edgerestr_lGt"));
661: PetscCall(MatView(lGt, NULL));
662: }
663: PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
664: PetscCall(MatSeqAIJGetArray(lGt, &vals));
665: for (i = 0; i < nv; i++) {
666: PetscInt ord = order, test = ii[i + 1] - ii[i], vc = vcount[i];
667: PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE;
668: if (!order) { /* variable order */
669: PetscReal vorder = 0.;
671: for (j = ii[i]; j < ii[i + 1]; j++) vorder += PetscRealPart(vals[j]);
672: test = PetscFloorReal(vorder + 10. * PETSC_SQRT_MACHINE_EPSILON);
673: PetscCheck(vorder - test <= PETSC_SQRT_MACHINE_EPSILON, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected value for vorder: %g (%" PetscInt_FMT ")", (double)vorder, test);
674: ord = 1;
675: }
676: for (j = ii[i]; j < ii[i + 1] && sneighs; j++) {
677: const PetscInt e = jj[j];
679: if (PetscBTLookup(btbd, e)) {
680: bdir = PETSC_TRUE;
681: break;
682: }
683: if (vc != ecount[e]) {
684: sneighs = PETSC_FALSE;
685: } else {
686: const PetscInt *vn = vneighs[i], *en = eneighs[e];
688: for (PetscInt k = 0; k < vc; k++) {
689: if (vn[k] != en[k]) {
690: sneighs = PETSC_FALSE;
691: break;
692: }
693: }
694: }
695: }
696: if (elements_corners) test = 0;
697: if (!sneighs || test >= 3 * ord || bdir) { /* splitpoints */
698: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "SPLITPOINT %" PetscInt_FMT " (%s %s %s)\n", i, PetscBools[!sneighs], PetscBools[test >= 3 * ord], PetscBools[bdir]));
699: PetscCall(PetscBTSet(btv, i));
700: } else if (test == ord) {
701: if (order == 1 || (!order && ii[i + 1] - ii[i] == 1)) {
702: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINT %" PetscInt_FMT "\n", i));
703: PetscCall(PetscBTSet(btv, i));
704: } else if (!elements_corners) {
705: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "CORNER CANDIDATE %" PetscInt_FMT "\n", i));
706: PetscCall(PetscBTSet(btvcand, i));
707: }
708: }
709: }
710: PetscCall(PetscBTDestroy(&btbd));
712: /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */
713: if (order != 1) {
714: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "INSPECTING CANDIDATES\n"));
715: PetscCall(MatGetRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
716: for (i = 0; i < nv; i++) {
717: if (PetscBTLookup(btvcand, i)) {
718: PetscBool found = PETSC_FALSE;
719: for (j = ii[i]; j < ii[i + 1] && !found; j++) {
720: PetscInt k, e = jj[j];
721: if (PetscBTLookup(bte, e)) continue;
722: for (k = iit[e]; k < iit[e + 1]; k++) {
723: PetscInt v = jjt[k];
724: if (v != i && PetscBTLookup(btvcand, v)) {
725: found = PETSC_TRUE;
726: break;
727: }
728: }
729: }
730: if (!found) {
731: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " CLEARED\n", i));
732: PetscCall(PetscBTClear(btvcand, i));
733: } else {
734: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " CANDIDATE %" PetscInt_FMT " ACCEPTED\n", i));
735: }
736: }
737: }
738: PetscCall(MatRestoreRowIJ(lGe, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
739: }
740: PetscCall(MatSeqAIJRestoreArray(lGt, &vals));
741: PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
742: PetscCall(MatDestroy(&lGe));
744: /* Get the local G^T explicitly */
745: PetscCall(MatDestroy(&lGt));
746: PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt));
747: PetscCall(MatSetOption(lGt, MAT_KEEP_NONZERO_PATTERN, PETSC_FALSE));
749: /* Mark shared nodal dofs */
750: PetscCall(PetscBTCreate(nv, &btvi));
751: for (i = 0; i < nv; i++) {
752: if (vcount[i] > 1) PetscCall(PetscBTSet(btvi, i));
753: }
755: if (matis->allow_repeated) {
756: PetscCall(PetscFree(eneighs[0]));
757: PetscCall(PetscFree(vneighs[0]));
758: PetscCall(PetscFree(eneighs));
759: PetscCall(PetscFree(vneighs));
760: }
761: PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(el2g, NULL, &ecount, &eneighs));
762: PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(vl2g, NULL, &vcount, &vneighs));
764: /* communicate corners and splitpoints */
765: PetscCall(PetscMalloc1(nv, &vmarks));
766: PetscCall(PetscArrayzero(sfvleaves, nv));
767: PetscCall(PetscArrayzero(sfvroots, Lv));
768: for (i = 0; i < nv; i++)
769: if (PetscUnlikely(PetscBTLookup(btv, i))) sfvleaves[i] = 1;
771: if (print) {
772: IS tbz;
774: cum = 0;
775: for (i = 0; i < nv; i++)
776: if (sfvleaves[i]) vmarks[cum++] = i;
778: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz));
779: PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_local"));
780: PetscCall(ISView(tbz, NULL));
781: PetscCall(ISDestroy(&tbz));
782: }
784: PetscCall(PetscSFReduceBegin(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM));
785: PetscCall(PetscSFReduceEnd(sfv, MPIU_INT, sfvleaves, sfvroots, MPI_SUM));
786: PetscCall(PetscSFBcastBegin(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE));
787: PetscCall(PetscSFBcastEnd(sfv, MPIU_INT, sfvroots, sfvleaves, MPI_REPLACE));
789: /* Zero rows of lGt corresponding to identified corners
790: and interior nodal dofs */
791: cum = 0;
792: for (i = 0; i < nv; i++) {
793: if (sfvleaves[i]) {
794: vmarks[cum++] = i;
795: PetscCall(PetscBTSet(btv, i));
796: } else if (!PetscBTLookup(btvi, i)) vmarks[cum++] = i;
797: }
798: PetscCall(PetscBTDestroy(&btvi));
799: if (print) {
800: IS tbz;
802: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, vmarks, PETSC_COPY_VALUES, &tbz));
803: PetscCall(PetscObjectSetName((PetscObject)tbz, "corners_to_be_zeroed_with_interior"));
804: PetscCall(ISView(tbz, NULL));
805: PetscCall(ISDestroy(&tbz));
806: }
807: PetscCall(MatZeroRows(lGt, cum, vmarks, 0., NULL, NULL));
808: PetscCall(PetscFree(vmarks));
809: PetscCall(PetscSFDestroy(&sfv));
810: PetscCall(PetscFree2(sfvleaves, sfvroots));
812: /* Recompute G */
813: PetscCall(MatDestroy(&lG));
814: PetscCall(MatTranspose(lGt, MAT_INITIAL_MATRIX, &lG));
815: if (print) {
816: PetscCall(PetscObjectSetName((PetscObject)lG, "used_lG"));
817: PetscCall(MatView(lG, NULL));
818: PetscCall(PetscObjectSetName((PetscObject)lGt, "used_lGt"));
819: PetscCall(MatView(lGt, NULL));
820: }
822: /* Get primal dofs (if any) */
823: cum = 0;
824: for (i = 0; i < ne; i++) {
825: if (PetscUnlikely(PetscBTLookup(bte, i))) marks[cum++] = i;
826: }
827: if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, marks, marks));
828: PetscCall(ISCreateGeneral(comm, cum, marks, PETSC_COPY_VALUES, &primals));
829: if (print) {
830: PetscCall(PetscObjectSetName((PetscObject)primals, "prescribed_primal_dofs"));
831: PetscCall(ISView(primals, NULL));
832: }
833: PetscCall(PetscBTDestroy(&bte));
834: /* TODO: what if the user passed in some of them ? */
835: PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
836: PetscCall(ISDestroy(&primals));
838: /* Compute edge connectivity */
839: PetscCall(PetscObjectSetOptionsPrefix((PetscObject)lG, "econn_"));
841: /* Symbolic conn = lG*lGt */
842: if (!elements_corners) { /* if present, we assume we are in the element-by-element case and the CSR graph is not needed */
843: PetscCall(MatProductCreate(lG, lGt, NULL, &conn));
844: PetscCall(MatProductSetType(conn, MATPRODUCT_AB));
845: PetscCall(MatProductSetAlgorithm(conn, "default"));
846: PetscCall(MatProductSetFill(conn, PETSC_DEFAULT));
847: PetscCall(PetscObjectSetOptionsPrefix((PetscObject)conn, "econn_"));
848: PetscCall(MatProductSetFromOptions(conn));
849: PetscCall(MatProductSymbolic(conn));
850: PetscCall(MatGetRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
851: if (fl2g) {
852: PetscBT btf;
853: PetscInt *iia, *jja, *iiu, *jju;
854: PetscBool rest = PETSC_FALSE, free = PETSC_FALSE;
856: /* create CSR for all local dofs */
857: PetscCall(PetscMalloc1(n + 1, &iia));
858: if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */
859: PetscCheck(pcbddc->mat_graph->nvtxs_csr == n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid size of CSR graph %" PetscInt_FMT ". Should be %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr, n);
860: iiu = pcbddc->mat_graph->xadj;
861: jju = pcbddc->mat_graph->adjncy;
862: } else if (pcbddc->use_local_adj) {
863: rest = PETSC_TRUE;
864: PetscCall(MatGetRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done));
865: } else {
866: free = PETSC_TRUE;
867: PetscCall(PetscMalloc2(n + 1, &iiu, n, &jju));
868: iiu[0] = 0;
869: for (i = 0; i < n; i++) {
870: iiu[i + 1] = i + 1;
871: jju[i] = -1;
872: }
873: }
875: /* import sizes of CSR */
876: iia[0] = 0;
877: for (i = 0; i < n; i++) iia[i + 1] = iiu[i + 1] - iiu[i];
879: /* overwrite entries corresponding to the Nedelec field */
880: PetscCall(PetscBTCreate(n, &btf));
881: PetscCall(ISGetIndices(nedfieldlocal, &idxs));
882: for (i = 0; i < ne; i++) {
883: PetscCall(PetscBTSet(btf, idxs[i]));
884: iia[idxs[i] + 1] = ii[i + 1] - ii[i];
885: }
887: /* iia in CSR */
888: for (i = 0; i < n; i++) iia[i + 1] += iia[i];
890: /* jja in CSR */
891: PetscCall(PetscMalloc1(iia[n], &jja));
892: for (i = 0; i < n; i++)
893: if (!PetscBTLookup(btf, i))
894: for (j = 0; j < iiu[i + 1] - iiu[i]; j++) jja[iia[i] + j] = jju[iiu[i] + j];
896: /* map edge dofs connectivity */
897: if (jj) {
898: PetscCall(ISLocalToGlobalMappingApply(fl2g, ii[ne], jj, (PetscInt *)jj));
899: for (i = 0; i < ne; i++) {
900: PetscInt e = idxs[i];
901: for (j = 0; j < ii[i + 1] - ii[i]; j++) jja[iia[e] + j] = jj[ii[i] + j];
902: }
903: }
904: PetscCall(ISRestoreIndices(nedfieldlocal, &idxs));
905: PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, iia, jja, PETSC_COPY_VALUES));
906: if (rest) PetscCall(MatRestoreRowIJ(matis->A, 0, PETSC_TRUE, PETSC_FALSE, &i, (const PetscInt **)&iiu, (const PetscInt **)&jju, &done));
907: if (free) PetscCall(PetscFree2(iiu, jju));
908: PetscCall(PetscBTDestroy(&btf));
909: } else {
910: PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, n, ii, jj, PETSC_COPY_VALUES));
911: }
912: PetscCall(MatRestoreRowIJ(conn, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
913: PetscCall(MatDestroy(&conn));
914: }
916: /* Analyze interface for edge dofs */
917: PetscCall(PCBDDCAnalyzeInterface(pc));
918: pcbddc->mat_graph->twodim = PETSC_FALSE;
920: /* Get coarse edges in the edge space */
921: PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
923: if (fl2g) {
924: PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals));
925: PetscCall(PetscMalloc1(nee, &eedges));
926: for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]));
927: } else {
928: eedges = alleedges;
929: primals = allprimals;
930: }
932: /* Mark fine edge dofs with their coarse edge id */
933: PetscCall(PetscArrayzero(marks, ne));
934: PetscCall(ISGetLocalSize(primals, &cum));
935: PetscCall(ISGetIndices(primals, &idxs));
936: for (i = 0; i < cum; i++) marks[idxs[i]] = nee + 1;
937: PetscCall(ISRestoreIndices(primals, &idxs));
938: if (print) {
939: PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs"));
940: PetscCall(ISView(primals, NULL));
941: }
943: maxsize = 0;
944: for (i = 0; i < nee; i++) {
945: PetscInt size, mark = i + 1;
947: PetscCall(ISGetLocalSize(eedges[i], &size));
948: PetscCall(ISGetIndices(eedges[i], &idxs));
949: for (j = 0; j < size; j++) marks[idxs[j]] = mark;
950: PetscCall(ISRestoreIndices(eedges[i], &idxs));
951: maxsize = PetscMax(maxsize, size);
952: }
954: /* Find coarse edge endpoints */
955: PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
956: PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
957: for (i = 0; i < nee; i++) {
958: PetscInt mark = i + 1, size;
960: PetscCall(ISGetLocalSize(eedges[i], &size));
961: if (!size && nedfieldlocal) continue;
962: PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
963: PetscCall(ISGetIndices(eedges[i], &idxs));
964: if (print) {
965: PetscCall(PetscPrintf(PETSC_COMM_SELF, "ENDPOINTS ANALYSIS EDGE %" PetscInt_FMT "\n", i));
966: PetscCall(ISView(eedges[i], NULL));
967: }
968: for (j = 0; j < size; j++) {
969: PetscInt k, ee = idxs[j];
970: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " idx %" PetscInt_FMT "\n", ee));
971: for (k = ii[ee]; k < ii[ee + 1]; k++) {
972: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " inspect %" PetscInt_FMT "\n", jj[k]));
973: if (PetscBTLookup(btv, jj[k])) {
974: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found (already set) %" PetscInt_FMT "\n", jj[k]));
975: } else if (PetscBTLookup(btvcand, jj[k])) { /* is it ok? */
976: PetscInt k2;
977: PetscBool corner = PETSC_FALSE;
978: for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) {
979: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " INSPECTING %" PetscInt_FMT ": mark %" PetscInt_FMT " (ref mark %" PetscInt_FMT "), boundary %d\n", jjt[k2], marks[jjt[k2]], mark, (int)!!PetscBTLookup(btb, jjt[k2])));
980: /* it's a corner if either is connected with an edge dof belonging to a different cc or
981: if the edge dof lie on the natural part of the boundary */
982: if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb, jjt[k2]))) {
983: corner = PETSC_TRUE;
984: break;
985: }
986: }
987: if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */
988: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " corner found %" PetscInt_FMT "\n", jj[k]));
989: PetscCall(PetscBTSet(btv, jj[k]));
990: } else {
991: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " no corners found\n"));
992: }
993: }
994: }
995: }
996: PetscCall(ISRestoreIndices(eedges[i], &idxs));
997: }
998: PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
999: PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1000: PetscCall(PetscBTDestroy(&btb));
1002: /* Reset marked primal dofs */
1003: PetscCall(ISGetLocalSize(primals, &cum));
1004: PetscCall(ISGetIndices(primals, &idxs));
1005: for (i = 0; i < cum; i++) marks[idxs[i]] = 0;
1006: PetscCall(ISRestoreIndices(primals, &idxs));
1008: /* Now use the initial lG */
1009: PetscCall(MatDestroy(&lG));
1010: PetscCall(MatDestroy(&lGt));
1011: lG = lGinit;
1012: PetscCall(MatTranspose(lG, MAT_INITIAL_MATRIX, &lGt));
1014: /* Compute extended cols indices */
1015: PetscCall(PetscBTCreate(nv, &btvc));
1016: PetscCall(PetscBTCreate(nee, &bter));
1017: PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1018: PetscCall(MatSeqAIJGetMaxRowNonzeros(lG, &i));
1019: i *= maxsize;
1020: PetscCall(PetscCalloc1(nee, &extcols));
1021: PetscCall(PetscMalloc2(i, &extrow, i, &gidxs));
1022: eerr = PETSC_FALSE;
1023: for (i = 0; i < nee; i++) {
1024: PetscInt size, found = 0;
1026: cum = 0;
1027: PetscCall(ISGetLocalSize(eedges[i], &size));
1028: if (!size && nedfieldlocal) continue;
1029: PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1030: PetscCall(ISGetIndices(eedges[i], &idxs));
1031: PetscCall(PetscBTMemzero(nv, btvc));
1032: for (j = 0; j < size; j++) {
1033: PetscInt k, ee = idxs[j];
1034: for (k = ii[ee]; k < ii[ee + 1]; k++) {
1035: PetscInt vv = jj[k];
1036: if (!PetscBTLookup(btv, vv)) extrow[cum++] = vv;
1037: else if (!PetscBTLookupSet(btvc, vv)) found++;
1038: }
1039: }
1040: PetscCall(ISRestoreIndices(eedges[i], &idxs));
1041: PetscCall(PetscSortRemoveDupsInt(&cum, extrow));
1042: PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs));
1043: PetscCall(PetscSortIntWithArray(cum, gidxs, extrow));
1044: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]));
1045: /* it may happen that endpoints are not defined at this point
1046: if it is the case, mark this edge for a second pass */
1047: if (cum != size - 1 || found != 2) {
1048: PetscCall(PetscBTSet(bter, i));
1049: if (print) {
1050: PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge"));
1051: PetscCall(ISView(eedges[i], NULL));
1052: PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol"));
1053: PetscCall(ISView(extcols[i], NULL));
1054: }
1055: eerr = PETSC_TRUE;
1056: }
1057: }
1058: /* PetscCheck(!eerr,PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */
1059: PetscCallMPI(MPIU_Allreduce(&eerr, &done, 1, MPI_C_BOOL, MPI_LOR, comm));
1060: if (done) {
1061: PetscInt *newprimals;
1063: PetscCall(PetscMalloc1(ne, &newprimals));
1064: PetscCall(ISGetLocalSize(primals, &cum));
1065: PetscCall(ISGetIndices(primals, &idxs));
1066: PetscCall(PetscArraycpy(newprimals, idxs, cum));
1067: PetscCall(ISRestoreIndices(primals, &idxs));
1068: PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
1069: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "DOING SECOND PASS (eerr %s)\n", PetscBools[eerr]));
1070: for (i = 0; i < nee; i++) {
1071: PetscBool has_candidates = PETSC_FALSE;
1072: if (PetscBTLookup(bter, i)) {
1073: PetscInt size, mark = i + 1;
1075: PetscCall(ISGetLocalSize(eedges[i], &size));
1076: PetscCall(ISGetIndices(eedges[i], &idxs));
1077: /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */
1078: for (j = 0; j < size; j++) {
1079: PetscInt k, ee = idxs[j];
1080: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "Inspecting edge dof %" PetscInt_FMT " [%" PetscInt_FMT " %" PetscInt_FMT ")\n", ee, ii[ee], ii[ee + 1]));
1081: for (k = ii[ee]; k < ii[ee + 1]; k++) {
1082: /* set all candidates located on the edge as corners */
1083: if (PetscBTLookup(btvcand, jj[k])) {
1084: PetscInt k2, vv = jj[k];
1085: has_candidates = PETSC_TRUE;
1086: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Candidate set to vertex %" PetscInt_FMT "\n", vv));
1087: PetscCall(PetscBTSet(btv, vv));
1088: /* set all edge dofs connected to candidate as primals */
1089: for (k2 = iit[vv]; k2 < iit[vv + 1]; k2++) {
1090: if (marks[jjt[k2]] == mark) {
1091: PetscInt k3, ee2 = jjt[k2];
1092: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected edge dof set to primal %" PetscInt_FMT "\n", ee2));
1093: newprimals[cum++] = ee2;
1094: /* finally set the new corners */
1095: for (k3 = ii[ee2]; k3 < ii[ee2 + 1]; k3++) {
1096: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Connected nodal dof set to vertex %" PetscInt_FMT "\n", jj[k3]));
1097: PetscCall(PetscBTSet(btv, jj[k3]));
1098: }
1099: }
1100: }
1101: } else {
1102: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Not a candidate vertex %" PetscInt_FMT "\n", jj[k]));
1103: }
1104: }
1105: }
1106: if (!has_candidates) { /* circular edge */
1107: PetscInt k, ee = idxs[0], *tmarks;
1109: PetscCall(PetscCalloc1(ne, &tmarks));
1110: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Circular edge %" PetscInt_FMT "\n", i));
1111: for (k = ii[ee]; k < ii[ee + 1]; k++) {
1112: PetscInt k2;
1113: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Set to corner %" PetscInt_FMT "\n", jj[k]));
1114: PetscCall(PetscBTSet(btv, jj[k]));
1115: for (k2 = iit[jj[k]]; k2 < iit[jj[k] + 1]; k2++) tmarks[jjt[k2]]++;
1116: }
1117: for (j = 0; j < size; j++) {
1118: if (tmarks[idxs[j]] > 1) {
1119: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, " Edge dof set to primal %" PetscInt_FMT "\n", idxs[j]));
1120: newprimals[cum++] = idxs[j];
1121: }
1122: }
1123: PetscCall(PetscFree(tmarks));
1124: }
1125: PetscCall(ISRestoreIndices(eedges[i], &idxs));
1126: }
1127: PetscCall(ISDestroy(&extcols[i]));
1128: }
1129: PetscCall(PetscFree(extcols));
1130: PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &iit, &jjt, &done));
1131: PetscCall(PetscSortRemoveDupsInt(&cum, newprimals));
1132: if (fl2g) {
1133: PetscCall(ISLocalToGlobalMappingApply(fl2g, cum, newprimals, newprimals));
1134: PetscCall(ISDestroy(&primals));
1135: for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i]));
1136: PetscCall(PetscFree(eedges));
1137: }
1138: PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1139: PetscCall(ISCreateGeneral(comm, cum, newprimals, PETSC_COPY_VALUES, &primals));
1140: PetscCall(PetscFree(newprimals));
1141: PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primals));
1142: PetscCall(ISDestroy(&primals));
1143: PetscCall(PCBDDCAnalyzeInterface(pc));
1144: pcbddc->mat_graph->twodim = PETSC_FALSE;
1145: PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1146: if (fl2g) {
1147: PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, allprimals, &primals));
1148: PetscCall(PetscMalloc1(nee, &eedges));
1149: for (i = 0; i < nee; i++) PetscCall(ISGlobalToLocalMappingApplyIS(fl2g, IS_GTOLM_DROP, alleedges[i], &eedges[i]));
1150: } else {
1151: eedges = alleedges;
1152: primals = allprimals;
1153: }
1154: PetscCall(PetscCalloc1(nee, &extcols));
1156: /* Mark again */
1157: PetscCall(PetscArrayzero(marks, ne));
1158: for (i = 0; i < nee; i++) {
1159: PetscInt size, mark = i + 1;
1161: PetscCall(ISGetLocalSize(eedges[i], &size));
1162: PetscCall(ISGetIndices(eedges[i], &idxs));
1163: for (j = 0; j < size; j++) marks[idxs[j]] = mark;
1164: PetscCall(ISRestoreIndices(eedges[i], &idxs));
1165: }
1166: if (print) {
1167: PetscCall(PetscObjectSetName((PetscObject)primals, "obtained_primal_dofs_secondpass"));
1168: PetscCall(ISView(primals, NULL));
1169: }
1171: /* Recompute extended cols */
1172: eerr = PETSC_FALSE;
1173: for (i = 0; i < nee; i++) {
1174: PetscInt size;
1176: cum = 0;
1177: PetscCall(ISGetLocalSize(eedges[i], &size));
1178: if (!size && nedfieldlocal) continue;
1179: PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1180: PetscCall(ISGetIndices(eedges[i], &idxs));
1181: for (j = 0; j < size; j++) {
1182: PetscInt k, ee = idxs[j];
1183: for (k = ii[ee]; k < ii[ee + 1]; k++)
1184: if (!PetscBTLookup(btv, jj[k])) extrow[cum++] = jj[k];
1185: }
1186: PetscCall(ISRestoreIndices(eedges[i], &idxs));
1187: PetscCall(PetscSortRemoveDupsInt(&cum, extrow));
1188: PetscCall(ISLocalToGlobalMappingApply(vl2g, cum, extrow, gidxs));
1189: PetscCall(PetscSortIntWithArray(cum, gidxs, extrow));
1190: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, extrow, PETSC_COPY_VALUES, &extcols[i]));
1191: if (cum != size - 1) {
1192: if (print) {
1193: PetscCall(PetscObjectSetName((PetscObject)eedges[i], "error_edge_secondpass"));
1194: PetscCall(ISView(eedges[i], NULL));
1195: PetscCall(PetscObjectSetName((PetscObject)extcols[i], "error_extcol_secondpass"));
1196: PetscCall(ISView(extcols[i], NULL));
1197: }
1198: eerr = PETSC_TRUE;
1199: }
1200: }
1201: }
1202: PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1203: PetscCall(PetscFree2(extrow, gidxs));
1204: PetscCall(PetscBTDestroy(&bter));
1205: if (print) PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, 5, PETSC_VIEWER_STDOUT_SELF));
1206: /* an error should not occur at this point */
1207: PetscCheck(!eerr, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected SIZE OF EDGE > EXTCOL SECOND PASS");
1209: /* Check the number of endpoints */
1210: PetscCall(MatGetRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1211: PetscCall(PetscMalloc1(2 * nee, &corners));
1212: PetscCall(PetscMalloc1(nee, &cedges));
1213: for (i = 0; i < nee; i++) {
1214: PetscInt size, found = 0, gc[2];
1216: /* init with defaults */
1217: cedges[i] = corners[i * 2] = corners[i * 2 + 1] = -1;
1218: PetscCall(ISGetLocalSize(eedges[i], &size));
1219: if (!size && nedfieldlocal) continue;
1220: PetscCheck(size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Unexpected zero sized edge %" PetscInt_FMT, i);
1221: PetscCall(ISGetIndices(eedges[i], &idxs));
1222: PetscCall(PetscBTMemzero(nv, btvc));
1223: for (j = 0; j < size; j++) {
1224: PetscInt k, ee = idxs[j];
1225: for (k = ii[ee]; k < ii[ee + 1]; k++) {
1226: PetscInt vv = jj[k];
1227: if (PetscBTLookup(btv, vv) && !PetscBTLookupSet(btvc, vv)) {
1228: PetscCheck(found != 2, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found more than two corners for edge %" PetscInt_FMT, i);
1229: corners[i * 2 + found++] = vv;
1230: }
1231: }
1232: }
1233: if (found != 2) {
1234: PetscInt e;
1235: if (fl2g) {
1236: PetscCall(ISLocalToGlobalMappingApply(fl2g, 1, idxs, &e));
1237: } else {
1238: e = idxs[0];
1239: }
1240: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_PLIB, "Found %" PetscInt_FMT " corners for edge %" PetscInt_FMT " (astart %" PetscInt_FMT ", estart %" PetscInt_FMT ")", found, i, e, idxs[0]);
1241: }
1243: /* get primal dof index on this coarse edge */
1244: PetscCall(ISLocalToGlobalMappingApply(vl2g, 2, corners + 2 * i, gc));
1245: if (gc[0] > gc[1]) {
1246: PetscInt swap = corners[2 * i];
1247: corners[2 * i] = corners[2 * i + 1];
1248: corners[2 * i + 1] = swap;
1249: }
1250: cedges[i] = idxs[size - 1];
1251: PetscCall(ISRestoreIndices(eedges[i], &idxs));
1252: if (print) PetscCall(PetscPrintf(PETSC_COMM_SELF, "EDGE %" PetscInt_FMT ": ce %" PetscInt_FMT ", corners (%" PetscInt_FMT ",%" PetscInt_FMT ")\n", i, cedges[i], corners[2 * i], corners[2 * i + 1]));
1253: }
1254: PetscCall(MatRestoreRowIJ(lG, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1255: PetscCall(PetscBTDestroy(&btvc));
1257: if (PetscDefined(USE_DEBUG)) {
1258: /* Inspects columns of lG (rows of lGt) and make sure the change of basis will
1259: not interfere with neighbouring coarse edges */
1260: PetscCall(PetscMalloc1(nee + 1, &emarks));
1261: PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1262: for (i = 0; i < nv; i++) {
1263: PetscInt emax = 0, eemax = 0;
1265: if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1266: PetscCall(PetscArrayzero(emarks, nee + 1));
1267: for (j = ii[i]; j < ii[i + 1]; j++) emarks[marks[jj[j]]]++;
1268: for (j = 1; j < nee + 1; j++) {
1269: if (emax < emarks[j]) {
1270: emax = emarks[j];
1271: eemax = j;
1272: }
1273: }
1274: /* not relevant for edges */
1275: if (!eemax) continue;
1277: for (j = ii[i]; j < ii[i + 1]; j++) {
1278: PetscCheck(!marks[jj[j]] || marks[jj[j]] == eemax, PETSC_COMM_SELF, PETSC_ERR_SUP, "Found 2 coarse edges (id %" PetscInt_FMT " and %" PetscInt_FMT ") connected through the %" PetscInt_FMT " nodal dof at edge dof %" PetscInt_FMT, marks[jj[j]] - 1, eemax, i, jj[j]);
1279: }
1280: }
1281: PetscCall(PetscFree(emarks));
1282: PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1283: }
1285: /* Compute extended rows indices for edge blocks of the change of basis */
1286: PetscCall(MatGetRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1287: PetscCall(MatSeqAIJGetMaxRowNonzeros(lGt, &extmem));
1288: extmem *= maxsize;
1289: PetscCall(PetscMalloc1(extmem * nee, &extrow));
1290: PetscCall(PetscMalloc1(nee, &extrows));
1291: PetscCall(PetscCalloc1(nee, &extrowcum));
1292: for (i = 0; i < nv; i++) {
1293: PetscInt mark = 0, size, start;
1295: if (ii[i + 1] == ii[i] || PetscBTLookup(btv, i)) continue;
1296: for (j = ii[i]; j < ii[i + 1]; j++)
1297: if (marks[jj[j]] && !mark) mark = marks[jj[j]];
1299: /* not relevant */
1300: if (!mark) continue;
1302: /* import extended row */
1303: mark--;
1304: start = mark * extmem + extrowcum[mark];
1305: size = ii[i + 1] - ii[i];
1306: PetscCheck(extrowcum[mark] + size <= extmem, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Not enough memory allocated %" PetscInt_FMT " > %" PetscInt_FMT, extrowcum[mark] + size, extmem);
1307: PetscCall(PetscArraycpy(extrow + start, jj + ii[i], size));
1308: extrowcum[mark] += size;
1309: }
1310: PetscCall(MatRestoreRowIJ(lGt, 0, PETSC_FALSE, PETSC_FALSE, &i, &ii, &jj, &done));
1311: PetscCall(MatDestroy(&lGt));
1312: PetscCall(PetscFree(marks));
1314: /* Compress extrows */
1315: cum = 0;
1316: for (i = 0; i < nee; i++) {
1317: PetscInt size = extrowcum[i], *start = extrow + i * extmem;
1318: PetscCall(PetscSortRemoveDupsInt(&size, start));
1319: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, size, start, PETSC_USE_POINTER, &extrows[i]));
1320: cum = PetscMax(cum, size);
1321: }
1322: PetscCall(PetscFree(extrowcum));
1323: PetscCall(PetscBTDestroy(&btv));
1324: PetscCall(PetscBTDestroy(&btvcand));
1326: /* Workspace for lapack inner calls and VecSetValues */
1327: PetscCall(PetscMalloc2((5 + cum + maxsize) * maxsize, &work, maxsize, &rwork));
1329: /* Create change of basis matrix (no preallocation) */
1330: PetscCall(MatCreate(comm, &T));
1331: PetscCall(MatSetLayouts(T, pc->mat->rmap, pc->mat->cmap));
1332: PetscCall(MatSetType(T, MATAIJ));
1333: PetscCall(MatSetLocalToGlobalMapping(T, al2g, al2g));
1334: PetscCall(MatSetOption(T, MAT_ROW_ORIENTED, PETSC_FALSE));
1335: PetscCall(MatSetOption(T, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE));
1336: //PetscCall(MatSeqAIJSetPreallocation(T, maxsize, NULL));
1337: //PetscCall(MatMPIAIJSetPreallocation(T, maxsize, NULL, maxsize, NULL));
1338: //PetscCall(MatSetOption(T, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
1340: /* Defaults to identity */
1341: {
1342: Vec w;
1343: const PetscScalar *wa;
1345: PetscCall(MatCreateVecs(T, &w, NULL));
1346: PetscCall(VecSetLocalToGlobalMapping(w, al2g));
1347: PetscCall(VecSet(w, 1.0));
1348: for (i = 0; i < nee; i++) {
1349: const PetscInt *idxs;
1350: PetscInt nl;
1352: PetscCall(ISGetLocalSize(eedges[i], &nl));
1353: PetscCall(ISGetIndices(eedges[i], &idxs));
1354: PetscCall(VecSetValuesLocal(w, nl, idxs, NULL, INSERT_VALUES));
1355: PetscCall(ISRestoreIndices(eedges[i], &idxs));
1356: }
1357: PetscCall(VecAssemblyBegin(w));
1358: PetscCall(VecAssemblyEnd(w));
1359: PetscCall(VecGetArrayRead(w, &wa));
1360: for (i = T->rmap->rstart; i < T->rmap->rend; i++)
1361: if (PetscAbsScalar(wa[i - T->rmap->rstart])) PetscCall(MatSetValue(T, i, i, 1.0, INSERT_VALUES));
1362: PetscCall(VecRestoreArrayRead(w, &wa));
1363: PetscCall(VecDestroy(&w));
1364: }
1366: /* Create discrete gradient for the coarser level if needed */
1367: PetscCall(MatDestroy(&pcbddc->nedcG));
1368: PetscCall(ISDestroy(&pcbddc->nedclocal));
1369: if (pcbddc->current_level < pcbddc->max_levels) {
1370: ISLocalToGlobalMapping cel2g, cvl2g;
1371: IS wis, gwis;
1372: PetscInt cnv, cne;
1374: PetscCall(ISCreateGeneral(comm, nee, cedges, PETSC_COPY_VALUES, &wis));
1375: if (fl2g) {
1376: PetscCall(ISLocalToGlobalMappingApplyIS(fl2g, wis, &pcbddc->nedclocal));
1377: } else {
1378: PetscCall(PetscObjectReference((PetscObject)wis));
1379: pcbddc->nedclocal = wis;
1380: }
1381: PetscCall(ISLocalToGlobalMappingApplyIS(el2g, wis, &gwis));
1382: PetscCall(ISDestroy(&wis));
1383: PetscCall(ISRenumber(gwis, NULL, &cne, &wis));
1384: PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cel2g));
1385: PetscCall(ISDestroy(&wis));
1386: PetscCall(ISDestroy(&gwis));
1388: PetscCall(ISCreateGeneral(comm, 2 * nee, corners, PETSC_USE_POINTER, &wis));
1389: PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, wis, &gwis));
1390: PetscCall(ISDestroy(&wis));
1391: PetscCall(ISRenumber(gwis, NULL, &cnv, &wis));
1392: PetscCall(ISLocalToGlobalMappingCreateIS(wis, &cvl2g));
1393: PetscCall(ISDestroy(&wis));
1394: PetscCall(ISDestroy(&gwis));
1396: PetscCall(MatCreate(comm, &pcbddc->nedcG));
1397: PetscCall(MatSetSizes(pcbddc->nedcG, PETSC_DECIDE, PETSC_DECIDE, cne, cnv));
1398: PetscCall(MatSetType(pcbddc->nedcG, MATAIJ));
1399: PetscCall(MatSeqAIJSetPreallocation(pcbddc->nedcG, 2, NULL));
1400: PetscCall(MatMPIAIJSetPreallocation(pcbddc->nedcG, 2, NULL, 2, NULL));
1401: PetscCall(MatSetLocalToGlobalMapping(pcbddc->nedcG, cel2g, cvl2g));
1402: PetscCall(ISLocalToGlobalMappingDestroy(&cel2g));
1403: PetscCall(ISLocalToGlobalMappingDestroy(&cvl2g));
1404: }
1406: MatNullSpace nnsp;
1407: PetscBool nnsp_has_const = PETSC_FALSE;
1408: const Vec *nnsp_vecs = NULL;
1409: PetscInt nnsp_nvecs = 0;
1410: VecScatter nnsp_vscat = NULL;
1411: PetscCall(MatGetNullSpace(pcbddc->discretegradient, &nnsp));
1412: if (nnsp) PetscCall(MatNullSpaceGetVecs(nnsp, &nnsp_has_const, &nnsp_nvecs, &nnsp_vecs));
1413: if (nnsp_has_const || nnsp_nvecs) { /* create scatter to import edge constraints */
1414: IS allextcols, gallextcols, galleedges, is_E_to_zero;
1415: Vec E, V;
1416: PetscInt *eedgesidxs;
1417: const PetscScalar *evals;
1419: PetscCall(MatCreateVecs(pc->pmat, &E, NULL));
1420: PetscCall(MatCreateVecs(pcbddc->discretegradient, &V, NULL));
1421: PetscCall(ISConcatenate(PETSC_COMM_SELF, nee, extcols, &allextcols));
1422: cum = 0;
1423: for (i = 0; i < nee; i++) {
1424: PetscInt j;
1426: PetscCall(ISGetLocalSize(eedges[i], &j));
1427: PetscCheck(j, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Zero sized egde %" PetscInt_FMT, i);
1428: cum += j - 1;
1429: }
1430: PetscCall(PetscMalloc1(PetscMax(cum, pc->pmat->rmap->n), &eedgesidxs));
1431: cum = 0;
1432: for (i = 0; i < nee; i++) {
1433: const PetscInt *idxs;
1434: PetscInt j;
1436: PetscCall(ISGetLocalSize(eedges[i], &j));
1437: PetscCall(ISGetIndices(eedges[i], &idxs));
1438: PetscCall(PetscArraycpy(eedgesidxs + cum, idxs, j - 1)); /* last on the edge is primal */
1439: PetscCall(ISRestoreIndices(eedges[i], &idxs));
1440: cum += j - 1;
1441: }
1442: PetscCall(ISLocalToGlobalMappingApply(al2g, cum, eedgesidxs, eedgesidxs));
1443: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, eedgesidxs, PETSC_USE_POINTER, &galleedges));
1444: PetscCall(ISLocalToGlobalMappingApplyIS(vl2g, allextcols, &gallextcols));
1445: PetscCall(VecScatterCreate(V, gallextcols, E, galleedges, &nnsp_vscat));
1446: PetscCall(ISDestroy(&allextcols));
1447: PetscCall(ISDestroy(&gallextcols));
1448: PetscCall(ISDestroy(&galleedges));
1450: /* identify dofs we must zero if importing user-defined near nullspace from pmat */
1451: PetscCall(VecSet(E, 1.0));
1452: PetscCall(VecSetValues(E, cum, eedgesidxs, NULL, INSERT_VALUES));
1453: PetscCall(VecAssemblyBegin(E));
1454: PetscCall(VecAssemblyEnd(E));
1455: PetscCall(VecGetArrayRead(E, &evals));
1456: for (i = 0, cum = 0; i < pc->pmat->rmap->n; i++)
1457: if (evals[i] == 0.0) eedgesidxs[cum++] = i + pc->pmat->rmap->rstart;
1458: PetscCall(VecRestoreArrayRead(E, &evals));
1459: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cum, eedgesidxs, PETSC_COPY_VALUES, &is_E_to_zero));
1460: PetscCall(PetscFree(eedgesidxs));
1462: PetscCall(PetscObjectCompose((PetscObject)nnsp_vscat, "__V_Vec", (PetscObject)V));
1463: PetscCall(PetscObjectCompose((PetscObject)nnsp_vscat, "__E_Vec", (PetscObject)E));
1464: PetscCall(PetscObjectCompose((PetscObject)nnsp_vscat, "__E_zero", (PetscObject)is_E_to_zero));
1465: PetscCall(ISDestroy(&is_E_to_zero));
1466: PetscCall(VecDestroy(&V));
1467: PetscCall(VecDestroy(&E));
1468: }
1469: #if defined(PRINT_GDET)
1470: inc = 0;
1471: lev = pcbddc->current_level;
1472: #endif
1474: /* Insert values in the change of basis matrix */
1475: for (i = 0; i < nee; i++) {
1476: Mat Gins = NULL, GKins = NULL;
1477: IS cornersis = NULL;
1478: PetscScalar cvals[2];
1480: if (pcbddc->nedcG) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, 2, corners + 2 * i, PETSC_USE_POINTER, &cornersis));
1481: PetscCall(PCBDDCComputeNedelecChangeEdge(lG, eedges[i], extrows[i], extcols[i], cornersis, &Gins, &GKins, cvals, work, rwork));
1482: if (Gins && GKins) {
1483: const PetscScalar *data;
1484: const PetscInt *rows, *cols;
1485: PetscInt nrh, nch, nrc, ncc;
1487: PetscCall(ISGetIndices(eedges[i], &cols));
1488: /* H1 */
1489: PetscCall(ISGetIndices(extrows[i], &rows));
1490: PetscCall(MatGetSize(Gins, &nrh, &nch));
1491: PetscCall(MatDenseGetArrayRead(Gins, &data));
1492: PetscCall(MatSetValuesLocal(T, nrh, rows, nch, cols, data, INSERT_VALUES));
1493: PetscCall(MatDenseRestoreArrayRead(Gins, &data));
1494: PetscCall(ISRestoreIndices(extrows[i], &rows));
1495: /* complement */
1496: PetscCall(MatGetSize(GKins, &nrc, &ncc));
1497: PetscCheck(ncc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Constant function has not been generated for coarse edge %" PetscInt_FMT, i);
1498: PetscCheck(ncc + nch == nrc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "The sum of the number of columns of GKins %" PetscInt_FMT " and Gins %" PetscInt_FMT " does not match %" PetscInt_FMT " for coarse edge %" PetscInt_FMT, ncc, nch, nrc, i);
1499: PetscCheck(ncc == 1 || !pcbddc->nedcG, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot generate the coarse discrete gradient for coarse edge %" PetscInt_FMT " with ncc %" PetscInt_FMT, i, ncc);
1500: PetscCall(MatDenseGetArrayRead(GKins, &data));
1501: PetscCall(MatSetValuesLocal(T, nrc, cols, ncc, cols + nch, data, INSERT_VALUES));
1502: PetscCall(MatDenseRestoreArrayRead(GKins, &data));
1504: /* coarse discrete gradient */
1505: if (pcbddc->nedcG) {
1506: PetscInt cols[2];
1508: cols[0] = 2 * i;
1509: cols[1] = 2 * i + 1;
1510: PetscCall(MatSetValuesLocal(pcbddc->nedcG, 1, &i, 2, cols, cvals, INSERT_VALUES));
1511: }
1512: PetscCall(ISRestoreIndices(eedges[i], &cols));
1513: }
1514: PetscCall(ISDestroy(&extrows[i]));
1515: PetscCall(ISDestroy(&extcols[i]));
1516: PetscCall(ISDestroy(&cornersis));
1517: PetscCall(MatDestroy(&Gins));
1518: PetscCall(MatDestroy(&GKins));
1519: }
1521: /* import edge constraints */
1522: if (nnsp_vscat) {
1523: Vec V, E, *quadvecs;
1524: PetscInt nvecs, nvecs_orth;
1525: MatNullSpace onnsp = NULL;
1526: PetscBool onnsp_has_const = PETSC_FALSE;
1527: const Vec *onnsp_vecs = NULL;
1528: PetscInt onnsp_nvecs = 0, new_nnsp_nvecs, old_nnsp_nvecs;
1529: IS is_E_to_zero;
1531: /* import nearnullspace from preconditioning matrix if user-defined */
1532: PetscCall(MatGetNearNullSpace(pc->pmat, &onnsp));
1533: if (onnsp) {
1534: PetscBool isinternal;
1536: PetscCall(PetscStrcmp("_internal_BDDC_nedelec_nnsp", ((PetscObject)onnsp)->name, &isinternal));
1537: if (!isinternal) PetscCall(MatNullSpaceGetVecs(onnsp, &onnsp_has_const, &onnsp_nvecs, &onnsp_vecs));
1538: }
1539: new_nnsp_nvecs = nnsp_nvecs + (nnsp_has_const ? 1 : 0);
1540: old_nnsp_nvecs = onnsp_nvecs + (onnsp_has_const ? 1 : 0);
1541: nvecs = old_nnsp_nvecs + new_nnsp_nvecs;
1542: PetscCall(PetscMalloc1(nvecs, &quadvecs));
1544: PetscCall(PetscObjectQuery((PetscObject)nnsp_vscat, "__V_Vec", (PetscObject *)&V));
1545: PetscCall(PetscObjectQuery((PetscObject)nnsp_vscat, "__E_Vec", (PetscObject *)&E));
1546: PetscCall(PetscObjectQuery((PetscObject)nnsp_vscat, "__E_zero", (PetscObject *)&is_E_to_zero));
1547: for (i = 0; i < nvecs; i++) PetscCall(VecDuplicate(E, &quadvecs[i]));
1548: cum = 0;
1549: if (nnsp_has_const) {
1550: PetscCall(VecSet(V, 1.0));
1551: PetscCall(VecScatterBegin(nnsp_vscat, V, quadvecs[0], INSERT_VALUES, SCATTER_FORWARD));
1552: PetscCall(VecScatterEnd(nnsp_vscat, V, quadvecs[0], INSERT_VALUES, SCATTER_FORWARD));
1553: cum = 1;
1554: }
1555: for (i = 0; i < nnsp_nvecs; i++) {
1556: PetscCall(VecScatterBegin(nnsp_vscat, nnsp_vecs[i], quadvecs[i + cum], INSERT_VALUES, SCATTER_FORWARD));
1557: PetscCall(VecScatterEnd(nnsp_vscat, nnsp_vecs[i], quadvecs[i + cum], INSERT_VALUES, SCATTER_FORWARD));
1558: }
1560: /* Now add old nnsp if present */
1561: cum = 0;
1562: if (onnsp_has_const) {
1563: PetscCall(VecSet(quadvecs[new_nnsp_nvecs], 1.0));
1564: PetscCall(VecISSet(quadvecs[new_nnsp_nvecs], is_E_to_zero, 0));
1565: cum = 1;
1566: }
1567: for (i = 0; i < onnsp_nvecs; i++) {
1568: PetscCall(VecCopy(onnsp_vecs[i], quadvecs[i + cum + new_nnsp_nvecs]));
1569: PetscCall(VecISSet(quadvecs[i + cum + new_nnsp_nvecs], is_E_to_zero, 0));
1570: }
1571: nvecs_orth = nvecs;
1572: PetscCall(PCBDDCOrthonormalizeVecs(&nvecs_orth, quadvecs));
1573: PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)pc), PETSC_FALSE, nvecs_orth, quadvecs, &nnsp));
1574: for (i = 0; i < nvecs; i++) PetscCall(VecDestroy(&quadvecs[i]));
1575: PetscCall(PetscFree(quadvecs));
1576: PetscCall(PetscObjectSetName((PetscObject)nnsp, "_internal_BDDC_nedelec_nnsp"));
1577: PetscCall(MatSetNearNullSpace(pc->pmat, nnsp));
1578: PetscCall(MatNullSpaceDestroy(&nnsp));
1579: }
1580: PetscCall(VecScatterDestroy(&nnsp_vscat));
1581: PetscCall(ISLocalToGlobalMappingDestroy(&vl2g));
1582: PetscCall(ISLocalToGlobalMappingDestroy(&el2g));
1583: PetscCall(ISLocalToGlobalMappingDestroy(&al2g));
1585: /* Start assembling */
1586: PetscCall(MatAssemblyBegin(T, MAT_FINAL_ASSEMBLY));
1587: if (pcbddc->nedcG) PetscCall(MatAssemblyBegin(pcbddc->nedcG, MAT_FINAL_ASSEMBLY));
1589: /* Free */
1590: if (fl2g) {
1591: PetscCall(ISDestroy(&primals));
1592: for (i = 0; i < nee; i++) PetscCall(ISDestroy(&eedges[i]));
1593: PetscCall(PetscFree(eedges));
1594: }
1596: /* hack mat_graph with primal dofs on the coarse edges */
1597: {
1598: PCBDDCGraph graph = pcbddc->mat_graph;
1599: PetscInt *oqueue = graph->queue;
1600: PetscInt *ocptr = graph->cptr;
1601: PetscInt ncc, *idxs;
1603: /* find first primal edge */
1604: if (pcbddc->nedclocal) {
1605: PetscCall(ISGetIndices(pcbddc->nedclocal, (const PetscInt **)&idxs));
1606: } else {
1607: if (fl2g) PetscCall(ISLocalToGlobalMappingApply(fl2g, nee, cedges, cedges));
1608: idxs = cedges;
1609: }
1610: cum = 0;
1611: while (cum < nee && cedges[cum] < 0) cum++;
1613: /* adapt connected components */
1614: PetscCall(PetscMalloc2(graph->nvtxs + 1, &graph->cptr, ocptr[graph->ncc], &graph->queue));
1615: graph->cptr[0] = 0;
1616: for (i = 0, ncc = 0; i < graph->ncc; i++) {
1617: PetscInt lc = ocptr[i + 1] - ocptr[i];
1618: if (cum != nee && oqueue[ocptr[i + 1] - 1] == cedges[cum]) { /* this cc has a primal dof */
1619: graph->cptr[ncc + 1] = graph->cptr[ncc] + 1;
1620: graph->queue[graph->cptr[ncc]] = cedges[cum];
1621: ncc++;
1622: lc--;
1623: cum++;
1624: while (cum < nee && cedges[cum] < 0) cum++;
1625: }
1626: graph->cptr[ncc + 1] = graph->cptr[ncc] + lc;
1627: for (j = 0; j < lc; j++) graph->queue[graph->cptr[ncc] + j] = oqueue[ocptr[i] + j];
1628: ncc++;
1629: }
1630: graph->ncc = ncc;
1631: if (pcbddc->nedclocal) PetscCall(ISRestoreIndices(pcbddc->nedclocal, (const PetscInt **)&idxs));
1632: PetscCall(PetscFree2(ocptr, oqueue));
1633: }
1634: PetscCall(ISLocalToGlobalMappingDestroy(&fl2g));
1635: PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, &nee, &alleedges, &allprimals));
1636: PetscCall(PCBDDCGraphResetCSR(pcbddc->mat_graph));
1638: PetscCall(ISDestroy(&nedfieldlocal));
1639: PetscCall(PetscFree(extrow));
1640: PetscCall(PetscFree2(work, rwork));
1641: PetscCall(PetscFree(corners));
1642: PetscCall(PetscFree(cedges));
1643: PetscCall(PetscFree(extrows));
1644: PetscCall(PetscFree(extcols));
1645: PetscCall(MatDestroy(&lG));
1647: /* Complete assembling */
1648: PetscCall(MatAssemblyEnd(T, MAT_FINAL_ASSEMBLY));
1649: PetscCall(MatViewFromOptions(T, (PetscObject)pc, "-pc_bddc_nedelec_change_view"));
1650: if (pcbddc->nedcG) {
1651: PetscCall(MatAssemblyEnd(pcbddc->nedcG, MAT_FINAL_ASSEMBLY));
1652: PetscCall(MatViewFromOptions(pcbddc->nedcG, (PetscObject)pc, "-pc_bddc_nedelec_coarse_change_view"));
1653: }
1655: PetscCall(ISDestroy(&elements_corners));
1657: /* set change of basis */
1658: PetscCall(PCBDDCSetChangeOfBasisMat(pc, T, PETSC_FALSE));
1659: PetscCall(MatDestroy(&T));
1660: PetscFunctionReturn(PETSC_SUCCESS);
1661: }
1663: /* the near-null space of BDDC carries information on quadrature weights,
1664: and these can be collinear -> so cheat with MatNullSpaceCreate
1665: and create a suitable set of basis vectors first */
1666: PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp)
1667: {
1668: PetscInt i;
1670: PetscFunctionBegin;
1671: for (i = 0; i < nvecs; i++) {
1672: PetscInt first, last;
1674: PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last));
1675: PetscCheck(last - first >= 2 * nvecs || !has_const, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not implemented");
1676: if (i >= first && i < last) {
1677: PetscScalar *data;
1678: PetscCall(VecGetArray(quad_vecs[i], &data));
1679: if (!has_const) {
1680: data[i - first] = 1.;
1681: } else {
1682: data[2 * i - first] = 1. / PetscSqrtReal(2.);
1683: data[2 * i - first + 1] = -1. / PetscSqrtReal(2.);
1684: }
1685: PetscCall(VecRestoreArray(quad_vecs[i], &data));
1686: }
1687: PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i]));
1688: }
1689: PetscCall(MatNullSpaceCreate(comm, has_const, nvecs, quad_vecs, nnsp));
1690: for (i = 0; i < nvecs; i++) { /* reset vectors */
1691: PetscInt first, last;
1692: PetscCall(VecLockReadPop(quad_vecs[i]));
1693: PetscCall(VecGetOwnershipRange(quad_vecs[i], &first, &last));
1694: if (i >= first && i < last) {
1695: PetscScalar *data;
1696: PetscCall(VecGetArray(quad_vecs[i], &data));
1697: if (!has_const) {
1698: data[i - first] = 0.;
1699: } else {
1700: data[2 * i - first] = 0.;
1701: data[2 * i - first + 1] = 0.;
1702: }
1703: PetscCall(VecRestoreArray(quad_vecs[i], &data));
1704: }
1705: PetscCall(PetscObjectStateIncrease((PetscObject)quad_vecs[i]));
1706: PetscCall(VecLockReadPush(quad_vecs[i]));
1707: }
1708: PetscFunctionReturn(PETSC_SUCCESS);
1709: }
1711: PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp)
1712: {
1713: Mat loc_divudotp;
1714: Vec p, v, quad_vec;
1715: ISLocalToGlobalMapping map;
1716: PetscScalar *array;
1718: PetscFunctionBegin;
1719: PetscCall(MatCreateVecs(A, &quad_vec, NULL));
1720: if (!transpose) {
1721: PetscCall(MatISGetLocalToGlobalMapping(A, &map, NULL));
1722: } else {
1723: PetscCall(MatISGetLocalToGlobalMapping(A, NULL, &map));
1724: }
1725: PetscCall(PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A), PETSC_FALSE, 1, &quad_vec, nnsp));
1726: PetscCall(VecLockReadPop(quad_vec));
1727: PetscCall(VecSetLocalToGlobalMapping(quad_vec, map));
1729: /* compute local quad vec */
1730: PetscCall(MatISGetLocalMat(divudotp, &loc_divudotp));
1731: if (!transpose) {
1732: PetscCall(MatCreateVecs(loc_divudotp, &v, &p));
1733: } else {
1734: PetscCall(MatCreateVecs(loc_divudotp, &p, &v));
1735: }
1736: /* the assumption here is that the constant vector interpolates the constant on the L2 conforming space */
1737: PetscCall(VecSet(p, 1.));
1738: if (!transpose) {
1739: PetscCall(MatMultTranspose(loc_divudotp, p, v));
1740: } else {
1741: PetscCall(MatMult(loc_divudotp, p, v));
1742: }
1743: PetscCall(VecDestroy(&p));
1744: if (vl2l) {
1745: Mat lA;
1746: VecScatter sc;
1747: Vec vins;
1749: PetscCall(MatISGetLocalMat(A, &lA));
1750: PetscCall(MatCreateVecs(lA, &vins, NULL));
1751: PetscCall(VecScatterCreate(v, NULL, vins, vl2l, &sc));
1752: PetscCall(VecScatterBegin(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD));
1753: PetscCall(VecScatterEnd(sc, v, vins, INSERT_VALUES, SCATTER_FORWARD));
1754: PetscCall(VecScatterDestroy(&sc));
1755: PetscCall(VecDestroy(&v));
1756: v = vins;
1757: }
1759: /* mask summation of interface values */
1760: PetscInt n, *mmask, *mask, *idxs, nmr, nr;
1761: const PetscInt *degree;
1762: PetscSF msf;
1764: PetscCall(VecGetLocalSize(v, &n));
1765: PetscCall(PetscSFGetGraph(graph->interface_subset_sf, &nr, NULL, NULL, NULL));
1766: PetscCall(PetscSFGetMultiSF(graph->interface_subset_sf, &msf));
1767: PetscCall(PetscSFGetGraph(msf, &nmr, NULL, NULL, NULL));
1768: PetscCall(PetscCalloc3(nmr, &mmask, n, &mask, n, &idxs));
1769: PetscCall(PetscSFComputeDegreeBegin(graph->interface_subset_sf, °ree));
1770: PetscCall(PetscSFComputeDegreeEnd(graph->interface_subset_sf, °ree));
1771: for (PetscInt i = 0, c = 0; i < nr; i++) {
1772: mmask[c] = 1;
1773: c += degree[i];
1774: }
1775: PetscCall(PetscSFScatterBegin(graph->interface_subset_sf, MPIU_INT, mmask, mask));
1776: PetscCall(PetscSFScatterEnd(graph->interface_subset_sf, MPIU_INT, mmask, mask));
1777: PetscCall(VecGetArray(v, &array));
1778: for (PetscInt i = 0; i < n; i++) {
1779: array[i] *= mask[i];
1780: idxs[i] = i;
1781: }
1782: PetscCall(VecSetValuesLocal(quad_vec, n, idxs, array, ADD_VALUES));
1783: PetscCall(VecRestoreArray(v, &array));
1784: PetscCall(PetscFree3(mmask, mask, idxs));
1785: PetscCall(VecDestroy(&v));
1786: PetscCall(VecAssemblyBegin(quad_vec));
1787: PetscCall(VecAssemblyEnd(quad_vec));
1788: PetscCall(VecViewFromOptions(quad_vec, NULL, "-pc_bddc_quad_vec_view"));
1789: PetscCall(VecLockReadPush(quad_vec));
1790: PetscCall(VecDestroy(&quad_vec));
1791: PetscFunctionReturn(PETSC_SUCCESS);
1792: }
1794: PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv)
1795: {
1796: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
1798: PetscFunctionBegin;
1799: if (primalv) {
1800: if (pcbddc->user_primal_vertices_local) {
1801: IS list[2], newp;
1803: list[0] = primalv;
1804: list[1] = pcbddc->user_primal_vertices_local;
1805: PetscCall(ISConcatenate(PetscObjectComm((PetscObject)pc), 2, list, &newp));
1806: PetscCall(ISSortRemoveDups(newp));
1807: PetscCall(ISDestroy(&list[1]));
1808: pcbddc->user_primal_vertices_local = newp;
1809: } else {
1810: PetscCall(PCBDDCSetPrimalVerticesLocalIS(pc, primalv));
1811: }
1812: }
1813: PetscFunctionReturn(PETSC_SUCCESS);
1814: }
1816: static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx)
1817: {
1818: PetscInt f, *comp = (PetscInt *)ctx;
1820: PetscFunctionBegin;
1821: for (f = 0; f < Nf; f++) out[f] = X[*comp];
1822: PetscFunctionReturn(PETSC_SUCCESS);
1823: }
1825: PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc)
1826: {
1827: Vec local, global;
1828: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
1829: Mat_IS *matis = (Mat_IS *)pc->pmat->data;
1830: PetscBool monolithic = PETSC_FALSE;
1832: PetscFunctionBegin;
1833: PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC topology options", "PC");
1834: PetscCall(PetscOptionsBool("-pc_bddc_monolithic", "Discard any information on dofs splitting", NULL, monolithic, &monolithic, NULL));
1835: PetscOptionsEnd();
1836: /* need to convert from global to local topology information and remove references to information in global ordering */
1837: PetscCall(MatCreateVecs(pc->pmat, &global, NULL));
1838: PetscCall(MatCreateVecs(matis->A, &local, NULL));
1839: PetscCall(VecBindToCPU(global, PETSC_TRUE));
1840: PetscCall(VecBindToCPU(local, PETSC_TRUE));
1841: if (monolithic) { /* just get block size to properly compute vertices */
1842: if (pcbddc->vertex_size == 1) PetscCall(MatGetBlockSize(pc->pmat, &pcbddc->vertex_size));
1843: goto boundary;
1844: }
1846: if (pcbddc->user_provided_isfordofs) {
1847: if (pcbddc->n_ISForDofs) {
1848: PetscInt i;
1850: PetscCall(PetscMalloc1(pcbddc->n_ISForDofs, &pcbddc->ISForDofsLocal));
1851: for (i = 0; i < pcbddc->n_ISForDofs; i++) {
1852: PetscInt bs;
1854: PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->ISForDofs[i], &pcbddc->ISForDofsLocal[i]));
1855: PetscCall(ISGetBlockSize(pcbddc->ISForDofs[i], &bs));
1856: PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs));
1857: PetscCall(ISDestroy(&pcbddc->ISForDofs[i]));
1858: }
1859: pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs;
1860: pcbddc->n_ISForDofs = 0;
1861: PetscCall(PetscFree(pcbddc->ISForDofs));
1862: }
1863: } else {
1864: if (!pcbddc->n_ISForDofsLocal) { /* field split not present */
1865: DM dm;
1867: PetscCall(MatGetDM(pc->pmat, &dm));
1868: if (!dm) PetscCall(PCGetDM(pc, &dm));
1869: if (dm) {
1870: IS *fields;
1871: PetscInt nf, i;
1873: PetscCall(DMCreateFieldDecomposition(dm, &nf, NULL, &fields, NULL));
1874: PetscCall(PetscMalloc1(nf, &pcbddc->ISForDofsLocal));
1875: for (i = 0; i < nf; i++) {
1876: PetscInt bs;
1878: PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, fields[i], &pcbddc->ISForDofsLocal[i]));
1879: PetscCall(ISGetBlockSize(fields[i], &bs));
1880: PetscCall(ISSetBlockSize(pcbddc->ISForDofsLocal[i], bs));
1881: PetscCall(ISDestroy(&fields[i]));
1882: }
1883: PetscCall(PetscFree(fields));
1884: pcbddc->n_ISForDofsLocal = nf;
1885: } else { /* See if MATIS has fields attached by the conversion from MatNest */
1886: PetscContainer c;
1888: PetscCall(PetscObjectQuery((PetscObject)pc->pmat, "_convert_nest_lfields", (PetscObject *)&c));
1889: if (c) {
1890: MatISLocalFields lf;
1891: PetscCall(PetscContainerGetPointer(c, (void **)&lf));
1892: PetscCall(PCBDDCSetDofsSplittingLocal(pc, lf->nr, lf->rf));
1893: } else { /* fallback, create the default fields if bs > 1 */
1894: PetscInt i, n = matis->A->rmap->n;
1895: PetscCall(MatGetBlockSize(pc->pmat, &i));
1896: if (i > 1) {
1897: pcbddc->n_ISForDofsLocal = i;
1898: PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal));
1899: for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n / pcbddc->n_ISForDofsLocal, i, pcbddc->n_ISForDofsLocal, &pcbddc->ISForDofsLocal[i]));
1900: }
1901: }
1902: }
1903: } else {
1904: PetscInt i;
1905: for (i = 0; i < pcbddc->n_ISForDofsLocal; i++) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->ISForDofsLocal[i]));
1906: }
1907: }
1909: boundary:
1910: if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) {
1911: PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->DirichletBoundaries, &pcbddc->DirichletBoundariesLocal));
1912: } else if (pcbddc->DirichletBoundariesLocal) {
1913: PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LAND, &pcbddc->DirichletBoundariesLocal));
1914: }
1915: if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) {
1916: PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->NeumannBoundaries, &pcbddc->NeumannBoundariesLocal));
1917: } else if (pcbddc->NeumannBoundariesLocal) {
1918: PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->NeumannBoundariesLocal));
1919: }
1920: if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) PetscCall(PCBDDCGlobalToLocal(matis->rctx, global, local, pcbddc->user_primal_vertices, &pcbddc->user_primal_vertices_local));
1921: PetscCall(VecDestroy(&global));
1922: PetscCall(VecDestroy(&local));
1923: /* detect local disconnected subdomains if requested or needed */
1924: if (pcbddc->detect_disconnected || matis->allow_repeated) {
1925: IS primalv = NULL;
1926: PetscInt nel;
1927: PetscBool filter = pcbddc->detect_disconnected_filter;
1929: for (PetscInt i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i]));
1930: PetscCall(PetscFree(pcbddc->local_subs));
1931: PetscCall(MatGetVariableBlockSizes(matis->A, &nel, NULL));
1932: if (matis->allow_repeated && nel) {
1933: const PetscInt *elsizes;
1935: pcbddc->n_local_subs = nel;
1936: PetscCall(MatGetVariableBlockSizes(matis->A, NULL, &elsizes));
1937: PetscCall(PetscMalloc1(nel, &pcbddc->local_subs));
1938: for (PetscInt i = 0, c = 0; i < nel; i++) {
1939: PetscCall(ISCreateStride(PETSC_COMM_SELF, elsizes[i], c, 1, &pcbddc->local_subs[i]));
1940: c += elsizes[i];
1941: }
1942: } else {
1943: PetscCall(PCBDDCDetectDisconnectedComponents(pc, filter, &pcbddc->n_local_subs, &pcbddc->local_subs, &primalv));
1944: }
1945: PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, primalv));
1946: PetscCall(ISDestroy(&primalv));
1947: }
1948: /* early stage corner detection */
1949: {
1950: DM dm;
1952: PetscCall(MatGetDM(pc->pmat, &dm));
1953: if (!dm) PetscCall(PCGetDM(pc, &dm));
1954: if (dm) {
1955: PetscBool isda;
1957: PetscCall(PetscObjectTypeCompare((PetscObject)dm, DMDA, &isda));
1958: if (isda) {
1959: ISLocalToGlobalMapping l2l;
1960: IS corners;
1961: Mat lA;
1962: PetscBool gl, lo;
1964: {
1965: Vec cvec;
1966: const PetscScalar *coords;
1967: PetscInt dof, n, cdim;
1968: PetscBool memc = PETSC_TRUE;
1970: PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL));
1971: PetscCall(DMGetCoordinates(dm, &cvec));
1972: PetscCall(VecGetLocalSize(cvec, &n));
1973: PetscCall(VecGetBlockSize(cvec, &cdim));
1974: n /= cdim;
1975: PetscCall(PetscFree(pcbddc->mat_graph->coords));
1976: PetscCall(PetscMalloc1(dof * n * cdim, &pcbddc->mat_graph->coords));
1977: PetscCall(VecGetArrayRead(cvec, &coords));
1978: #if defined(PETSC_USE_COMPLEX)
1979: memc = PETSC_FALSE;
1980: #endif
1981: if (dof != 1) memc = PETSC_FALSE;
1982: if (memc) {
1983: PetscCall(PetscArraycpy(pcbddc->mat_graph->coords, coords, cdim * n * dof));
1984: } else { /* BDDC graph does not use any blocked information, we need to replicate the data */
1985: PetscReal *bcoords = pcbddc->mat_graph->coords;
1986: PetscInt i, b, d;
1988: for (i = 0; i < n; i++) {
1989: for (b = 0; b < dof; b++) {
1990: for (d = 0; d < cdim; d++) bcoords[i * dof * cdim + b * cdim + d] = PetscRealPart(coords[i * cdim + d]);
1991: }
1992: }
1993: }
1994: PetscCall(VecRestoreArrayRead(cvec, &coords));
1995: pcbddc->mat_graph->cdim = cdim;
1996: pcbddc->mat_graph->cnloc = dof * n;
1997: pcbddc->mat_graph->cloc = PETSC_FALSE;
1998: }
1999: PetscCall(DMDAGetSubdomainCornersIS(dm, &corners));
2000: PetscCall(MatISGetLocalMat(pc->pmat, &lA));
2001: PetscCall(MatGetLocalToGlobalMapping(lA, &l2l, NULL));
2002: PetscCall(MatISRestoreLocalMat(pc->pmat, &lA));
2003: lo = (PetscBool)(l2l && corners);
2004: PetscCallMPI(MPIU_Allreduce(&lo, &gl, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc)));
2005: if (gl) { /* From PETSc's DMDA */
2006: const PetscInt *idx;
2007: PetscInt dof, bs, *idxout, n;
2009: PetscCall(DMDAGetInfo(dm, NULL, NULL, NULL, NULL, NULL, NULL, NULL, &dof, NULL, NULL, NULL, NULL, NULL));
2010: PetscCall(ISLocalToGlobalMappingGetBlockSize(l2l, &bs));
2011: PetscCall(ISGetLocalSize(corners, &n));
2012: PetscCall(ISGetIndices(corners, &idx));
2013: if (bs == dof) {
2014: PetscCall(PetscMalloc1(n, &idxout));
2015: PetscCall(ISLocalToGlobalMappingApplyBlock(l2l, n, idx, idxout));
2016: } else { /* the original DMDA local-to-local map have been modified */
2017: PetscInt i, d;
2019: PetscCall(PetscMalloc1(dof * n, &idxout));
2020: for (i = 0; i < n; i++)
2021: for (d = 0; d < dof; d++) idxout[dof * i + d] = dof * idx[i] + d;
2022: PetscCall(ISLocalToGlobalMappingApply(l2l, dof * n, idxout, idxout));
2024: bs = 1;
2025: n *= dof;
2026: }
2027: PetscCall(ISRestoreIndices(corners, &idx));
2028: PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners));
2029: PetscCall(ISCreateBlock(PetscObjectComm((PetscObject)pc), bs, n, idxout, PETSC_OWN_POINTER, &corners));
2030: PetscCall(PCBDDCAddPrimalVerticesLocalIS(pc, corners));
2031: PetscCall(ISDestroy(&corners));
2032: pcbddc->corner_selected = PETSC_TRUE;
2033: pcbddc->corner_selection = PETSC_TRUE;
2034: }
2035: if (corners) PetscCall(DMDARestoreSubdomainCornersIS(dm, &corners));
2036: }
2037: }
2038: }
2039: if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) {
2040: DM dm;
2042: PetscCall(MatGetDM(pc->pmat, &dm));
2043: if (!dm) PetscCall(PCGetDM(pc, &dm));
2044: if (dm) { /* this can get very expensive, I need to find a faster alternative */
2045: Vec vcoords;
2046: PetscSection section;
2047: PetscReal *coords;
2048: PetscInt d, cdim, nl, nf, **ctxs;
2049: PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *);
2050: /* debug coordinates */
2051: PetscViewer viewer;
2052: PetscBool flg;
2053: PetscViewerFormat format;
2054: const char *prefix;
2056: PetscCall(DMGetCoordinateDim(dm, &cdim));
2057: PetscCall(DMGetLocalSection(dm, §ion));
2058: PetscCall(PetscSectionGetNumFields(section, &nf));
2059: PetscCall(DMCreateGlobalVector(dm, &vcoords));
2060: PetscCall(VecGetLocalSize(vcoords, &nl));
2061: PetscCall(PetscMalloc1(nl * cdim, &coords));
2062: PetscCall(PetscMalloc2(nf, &funcs, nf, &ctxs));
2063: PetscCall(PetscMalloc1(nf, &ctxs[0]));
2064: for (d = 0; d < nf; d++) funcs[d] = func_coords_private;
2065: for (d = 1; d < nf; d++) ctxs[d] = ctxs[d - 1] + 1;
2067: /* debug coordinates */
2068: PetscCall(PCGetOptionsPrefix(pc, &prefix));
2069: PetscCall(PetscOptionsCreateViewer(PetscObjectComm((PetscObject)vcoords), ((PetscObject)vcoords)->options, prefix, "-pc_bddc_coords_vec_view", &viewer, &format, &flg));
2070: if (flg) PetscCall(PetscViewerPushFormat(viewer, format));
2071: for (d = 0; d < cdim; d++) {
2072: PetscInt i;
2073: const PetscScalar *v;
2074: char name[16];
2076: for (i = 0; i < nf; i++) ctxs[i][0] = d;
2077: PetscCall(PetscSNPrintf(name, sizeof(name), "bddc_coords_%" PetscInt_FMT, d));
2078: PetscCall(PetscObjectSetName((PetscObject)vcoords, name));
2079: PetscCall(DMProjectFunction(dm, 0.0, funcs, (void **)ctxs, INSERT_VALUES, vcoords));
2080: if (flg) PetscCall(VecView(vcoords, viewer));
2081: PetscCall(VecGetArrayRead(vcoords, &v));
2082: for (i = 0; i < nl; i++) coords[i * cdim + d] = PetscRealPart(v[i]);
2083: PetscCall(VecRestoreArrayRead(vcoords, &v));
2084: }
2085: PetscCall(VecDestroy(&vcoords));
2086: PetscCall(PCSetCoordinates(pc, cdim, nl, coords));
2087: PetscCall(PetscFree(coords));
2088: PetscCall(PetscFree(ctxs[0]));
2089: PetscCall(PetscFree2(funcs, ctxs));
2090: if (flg) {
2091: PetscCall(PetscViewerPopFormat(viewer));
2092: PetscCall(PetscViewerDestroy(&viewer));
2093: }
2094: }
2095: }
2096: PetscFunctionReturn(PETSC_SUCCESS);
2097: }
2099: PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is)
2100: {
2101: Mat_IS *matis = (Mat_IS *)pc->pmat->data;
2102: IS nis;
2103: const PetscInt *idxs;
2104: PetscInt i, nd, n = matis->A->rmap->n, *nidxs, nnd;
2106: PetscFunctionBegin;
2107: PetscCheck(mop == MPI_LAND || mop == MPI_LOR, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Supported are MPI_LAND and MPI_LOR");
2108: if (mop == MPI_LAND) {
2109: /* init rootdata with true */
2110: for (i = 0; i < pc->pmat->rmap->n; i++) matis->sf_rootdata[i] = 1;
2111: } else {
2112: PetscCall(PetscArrayzero(matis->sf_rootdata, pc->pmat->rmap->n));
2113: }
2114: PetscCall(PetscArrayzero(matis->sf_leafdata, n));
2115: PetscCall(ISGetLocalSize(*is, &nd));
2116: PetscCall(ISGetIndices(*is, &idxs));
2117: for (i = 0; i < nd; i++)
2118: if (-1 < idxs[i] && idxs[i] < n) matis->sf_leafdata[idxs[i]] = 1;
2119: PetscCall(ISRestoreIndices(*is, &idxs));
2120: PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop));
2121: PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, mop));
2122: PetscCall(PetscSFBcastBegin(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
2123: PetscCall(PetscSFBcastEnd(matis->sf, MPIU_INT, matis->sf_rootdata, matis->sf_leafdata, MPI_REPLACE));
2124: if (mop == MPI_LAND) {
2125: PetscCall(PetscMalloc1(nd, &nidxs));
2126: } else {
2127: PetscCall(PetscMalloc1(n, &nidxs));
2128: }
2129: for (i = 0, nnd = 0; i < n; i++)
2130: if (matis->sf_leafdata[i]) nidxs[nnd++] = i;
2131: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)*is), nnd, nidxs, PETSC_OWN_POINTER, &nis));
2132: PetscCall(ISDestroy(is));
2133: *is = nis;
2134: PetscFunctionReturn(PETSC_SUCCESS);
2135: }
2137: PetscErrorCode PCBDDCBenignRemoveInterior(PC pc, Vec r, Vec z)
2138: {
2139: PC_IS *pcis = (PC_IS *)pc->data;
2140: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2142: PetscFunctionBegin;
2143: if (!pcbddc->benign_have_null) PetscFunctionReturn(PETSC_SUCCESS);
2144: if (pcbddc->ChangeOfBasisMatrix) {
2145: Vec swap;
2147: PetscCall(MatMultTranspose(pcbddc->ChangeOfBasisMatrix, r, pcbddc->work_change));
2148: swap = pcbddc->work_change;
2149: pcbddc->work_change = r;
2150: r = swap;
2151: }
2152: PetscCall(VecScatterBegin(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
2153: PetscCall(VecScatterEnd(pcis->global_to_D, r, pcis->vec1_D, INSERT_VALUES, SCATTER_FORWARD));
2154: PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0));
2155: PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec1_D, pcis->vec2_D));
2156: PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][0], pc, 0, 0, 0));
2157: PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D));
2158: PetscCall(VecSet(z, 0.));
2159: PetscCall(VecScatterBegin(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
2160: PetscCall(VecScatterEnd(pcis->global_to_D, pcis->vec2_D, z, INSERT_VALUES, SCATTER_REVERSE));
2161: if (pcbddc->ChangeOfBasisMatrix) {
2162: pcbddc->work_change = r;
2163: PetscCall(VecCopy(z, pcbddc->work_change));
2164: PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcbddc->work_change, z));
2165: }
2166: PetscFunctionReturn(PETSC_SUCCESS);
2167: }
2169: static PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose)
2170: {
2171: PCBDDCBenignMatMult_ctx ctx;
2172: PetscBool apply_right, apply_left, reset_x;
2174: PetscFunctionBegin;
2175: PetscCall(MatShellGetContext(A, &ctx));
2176: if (transpose) {
2177: apply_right = ctx->apply_left;
2178: apply_left = ctx->apply_right;
2179: } else {
2180: apply_right = ctx->apply_right;
2181: apply_left = ctx->apply_left;
2182: }
2183: reset_x = PETSC_FALSE;
2184: if (apply_right) {
2185: const PetscScalar *ax;
2186: PetscInt nl, i;
2188: PetscCall(VecGetLocalSize(x, &nl));
2189: PetscCall(VecGetArrayRead(x, &ax));
2190: PetscCall(PetscArraycpy(ctx->work, ax, nl));
2191: PetscCall(VecRestoreArrayRead(x, &ax));
2192: for (i = 0; i < ctx->benign_n; i++) {
2193: PetscScalar sum, val;
2194: const PetscInt *idxs;
2195: PetscInt nz, j;
2196: PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz));
2197: PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs));
2198: sum = 0.;
2199: if (ctx->apply_p0) {
2200: val = ctx->work[idxs[nz - 1]];
2201: for (j = 0; j < nz - 1; j++) {
2202: sum += ctx->work[idxs[j]];
2203: ctx->work[idxs[j]] += val;
2204: }
2205: } else {
2206: for (j = 0; j < nz - 1; j++) sum += ctx->work[idxs[j]];
2207: }
2208: ctx->work[idxs[nz - 1]] -= sum;
2209: PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs));
2210: }
2211: PetscCall(VecPlaceArray(x, ctx->work));
2212: reset_x = PETSC_TRUE;
2213: }
2214: if (transpose) {
2215: PetscCall(MatMultTranspose(ctx->A, x, y));
2216: } else {
2217: PetscCall(MatMult(ctx->A, x, y));
2218: }
2219: if (reset_x) PetscCall(VecResetArray(x));
2220: if (apply_left) {
2221: PetscScalar *ay;
2222: PetscInt i;
2224: PetscCall(VecGetArray(y, &ay));
2225: for (i = 0; i < ctx->benign_n; i++) {
2226: PetscScalar sum, val;
2227: const PetscInt *idxs;
2228: PetscInt nz, j;
2229: PetscCall(ISGetLocalSize(ctx->benign_zerodiag_subs[i], &nz));
2230: PetscCall(ISGetIndices(ctx->benign_zerodiag_subs[i], &idxs));
2231: val = -ay[idxs[nz - 1]];
2232: if (ctx->apply_p0) {
2233: sum = 0.;
2234: for (j = 0; j < nz - 1; j++) {
2235: sum += ay[idxs[j]];
2236: ay[idxs[j]] += val;
2237: }
2238: ay[idxs[nz - 1]] += sum;
2239: } else {
2240: for (j = 0; j < nz - 1; j++) ay[idxs[j]] += val;
2241: ay[idxs[nz - 1]] = 0.;
2242: }
2243: PetscCall(ISRestoreIndices(ctx->benign_zerodiag_subs[i], &idxs));
2244: }
2245: PetscCall(VecRestoreArray(y, &ay));
2246: }
2247: PetscFunctionReturn(PETSC_SUCCESS);
2248: }
2250: static PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y)
2251: {
2252: PetscFunctionBegin;
2253: PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_TRUE));
2254: PetscFunctionReturn(PETSC_SUCCESS);
2255: }
2257: static PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y)
2258: {
2259: PetscFunctionBegin;
2260: PetscCall(PCBDDCBenignMatMult_Private_Private(A, x, y, PETSC_FALSE));
2261: PetscFunctionReturn(PETSC_SUCCESS);
2262: }
2264: PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore)
2265: {
2266: PC_IS *pcis = (PC_IS *)pc->data;
2267: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2268: PCBDDCBenignMatMult_ctx ctx;
2270: PetscFunctionBegin;
2271: if (!restore) {
2272: Mat A_IB, A_BI;
2273: PetscScalar *work;
2274: PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL;
2276: PetscCheck(!pcbddc->benign_original_mat, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Benign original mat has not been restored");
2277: if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(PETSC_SUCCESS);
2278: PetscCall(PetscMalloc1(pcis->n, &work));
2279: PetscCall(MatCreate(PETSC_COMM_SELF, &A_IB));
2280: PetscCall(MatSetSizes(A_IB, pcis->n - pcis->n_B, pcis->n_B, PETSC_DECIDE, PETSC_DECIDE));
2281: PetscCall(MatSetType(A_IB, MATSHELL));
2282: PetscCall(MatShellSetOperation(A_IB, MATOP_MULT, (PetscErrorCodeFn *)PCBDDCBenignMatMult_Private));
2283: PetscCall(MatShellSetOperation(A_IB, MATOP_MULT_TRANSPOSE, (PetscErrorCodeFn *)PCBDDCBenignMatMultTranspose_Private));
2284: PetscCall(PetscNew(&ctx));
2285: PetscCall(MatShellSetContext(A_IB, ctx));
2286: ctx->apply_left = PETSC_TRUE;
2287: ctx->apply_right = PETSC_FALSE;
2288: ctx->apply_p0 = PETSC_FALSE;
2289: ctx->benign_n = pcbddc->benign_n;
2290: if (reuse) {
2291: ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs;
2292: ctx->free = PETSC_FALSE;
2293: } else { /* TODO: could be optimized for successive solves */
2294: ISLocalToGlobalMapping N_to_D;
2295: PetscInt i;
2297: PetscCall(ISLocalToGlobalMappingCreateIS(pcis->is_I_local, &N_to_D));
2298: PetscCall(PetscMalloc1(pcbddc->benign_n, &ctx->benign_zerodiag_subs));
2299: for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISGlobalToLocalMappingApplyIS(N_to_D, IS_GTOLM_DROP, pcbddc->benign_zerodiag_subs[i], &ctx->benign_zerodiag_subs[i]));
2300: PetscCall(ISLocalToGlobalMappingDestroy(&N_to_D));
2301: ctx->free = PETSC_TRUE;
2302: }
2303: ctx->A = pcis->A_IB;
2304: ctx->work = work;
2305: PetscCall(MatSetUp(A_IB));
2306: PetscCall(MatAssemblyBegin(A_IB, MAT_FINAL_ASSEMBLY));
2307: PetscCall(MatAssemblyEnd(A_IB, MAT_FINAL_ASSEMBLY));
2308: pcis->A_IB = A_IB;
2310: /* A_BI as A_IB^T */
2311: PetscCall(MatCreateTranspose(A_IB, &A_BI));
2312: pcbddc->benign_original_mat = pcis->A_BI;
2313: pcis->A_BI = A_BI;
2314: } else {
2315: if (!pcbddc->benign_original_mat) PetscFunctionReturn(PETSC_SUCCESS);
2316: PetscCall(MatShellGetContext(pcis->A_IB, &ctx));
2317: PetscCall(MatDestroy(&pcis->A_IB));
2318: pcis->A_IB = ctx->A;
2319: ctx->A = NULL;
2320: PetscCall(MatDestroy(&pcis->A_BI));
2321: pcis->A_BI = pcbddc->benign_original_mat;
2322: pcbddc->benign_original_mat = NULL;
2323: if (ctx->free) {
2324: PetscInt i;
2325: for (i = 0; i < ctx->benign_n; i++) PetscCall(ISDestroy(&ctx->benign_zerodiag_subs[i]));
2326: PetscCall(PetscFree(ctx->benign_zerodiag_subs));
2327: }
2328: PetscCall(PetscFree(ctx->work));
2329: PetscCall(PetscFree(ctx));
2330: }
2331: PetscFunctionReturn(PETSC_SUCCESS);
2332: }
2334: /* used just in bddc debug mode */
2335: static PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B)
2336: {
2337: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2338: Mat_IS *matis = (Mat_IS *)pc->pmat->data;
2339: Mat An;
2341: PetscFunctionBegin;
2342: PetscCall(MatPtAP(matis->A, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &An));
2343: PetscCall(MatZeroRowsColumns(An, pcbddc->benign_n, pcbddc->benign_p0_lidx, 1.0, NULL, NULL));
2344: if (is1) {
2345: PetscCall(MatCreateSubMatrix(An, is1, is2, MAT_INITIAL_MATRIX, B));
2346: PetscCall(MatDestroy(&An));
2347: } else {
2348: *B = An;
2349: }
2350: PetscFunctionReturn(PETSC_SUCCESS);
2351: }
2353: /* TODO: add reuse flag */
2354: PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B)
2355: {
2356: Mat Bt;
2357: PetscScalar *a, *bdata;
2358: const PetscInt *ii, *ij;
2359: PetscInt m, n, i, nnz, *bii, *bij;
2360: PetscBool flg_row;
2362: PetscFunctionBegin;
2363: PetscCall(MatGetSize(A, &n, &m));
2364: PetscCall(MatGetRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row));
2365: PetscCall(MatSeqAIJGetArray(A, &a));
2366: nnz = n;
2367: for (i = 0; i < ii[n]; i++) {
2368: if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++;
2369: }
2370: PetscCall(PetscMalloc1(n + 1, &bii));
2371: PetscCall(PetscMalloc1(nnz, &bij));
2372: PetscCall(PetscMalloc1(nnz, &bdata));
2373: nnz = 0;
2374: bii[0] = 0;
2375: for (i = 0; i < n; i++) {
2376: PetscInt j;
2377: for (j = ii[i]; j < ii[i + 1]; j++) {
2378: PetscScalar entry = a[j];
2379: if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) {
2380: bij[nnz] = ij[j];
2381: bdata[nnz] = entry;
2382: nnz++;
2383: }
2384: }
2385: bii[i + 1] = nnz;
2386: }
2387: PetscCall(MatSeqAIJRestoreArray(A, &a));
2388: PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, m, bii, bij, bdata, &Bt));
2389: PetscCall(MatRestoreRowIJ(A, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &ij, &flg_row));
2390: {
2391: Mat_SeqAIJ *b = (Mat_SeqAIJ *)Bt->data;
2392: b->free_a = PETSC_TRUE;
2393: b->free_ij = PETSC_TRUE;
2394: }
2395: if (*B == A) PetscCall(MatDestroy(&A));
2396: *B = Bt;
2397: PetscFunctionReturn(PETSC_SUCCESS);
2398: }
2400: PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS *cc[], IS *primalv)
2401: {
2402: Mat B = NULL;
2403: DM dm;
2404: IS is_dummy, *cc_n;
2405: ISLocalToGlobalMapping l2gmap_dummy;
2406: PCBDDCGraph graph;
2407: PetscInt *xadj_filtered = NULL, *adjncy_filtered = NULL;
2408: PetscInt i, n;
2409: PetscInt *xadj, *adjncy;
2410: PetscBool isplex = PETSC_FALSE;
2412: PetscFunctionBegin;
2413: if (ncc) *ncc = 0;
2414: if (cc) *cc = NULL;
2415: if (primalv) *primalv = NULL;
2416: PetscCall(PCBDDCGraphCreate(&graph));
2417: PetscCall(MatGetDM(pc->pmat, &dm));
2418: if (!dm) PetscCall(PCGetDM(pc, &dm));
2419: if (dm) PetscCall(PetscObjectTypeCompareAny((PetscObject)dm, &isplex, DMPLEX, DMP4EST, DMP8EST, ""));
2420: if (filter) isplex = PETSC_FALSE;
2422: if (isplex) { /* this code has been modified from plexpartition.c */
2423: PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots;
2424: PetscInt *adj = NULL;
2425: IS cellNumbering;
2426: const PetscInt *cellNum;
2427: PetscBool useCone, useClosure;
2428: PetscSection section;
2429: PetscSegBuffer adjBuffer;
2430: PetscSF sfPoint;
2432: PetscCall(DMConvert(dm, DMPLEX, &dm));
2433: PetscCall(DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd));
2434: PetscCall(DMGetPointSF(dm, &sfPoint));
2435: PetscCall(PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL));
2436: /* Build adjacency graph via a section/segbuffer */
2437: PetscCall(PetscSectionCreate(PetscObjectComm((PetscObject)dm), §ion));
2438: PetscCall(PetscSectionSetChart(section, pStart, pEnd));
2439: PetscCall(PetscSegBufferCreate(sizeof(PetscInt), 1000, &adjBuffer));
2440: /* Always use FVM adjacency to create partitioner graph */
2441: PetscCall(DMGetBasicAdjacency(dm, &useCone, &useClosure));
2442: PetscCall(DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE));
2443: PetscCall(DMPlexGetCellNumbering(dm, &cellNumbering));
2444: PetscCall(ISGetIndices(cellNumbering, &cellNum));
2445: for (n = 0, p = pStart; p < pEnd; p++) {
2446: /* Skip non-owned cells in parallel (ParMetis expects no overlap) */
2447: if (nroots > 0) {
2448: if (cellNum[p] < 0) continue;
2449: }
2450: adjSize = PETSC_DETERMINE;
2451: PetscCall(DMPlexGetAdjacency(dm, p, &adjSize, &adj));
2452: for (a = 0; a < adjSize; ++a) {
2453: const PetscInt point = adj[a];
2454: if (pStart <= point && point < pEnd) {
2455: PetscInt *PETSC_RESTRICT pBuf;
2456: PetscCall(PetscSectionAddDof(section, p, 1));
2457: PetscCall(PetscSegBufferGetInts(adjBuffer, 1, &pBuf));
2458: *pBuf = point;
2459: }
2460: }
2461: n++;
2462: }
2463: PetscCall(DMSetBasicAdjacency(dm, useCone, useClosure));
2464: /* Derive CSR graph from section/segbuffer */
2465: PetscCall(PetscSectionSetUp(section));
2466: PetscCall(PetscSectionGetStorageSize(section, &size));
2467: PetscCall(PetscMalloc1(n + 1, &xadj));
2468: for (idx = 0, p = pStart; p < pEnd; p++) {
2469: if (nroots > 0) {
2470: if (cellNum[p] < 0) continue;
2471: }
2472: PetscCall(PetscSectionGetOffset(section, p, &xadj[idx++]));
2473: }
2474: xadj[n] = size;
2475: PetscCall(PetscSegBufferExtractAlloc(adjBuffer, &adjncy));
2476: /* Clean up */
2477: PetscCall(PetscSegBufferDestroy(&adjBuffer));
2478: PetscCall(PetscSectionDestroy(§ion));
2479: PetscCall(PetscFree(adj));
2480: graph->xadj = xadj;
2481: graph->adjncy = adjncy;
2482: } else {
2483: Mat A;
2484: PetscBool isseqaij, flg_row;
2486: PetscCall(MatISGetLocalMat(pc->pmat, &A));
2487: if (!A->rmap->N || !A->cmap->N) {
2488: PetscCall(PCBDDCGraphDestroy(&graph));
2489: PetscFunctionReturn(PETSC_SUCCESS);
2490: }
2491: PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isseqaij));
2492: if (!isseqaij && filter) {
2493: PetscBool isseqdense;
2495: PetscCall(PetscObjectTypeCompare((PetscObject)A, MATSEQDENSE, &isseqdense));
2496: if (!isseqdense) {
2497: PetscCall(MatConvert(A, MATSEQAIJ, MAT_INITIAL_MATRIX, &B));
2498: } else { /* TODO: rectangular case and LDA */
2499: PetscScalar *array;
2500: PetscReal chop = 1.e-6;
2502: PetscCall(MatDuplicate(A, MAT_COPY_VALUES, &B));
2503: PetscCall(MatDenseGetArray(B, &array));
2504: PetscCall(MatGetSize(B, &n, NULL));
2505: for (i = 0; i < n; i++) {
2506: PetscInt j;
2507: for (j = i + 1; j < n; j++) {
2508: PetscReal thresh = chop * (PetscAbsScalar(array[i * (n + 1)]) + PetscAbsScalar(array[j * (n + 1)]));
2509: if (PetscAbsScalar(array[i * n + j]) < thresh) array[i * n + j] = 0.;
2510: if (PetscAbsScalar(array[j * n + i]) < thresh) array[j * n + i] = 0.;
2511: }
2512: }
2513: PetscCall(MatDenseRestoreArray(B, &array));
2514: PetscCall(MatConvert(B, MATSEQAIJ, MAT_INPLACE_MATRIX, &B));
2515: }
2516: } else {
2517: PetscCall(PetscObjectReference((PetscObject)A));
2518: B = A;
2519: }
2520: PetscCall(MatGetRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
2522: /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */
2523: if (filter) {
2524: PetscScalar *data;
2525: PetscInt j, cum;
2527: PetscCall(PetscCalloc2(n + 1, &xadj_filtered, xadj[n], &adjncy_filtered));
2528: PetscCall(MatSeqAIJGetArray(B, &data));
2529: cum = 0;
2530: for (i = 0; i < n; i++) {
2531: PetscInt t;
2533: for (j = xadj[i]; j < xadj[i + 1]; j++) {
2534: if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) continue;
2535: adjncy_filtered[cum + xadj_filtered[i]++] = adjncy[j];
2536: }
2537: t = xadj_filtered[i];
2538: xadj_filtered[i] = cum;
2539: cum += t;
2540: }
2541: PetscCall(MatSeqAIJRestoreArray(B, &data));
2542: graph->xadj = xadj_filtered;
2543: graph->adjncy = adjncy_filtered;
2544: } else {
2545: graph->xadj = xadj;
2546: graph->adjncy = adjncy;
2547: }
2548: }
2549: /* compute local connected components using PCBDDCGraph */
2550: graph->seq_graph = PETSC_TRUE; /* analyze local connected components (i.e. disconnected subdomains) irrespective of dofs count */
2551: PetscCall(ISCreateStride(PETSC_COMM_SELF, n, 0, 1, &is_dummy));
2552: PetscCall(ISLocalToGlobalMappingCreateIS(is_dummy, &l2gmap_dummy));
2553: PetscCall(ISDestroy(&is_dummy));
2554: PetscCall(PCBDDCGraphInit(graph, l2gmap_dummy, n, PETSC_INT_MAX));
2555: PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap_dummy));
2556: PetscCall(PCBDDCGraphSetUp(graph, 1, NULL, NULL, 0, NULL, NULL));
2557: PetscCall(PCBDDCGraphComputeConnectedComponents(graph));
2559: /* partial clean up */
2560: PetscCall(PetscFree2(xadj_filtered, adjncy_filtered));
2561: if (B) {
2562: PetscBool flg_row;
2563: PetscCall(MatRestoreRowIJ(B, 0, PETSC_TRUE, PETSC_FALSE, &n, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
2564: PetscCall(MatDestroy(&B));
2565: }
2566: if (isplex) {
2567: PetscCall(PetscFree(xadj));
2568: PetscCall(PetscFree(adjncy));
2569: }
2571: /* get back data */
2572: if (isplex) {
2573: if (ncc) *ncc = graph->ncc;
2574: if (cc || primalv) {
2575: Mat A;
2576: PetscBT btv, btvt, btvc;
2577: PetscSection subSection;
2578: PetscInt *ids, cum, cump, *cids, *pids;
2579: PetscInt dim, cStart, cEnd, fStart, fEnd, vStart, vEnd, pStart, pEnd;
2581: PetscCall(DMGetDimension(dm, &dim));
2582: PetscCall(DMPlexGetSubdomainSection(dm, &subSection));
2583: PetscCall(DMPlexGetHeightStratum(dm, 1, &fStart, &fEnd));
2584: PetscCall(DMPlexGetHeightStratum(dm, 0, &cStart, &cEnd));
2585: PetscCall(DMPlexGetDepthStratum(dm, 0, &vStart, &vEnd));
2586: PetscCall(DMPlexGetChart(dm, &pStart, &pEnd));
2587: PetscCall(MatISGetLocalMat(pc->pmat, &A));
2588: PetscCall(PetscMalloc3(A->rmap->n, &ids, graph->ncc + 1, &cids, A->rmap->n, &pids));
2589: PetscCall(PetscBTCreate(A->rmap->n, &btv));
2590: PetscCall(PetscBTCreate(A->rmap->n, &btvt));
2591: PetscCall(PetscBTCreate(pEnd - pStart, &btvc));
2593: /* First see if we find corners for the subdomains, i.e. a vertex
2594: shared by at least dim subdomain boundary faces. This does not
2595: cover all the possible cases with simplices but it is enough
2596: for tensor cells */
2597: if (vStart != fStart && dim <= 3) {
2598: for (PetscInt c = cStart; c < cEnd; c++) {
2599: PetscInt nf, cnt = 0, mcnt = dim, *cfaces;
2600: const PetscInt *faces;
2602: PetscCall(DMPlexGetConeSize(dm, c, &nf));
2603: PetscCall(DMGetWorkArray(dm, nf, MPIU_INT, &cfaces));
2604: PetscCall(DMPlexGetCone(dm, c, &faces));
2605: for (PetscInt f = 0; f < nf; f++) {
2606: PetscInt nc, ff;
2608: PetscCall(DMPlexGetSupportSize(dm, faces[f], &nc));
2609: PetscCall(DMPlexGetTreeParent(dm, faces[f], &ff, NULL));
2610: if (nc == 1 && faces[f] == ff) cfaces[cnt++] = faces[f];
2611: }
2612: if (cnt >= mcnt) {
2613: PetscInt size, *closure = NULL;
2615: PetscCall(DMPlexGetTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure));
2616: for (PetscInt k = 0; k < 2 * size; k += 2) {
2617: PetscInt v = closure[k];
2618: if (v >= vStart && v < vEnd) {
2619: PetscInt vsize, *vclosure = NULL;
2621: cnt = 0;
2622: PetscCall(DMPlexGetTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure));
2623: for (PetscInt vk = 0; vk < 2 * vsize; vk += 2) {
2624: PetscInt f = vclosure[vk];
2625: if (f >= fStart && f < fEnd) {
2626: PetscInt nc, ff;
2627: PetscBool valid = PETSC_FALSE;
2629: for (PetscInt fk = 0; fk < nf; fk++)
2630: if (f == cfaces[fk]) valid = PETSC_TRUE;
2631: if (!valid) continue;
2632: PetscCall(DMPlexGetSupportSize(dm, f, &nc));
2633: PetscCall(DMPlexGetTreeParent(dm, f, &ff, NULL));
2634: if (nc == 1 && f == ff) cnt++;
2635: }
2636: }
2637: if (cnt >= mcnt) PetscCall(PetscBTSet(btvc, v - pStart));
2638: PetscCall(DMPlexRestoreTransitiveClosure(dm, v, PETSC_FALSE, &vsize, &vclosure));
2639: }
2640: }
2641: PetscCall(DMPlexRestoreTransitiveClosure(dm, c, PETSC_TRUE, &size, &closure));
2642: }
2643: PetscCall(DMRestoreWorkArray(dm, nf, MPIU_INT, &cfaces));
2644: }
2645: }
2647: cids[0] = 0;
2648: for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) {
2649: PetscInt j;
2651: PetscCall(PetscBTMemzero(A->rmap->n, btvt));
2652: for (j = graph->cptr[i]; j < graph->cptr[i + 1]; j++) {
2653: PetscInt k, size, *closure = NULL, cell = graph->queue[j];
2655: PetscCall(DMPlexGetTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure));
2656: for (k = 0; k < 2 * size; k += 2) {
2657: PetscInt s, pp, p = closure[k], off, dof, cdof;
2659: PetscCall(PetscSectionGetConstraintDof(subSection, p, &cdof));
2660: PetscCall(PetscSectionGetOffset(subSection, p, &off));
2661: PetscCall(PetscSectionGetDof(subSection, p, &dof));
2662: for (s = 0; s < dof - cdof; s++) {
2663: if (PetscBTLookupSet(btvt, off + s)) continue;
2664: if (PetscBTLookup(btvc, p - pStart)) pids[cump++] = off + s; /* subdomain corner */
2665: else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2666: else pids[cump++] = off + s; /* cross-vertex */
2667: }
2668: PetscCall(DMPlexGetTreeParent(dm, p, &pp, NULL));
2669: if (pp != p) {
2670: PetscCall(PetscSectionGetConstraintDof(subSection, pp, &cdof));
2671: PetscCall(PetscSectionGetOffset(subSection, pp, &off));
2672: PetscCall(PetscSectionGetDof(subSection, pp, &dof));
2673: for (s = 0; s < dof - cdof; s++) {
2674: if (PetscBTLookupSet(btvt, off + s)) continue;
2675: if (PetscBTLookup(btvc, pp - pStart)) pids[cump++] = off + s; /* subdomain corner */
2676: else if (!PetscBTLookup(btv, off + s)) ids[cum++] = off + s;
2677: else pids[cump++] = off + s; /* cross-vertex */
2678: }
2679: }
2680: }
2681: PetscCall(DMPlexRestoreTransitiveClosure(dm, cell, PETSC_TRUE, &size, &closure));
2682: }
2683: cids[i + 1] = cum;
2684: /* mark dofs as already assigned */
2685: for (j = cids[i]; j < cids[i + 1]; j++) PetscCall(PetscBTSet(btv, ids[j]));
2686: }
2687: if (cc) {
2688: PetscCall(PetscMalloc1(graph->ncc, &cc_n));
2689: for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, cids[i + 1] - cids[i], ids + cids[i], PETSC_COPY_VALUES, &cc_n[i]));
2690: *cc = cc_n;
2691: }
2692: if (primalv) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), cump, pids, PETSC_COPY_VALUES, primalv));
2693: PetscCall(PetscFree3(ids, cids, pids));
2694: PetscCall(PetscBTDestroy(&btv));
2695: PetscCall(PetscBTDestroy(&btvt));
2696: PetscCall(PetscBTDestroy(&btvc));
2697: PetscCall(DMDestroy(&dm));
2698: }
2699: } else {
2700: if (ncc) *ncc = graph->ncc;
2701: if (cc) {
2702: PetscCall(PetscMalloc1(graph->ncc, &cc_n));
2703: for (i = 0; i < graph->ncc; i++) PetscCall(ISCreateGeneral(PETSC_COMM_SELF, graph->cptr[i + 1] - graph->cptr[i], graph->queue + graph->cptr[i], PETSC_COPY_VALUES, &cc_n[i]));
2704: *cc = cc_n;
2705: }
2706: }
2707: /* clean up graph */
2708: graph->xadj = NULL;
2709: graph->adjncy = NULL;
2710: PetscCall(PCBDDCGraphDestroy(&graph));
2711: PetscFunctionReturn(PETSC_SUCCESS);
2712: }
2714: PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag)
2715: {
2716: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2717: PC_IS *pcis = (PC_IS *)pc->data;
2718: IS dirIS = NULL;
2719: PetscInt i;
2721: PetscFunctionBegin;
2722: PetscCall(PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph, &dirIS));
2723: if (zerodiag) {
2724: Mat A;
2725: Vec vec3_N;
2726: PetscScalar *vals;
2727: const PetscInt *idxs;
2728: PetscInt nz, *count;
2730: /* p0 */
2731: PetscCall(VecSet(pcis->vec1_N, 0.));
2732: PetscCall(PetscMalloc1(pcis->n, &vals));
2733: PetscCall(ISGetLocalSize(zerodiag, &nz));
2734: PetscCall(ISGetIndices(zerodiag, &idxs));
2735: for (i = 0; i < nz; i++) vals[i] = 1.;
2736: PetscCall(VecSetValues(pcis->vec1_N, nz, idxs, vals, INSERT_VALUES));
2737: PetscCall(VecAssemblyBegin(pcis->vec1_N));
2738: PetscCall(VecAssemblyEnd(pcis->vec1_N));
2739: /* v_I */
2740: PetscCall(VecSetRandom(pcis->vec2_N, NULL));
2741: for (i = 0; i < nz; i++) vals[i] = 0.;
2742: PetscCall(VecSetValues(pcis->vec2_N, nz, idxs, vals, INSERT_VALUES));
2743: PetscCall(ISRestoreIndices(zerodiag, &idxs));
2744: PetscCall(ISGetIndices(pcis->is_B_local, &idxs));
2745: for (i = 0; i < pcis->n_B; i++) vals[i] = 0.;
2746: PetscCall(VecSetValues(pcis->vec2_N, pcis->n_B, idxs, vals, INSERT_VALUES));
2747: PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs));
2748: if (dirIS) {
2749: PetscInt n;
2751: PetscCall(ISGetLocalSize(dirIS, &n));
2752: PetscCall(ISGetIndices(dirIS, &idxs));
2753: for (i = 0; i < n; i++) vals[i] = 0.;
2754: PetscCall(VecSetValues(pcis->vec2_N, n, idxs, vals, INSERT_VALUES));
2755: PetscCall(ISRestoreIndices(dirIS, &idxs));
2756: }
2757: PetscCall(VecAssemblyBegin(pcis->vec2_N));
2758: PetscCall(VecAssemblyEnd(pcis->vec2_N));
2759: PetscCall(VecDuplicate(pcis->vec1_N, &vec3_N));
2760: PetscCall(VecSet(vec3_N, 0.));
2761: PetscCall(MatISGetLocalMat(pc->pmat, &A));
2762: PetscCall(MatMult(A, pcis->vec1_N, vec3_N));
2763: PetscCall(VecDot(vec3_N, pcis->vec2_N, &vals[0]));
2764: PetscCheck(PetscAbsScalar(vals[0]) <= 1.e-1, PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)", (double)PetscAbsScalar(vals[0]));
2765: PetscCall(PetscFree(vals));
2766: PetscCall(VecDestroy(&vec3_N));
2768: /* there should not be any pressure dofs lying on the interface */
2769: PetscCall(PetscCalloc1(pcis->n, &count));
2770: PetscCall(ISGetIndices(pcis->is_B_local, &idxs));
2771: for (i = 0; i < pcis->n_B; i++) count[idxs[i]]++;
2772: PetscCall(ISRestoreIndices(pcis->is_B_local, &idxs));
2773: PetscCall(ISGetIndices(zerodiag, &idxs));
2774: for (i = 0; i < nz; i++) PetscCheck(!count[idxs[i]], PETSC_COMM_SELF, PETSC_ERR_SUP, "Benign trick can not be applied! pressure dof %" PetscInt_FMT " is an interface dof", idxs[i]);
2775: PetscCall(ISRestoreIndices(zerodiag, &idxs));
2776: PetscCall(PetscFree(count));
2777: }
2778: PetscCall(ISDestroy(&dirIS));
2780: /* check PCBDDCBenignGetOrSetP0 */
2781: PetscCall(VecSetRandom(pcis->vec1_global, NULL));
2782: for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = -PetscGlobalRank - i;
2783: PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_FALSE));
2784: for (i = 0; i < pcbddc->benign_n; i++) pcbddc->benign_p0[i] = 1;
2785: PetscCall(PCBDDCBenignGetOrSetP0(pc, pcis->vec1_global, PETSC_TRUE));
2786: for (i = 0; i < pcbddc->benign_n; i++) {
2787: PetscInt val = PetscRealPart(pcbddc->benign_p0[i]);
2788: PetscCheck(val == -PetscGlobalRank - i, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error testing PCBDDCBenignGetOrSetP0! Found %g at %" PetscInt_FMT " instead of %g", (double)PetscRealPart(pcbddc->benign_p0[i]), i, (double)(-PetscGlobalRank - i));
2789: }
2790: PetscFunctionReturn(PETSC_SUCCESS);
2791: }
2793: PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal)
2794: {
2795: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
2796: Mat_IS *matis = (Mat_IS *)pc->pmat->data;
2797: IS pressures = NULL, zerodiag = NULL, *bzerodiag = NULL, zerodiag_save, *zerodiag_subs;
2798: PetscInt nz, n, benign_n, bsp = 1;
2799: PetscInt *interior_dofs, n_interior_dofs, nneu;
2800: PetscBool sorted, have_null, has_null_pressures, recompute_zerodiag, checkb;
2802: PetscFunctionBegin;
2803: if (reuse) goto project_b0;
2804: PetscCall(PetscSFDestroy(&pcbddc->benign_sf));
2805: PetscCall(MatDestroy(&pcbddc->benign_B0));
2806: for (n = 0; n < pcbddc->benign_n; n++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[n]));
2807: PetscCall(PetscFree(pcbddc->benign_zerodiag_subs));
2808: has_null_pressures = PETSC_TRUE;
2809: have_null = PETSC_TRUE;
2810: /* if local information on dofs is present, gets pressure dofs from command line (uses the last field if not provided)
2811: Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field)
2812: Checks if all the pressure dofs in each subdomain have a zero diagonal
2813: If not, a change of basis on pressures is not needed
2814: since the local Schur complements are already SPD
2815: */
2816: if (pcbddc->n_ISForDofsLocal) {
2817: IS iP = NULL;
2818: PetscInt p, *pp;
2819: PetscBool flg, blocked = PETSC_FALSE;
2821: PetscCall(PetscMalloc1(pcbddc->n_ISForDofsLocal, &pp));
2822: n = pcbddc->n_ISForDofsLocal;
2823: PetscOptionsBegin(PetscObjectComm((PetscObject)pc), ((PetscObject)pc)->prefix, "BDDC benign options", "PC");
2824: PetscCall(PetscOptionsIntArray("-pc_bddc_pressure_field", "Field id for pressures", NULL, pp, &n, &flg));
2825: PetscCall(PetscOptionsBool("-pc_bddc_pressure_blocked", "Use blocked pressure fields", NULL, blocked, &blocked, NULL));
2826: PetscOptionsEnd();
2827: if (!flg) {
2828: n = 1;
2829: pp[0] = pcbddc->n_ISForDofsLocal - 1;
2830: }
2832: bsp = 0;
2833: for (p = 0; p < n; p++) {
2834: PetscInt bs = 1;
2836: PetscCheck(pp[p] >= 0 && pp[p] < pcbddc->n_ISForDofsLocal, PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Invalid field id for pressures %" PetscInt_FMT, pp[p]);
2837: if (blocked) PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs));
2838: bsp += bs;
2839: }
2840: PetscCall(PetscMalloc1(bsp, &bzerodiag));
2841: bsp = 0;
2842: for (p = 0; p < n; p++) {
2843: const PetscInt *idxs;
2844: PetscInt b, bs = 1, npl, *bidxs;
2846: if (blocked) PetscCall(ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]], &bs));
2847: PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]], &npl));
2848: PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs));
2849: PetscCall(PetscMalloc1(npl / bs, &bidxs));
2850: for (b = 0; b < bs; b++) {
2851: PetscInt i;
2853: for (i = 0; i < npl / bs; i++) bidxs[i] = idxs[bs * i + b];
2854: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, npl / bs, bidxs, PETSC_COPY_VALUES, &bzerodiag[bsp]));
2855: bsp++;
2856: }
2857: PetscCall(PetscFree(bidxs));
2858: PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]], &idxs));
2859: }
2860: PetscCall(ISConcatenate(PETSC_COMM_SELF, bsp, bzerodiag, &pressures));
2862: /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */
2863: PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lP", (PetscObject *)&iP));
2864: if (iP) {
2865: IS newpressures;
2867: PetscCall(ISDifference(pressures, iP, &newpressures));
2868: PetscCall(ISDestroy(&pressures));
2869: pressures = newpressures;
2870: }
2871: PetscCall(ISSorted(pressures, &sorted));
2872: if (!sorted) PetscCall(ISSort(pressures));
2873: PetscCall(PetscFree(pp));
2874: }
2876: /* pcis has not been setup yet, so get the local size from the subdomain matrix */
2877: PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
2878: if (!n) pcbddc->benign_change_explicit = PETSC_TRUE;
2879: PetscCall(MatFindZeroDiagonals(pcbddc->local_mat, &zerodiag));
2880: PetscCall(ISSorted(zerodiag, &sorted));
2881: if (!sorted) PetscCall(ISSort(zerodiag));
2882: PetscCall(PetscObjectReference((PetscObject)zerodiag));
2883: zerodiag_save = zerodiag;
2884: PetscCall(ISGetLocalSize(zerodiag, &nz));
2885: if (!nz) {
2886: if (n) have_null = PETSC_FALSE;
2887: has_null_pressures = PETSC_FALSE;
2888: PetscCall(ISDestroy(&zerodiag));
2889: }
2890: recompute_zerodiag = PETSC_FALSE;
2892: /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */
2893: zerodiag_subs = NULL;
2894: benign_n = 0;
2895: n_interior_dofs = 0;
2896: interior_dofs = NULL;
2897: nneu = 0;
2898: if (pcbddc->NeumannBoundariesLocal) PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &nneu));
2899: checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level);
2900: if (checkb) { /* need to compute interior nodes */
2901: PetscInt n, i;
2902: PetscInt *count;
2903: ISLocalToGlobalMapping mapping;
2905: PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &mapping, NULL));
2906: PetscCall(ISLocalToGlobalMappingGetNodeInfo(mapping, &n, &count, NULL));
2907: PetscCall(PetscMalloc1(n, &interior_dofs));
2908: for (i = 0; i < n; i++)
2909: if (count[i] < 2) interior_dofs[n_interior_dofs++] = i;
2910: PetscCall(ISLocalToGlobalMappingRestoreNodeInfo(mapping, &n, &count, NULL));
2911: }
2912: if (has_null_pressures) {
2913: IS *subs;
2914: PetscInt nsubs, i, j, nl;
2915: const PetscInt *idxs;
2916: PetscScalar *array;
2917: Vec *work;
2919: subs = pcbddc->local_subs;
2920: nsubs = pcbddc->n_local_subs;
2921: /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */
2922: if (checkb) {
2923: PetscCall(VecDuplicateVecs(matis->y, 2, &work));
2924: PetscCall(ISGetLocalSize(zerodiag, &nl));
2925: PetscCall(ISGetIndices(zerodiag, &idxs));
2926: /* work[0] = 1_p */
2927: PetscCall(VecSet(work[0], 0.));
2928: PetscCall(VecGetArray(work[0], &array));
2929: for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2930: PetscCall(VecRestoreArray(work[0], &array));
2931: /* work[1] = 1_v */
2932: PetscCall(VecSet(work[1], 1.));
2933: PetscCall(VecGetArray(work[1], &array));
2934: for (j = 0; j < nl; j++) array[idxs[j]] = 0.;
2935: PetscCall(VecRestoreArray(work[1], &array));
2936: PetscCall(ISRestoreIndices(zerodiag, &idxs));
2937: }
2939: if (nsubs > 1 || bsp > 1) {
2940: IS *is;
2941: PetscInt b, totb;
2943: totb = bsp;
2944: is = bsp > 1 ? bzerodiag : &zerodiag;
2945: nsubs = PetscMax(nsubs, 1);
2946: PetscCall(PetscCalloc1(nsubs * totb, &zerodiag_subs));
2947: for (b = 0; b < totb; b++) {
2948: for (i = 0; i < nsubs; i++) {
2949: ISLocalToGlobalMapping l2g;
2950: IS t_zerodiag_subs;
2951: PetscInt nl;
2953: if (subs) {
2954: PetscCall(ISLocalToGlobalMappingCreateIS(subs[i], &l2g));
2955: } else {
2956: IS tis;
2958: PetscCall(MatGetLocalSize(pcbddc->local_mat, &nl, NULL));
2959: PetscCall(ISCreateStride(PETSC_COMM_SELF, nl, 0, 1, &tis));
2960: PetscCall(ISLocalToGlobalMappingCreateIS(tis, &l2g));
2961: PetscCall(ISDestroy(&tis));
2962: }
2963: PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, is[b], &t_zerodiag_subs));
2964: PetscCall(ISGetLocalSize(t_zerodiag_subs, &nl));
2965: if (nl) {
2966: PetscBool valid = PETSC_TRUE;
2968: if (checkb) {
2969: PetscCall(VecSet(matis->x, 0));
2970: PetscCall(ISGetLocalSize(subs[i], &nl));
2971: PetscCall(ISGetIndices(subs[i], &idxs));
2972: PetscCall(VecGetArray(matis->x, &array));
2973: for (j = 0; j < nl; j++) array[idxs[j]] = 1.;
2974: PetscCall(VecRestoreArray(matis->x, &array));
2975: PetscCall(ISRestoreIndices(subs[i], &idxs));
2976: PetscCall(VecPointwiseMult(matis->x, work[0], matis->x));
2977: PetscCall(MatMult(matis->A, matis->x, matis->y));
2978: PetscCall(VecPointwiseMult(matis->y, work[1], matis->y));
2979: PetscCall(VecGetArray(matis->y, &array));
2980: for (j = 0; j < n_interior_dofs; j++) {
2981: if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
2982: valid = PETSC_FALSE;
2983: break;
2984: }
2985: }
2986: PetscCall(VecRestoreArray(matis->y, &array));
2987: }
2988: if (valid && nneu) {
2989: const PetscInt *idxs;
2990: PetscInt nzb;
2992: PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs));
2993: PetscCall(ISGlobalToLocalMappingApply(l2g, IS_GTOLM_DROP, nneu, idxs, &nzb, NULL));
2994: PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs));
2995: if (nzb) valid = PETSC_FALSE;
2996: }
2997: if (valid && pressures) {
2998: IS t_pressure_subs, tmp;
2999: PetscInt i1, i2;
3001: PetscCall(ISGlobalToLocalMappingApplyIS(l2g, IS_GTOLM_DROP, pressures, &t_pressure_subs));
3002: PetscCall(ISEmbed(t_zerodiag_subs, t_pressure_subs, PETSC_TRUE, &tmp));
3003: PetscCall(ISGetLocalSize(tmp, &i1));
3004: PetscCall(ISGetLocalSize(t_zerodiag_subs, &i2));
3005: if (i2 != i1) valid = PETSC_FALSE;
3006: PetscCall(ISDestroy(&t_pressure_subs));
3007: PetscCall(ISDestroy(&tmp));
3008: }
3009: if (valid) {
3010: PetscCall(ISLocalToGlobalMappingApplyIS(l2g, t_zerodiag_subs, &zerodiag_subs[benign_n]));
3011: benign_n++;
3012: } else recompute_zerodiag = PETSC_TRUE;
3013: }
3014: PetscCall(ISDestroy(&t_zerodiag_subs));
3015: PetscCall(ISLocalToGlobalMappingDestroy(&l2g));
3016: }
3017: }
3018: } else { /* there's just one subdomain (or zero if they have not been detected */
3019: PetscBool valid = PETSC_TRUE;
3021: if (nneu) valid = PETSC_FALSE;
3022: if (valid && pressures) PetscCall(ISEqual(pressures, zerodiag, &valid));
3023: if (valid && checkb) {
3024: PetscCall(MatMult(matis->A, work[0], matis->x));
3025: PetscCall(VecPointwiseMult(matis->x, work[1], matis->x));
3026: PetscCall(VecGetArray(matis->x, &array));
3027: for (j = 0; j < n_interior_dofs; j++) {
3028: if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) {
3029: valid = PETSC_FALSE;
3030: break;
3031: }
3032: }
3033: PetscCall(VecRestoreArray(matis->x, &array));
3034: }
3035: if (valid) {
3036: benign_n = 1;
3037: PetscCall(PetscMalloc1(benign_n, &zerodiag_subs));
3038: PetscCall(PetscObjectReference((PetscObject)zerodiag));
3039: zerodiag_subs[0] = zerodiag;
3040: }
3041: }
3042: if (checkb) PetscCall(VecDestroyVecs(2, &work));
3043: }
3044: PetscCall(PetscFree(interior_dofs));
3046: if (!benign_n) {
3047: PetscInt n;
3049: PetscCall(ISDestroy(&zerodiag));
3050: recompute_zerodiag = PETSC_FALSE;
3051: PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
3052: if (n) have_null = PETSC_FALSE;
3053: }
3055: /* final check for null pressures */
3056: if (zerodiag && pressures) PetscCall(ISEqual(pressures, zerodiag, &have_null));
3058: if (recompute_zerodiag) {
3059: PetscCall(ISDestroy(&zerodiag));
3060: if (benign_n == 1) {
3061: PetscCall(PetscObjectReference((PetscObject)zerodiag_subs[0]));
3062: zerodiag = zerodiag_subs[0];
3063: } else {
3064: PetscInt i, nzn, *new_idxs;
3066: nzn = 0;
3067: for (i = 0; i < benign_n; i++) {
3068: PetscInt ns;
3069: PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns));
3070: nzn += ns;
3071: }
3072: PetscCall(PetscMalloc1(nzn, &new_idxs));
3073: nzn = 0;
3074: for (i = 0; i < benign_n; i++) {
3075: PetscInt ns, *idxs;
3076: PetscCall(ISGetLocalSize(zerodiag_subs[i], &ns));
3077: PetscCall(ISGetIndices(zerodiag_subs[i], (const PetscInt **)&idxs));
3078: PetscCall(PetscArraycpy(new_idxs + nzn, idxs, ns));
3079: PetscCall(ISRestoreIndices(zerodiag_subs[i], (const PetscInt **)&idxs));
3080: nzn += ns;
3081: }
3082: PetscCall(PetscSortInt(nzn, new_idxs));
3083: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, nzn, new_idxs, PETSC_OWN_POINTER, &zerodiag));
3084: }
3085: have_null = PETSC_FALSE;
3086: }
3088: /* determines if the coarse solver will be singular or not */
3089: PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_null, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)pc)));
3091: /* Prepare matrix to compute no-net-flux */
3092: if (pcbddc->compute_nonetflux && !pcbddc->divudotp) {
3093: Mat A, loc_divudotp;
3094: ISLocalToGlobalMapping rl2g, cl2g, l2gmap;
3095: IS row, col, isused = NULL;
3096: PetscInt M, N, n, st, n_isused;
3098: if (pressures) {
3099: isused = pressures;
3100: } else {
3101: isused = zerodiag_save;
3102: }
3103: PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &l2gmap, NULL));
3104: PetscCall(MatISGetLocalMat(pc->pmat, &A));
3105: PetscCall(MatGetLocalSize(A, &n, NULL));
3106: PetscCheck(isused || (n == 0), PETSC_COMM_SELF, PETSC_ERR_USER, "Don't know how to extract div u dot p! Please provide the pressure field");
3107: n_isused = 0;
3108: if (isused) PetscCall(ISGetLocalSize(isused, &n_isused));
3109: PetscCallMPI(MPI_Scan(&n_isused, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
3110: st = st - n_isused;
3111: if (n) {
3112: const PetscInt *gidxs;
3114: PetscCall(MatCreateSubMatrix(A, isused, NULL, MAT_INITIAL_MATRIX, &loc_divudotp));
3115: PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs));
3116: /* TODO: extend ISCreateStride with st = PETSC_DECIDE */
3117: PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row));
3118: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), n, gidxs, PETSC_COPY_VALUES, &col));
3119: PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs));
3120: } else {
3121: PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &loc_divudotp));
3122: PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), n_isused, st, 1, &row));
3123: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), 0, NULL, PETSC_COPY_VALUES, &col));
3124: }
3125: PetscCall(MatGetSize(pc->pmat, NULL, &N));
3126: PetscCall(ISGetSize(row, &M));
3127: PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g));
3128: PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g));
3129: PetscCall(ISDestroy(&row));
3130: PetscCall(ISDestroy(&col));
3131: PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &pcbddc->divudotp));
3132: PetscCall(MatSetType(pcbddc->divudotp, MATIS));
3133: PetscCall(MatSetSizes(pcbddc->divudotp, PETSC_DECIDE, PETSC_DECIDE, M, N));
3134: PetscCall(MatSetLocalToGlobalMapping(pcbddc->divudotp, rl2g, cl2g));
3135: PetscCall(ISLocalToGlobalMappingDestroy(&rl2g));
3136: PetscCall(ISLocalToGlobalMappingDestroy(&cl2g));
3137: PetscCall(MatISSetLocalMat(pcbddc->divudotp, loc_divudotp));
3138: PetscCall(MatDestroy(&loc_divudotp));
3139: PetscCall(MatAssemblyBegin(pcbddc->divudotp, MAT_FINAL_ASSEMBLY));
3140: PetscCall(MatAssemblyEnd(pcbddc->divudotp, MAT_FINAL_ASSEMBLY));
3141: }
3142: PetscCall(ISDestroy(&zerodiag_save));
3143: PetscCall(ISDestroy(&pressures));
3144: if (bzerodiag) {
3145: PetscInt i;
3147: for (i = 0; i < bsp; i++) PetscCall(ISDestroy(&bzerodiag[i]));
3148: PetscCall(PetscFree(bzerodiag));
3149: }
3150: pcbddc->benign_n = benign_n;
3151: pcbddc->benign_zerodiag_subs = zerodiag_subs;
3153: /* determines if the problem has subdomains with 0 pressure block */
3154: have_null = (PetscBool)(!!pcbddc->benign_n);
3155: PetscCallMPI(MPIU_Allreduce(&have_null, &pcbddc->benign_have_null, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
3157: project_b0:
3158: PetscCall(MatGetLocalSize(pcbddc->local_mat, &n, NULL));
3159: /* change of basis and p0 dofs */
3160: if (pcbddc->benign_n) {
3161: PetscInt i, s, *nnz;
3163: /* local change of basis for pressures */
3164: PetscCall(MatDestroy(&pcbddc->benign_change));
3165: PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_change));
3166: PetscCall(MatSetType(pcbddc->benign_change, MATAIJ));
3167: PetscCall(MatSetSizes(pcbddc->benign_change, n, n, PETSC_DECIDE, PETSC_DECIDE));
3168: PetscCall(PetscMalloc1(n, &nnz));
3169: for (i = 0; i < n; i++) nnz[i] = 1; /* defaults to identity */
3170: for (i = 0; i < pcbddc->benign_n; i++) {
3171: const PetscInt *idxs;
3172: PetscInt nzs, j;
3174: PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nzs));
3175: PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], &idxs));
3176: for (j = 0; j < nzs - 1; j++) nnz[idxs[j]] = 2; /* change on pressures */
3177: nnz[idxs[nzs - 1]] = nzs; /* last local pressure dof in subdomain */
3178: PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], &idxs));
3179: }
3180: PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_change, 0, nnz));
3181: PetscCall(MatSetOption(pcbddc->benign_change, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3182: PetscCall(PetscFree(nnz));
3183: /* set identity by default */
3184: for (i = 0; i < n; i++) PetscCall(MatSetValue(pcbddc->benign_change, i, i, 1., INSERT_VALUES));
3185: PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0));
3186: PetscCall(PetscMalloc3(pcbddc->benign_n, &pcbddc->benign_p0_lidx, pcbddc->benign_n, &pcbddc->benign_p0_gidx, pcbddc->benign_n, &pcbddc->benign_p0));
3187: /* set change on pressures */
3188: for (s = 0; s < pcbddc->benign_n; s++) {
3189: PetscScalar *array;
3190: const PetscInt *idxs;
3191: PetscInt nzs;
3193: PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[s], &nzs));
3194: PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[s], &idxs));
3195: for (i = 0; i < nzs - 1; i++) {
3196: PetscScalar vals[2];
3197: PetscInt cols[2];
3199: cols[0] = idxs[i];
3200: cols[1] = idxs[nzs - 1];
3201: vals[0] = 1.;
3202: vals[1] = 1.;
3203: PetscCall(MatSetValues(pcbddc->benign_change, 1, cols, 2, cols, vals, INSERT_VALUES));
3204: }
3205: PetscCall(PetscMalloc1(nzs, &array));
3206: for (i = 0; i < nzs - 1; i++) array[i] = -1.;
3207: array[nzs - 1] = 1.;
3208: PetscCall(MatSetValues(pcbddc->benign_change, 1, idxs + nzs - 1, nzs, idxs, array, INSERT_VALUES));
3209: /* store local idxs for p0 */
3210: pcbddc->benign_p0_lidx[s] = idxs[nzs - 1];
3211: PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[s], &idxs));
3212: PetscCall(PetscFree(array));
3213: }
3214: PetscCall(MatAssemblyBegin(pcbddc->benign_change, MAT_FINAL_ASSEMBLY));
3215: PetscCall(MatAssemblyEnd(pcbddc->benign_change, MAT_FINAL_ASSEMBLY));
3217: /* project if needed */
3218: if (pcbddc->benign_change_explicit) {
3219: Mat M;
3221: PetscCall(MatPtAP(pcbddc->local_mat, pcbddc->benign_change, MAT_INITIAL_MATRIX, 2.0, &M));
3222: PetscCall(MatDestroy(&pcbddc->local_mat));
3223: PetscCall(MatSeqAIJCompress(M, &pcbddc->local_mat));
3224: PetscCall(MatDestroy(&M));
3225: }
3226: /* store global idxs for p0 */
3227: PetscCall(ISLocalToGlobalMappingApply(matis->rmapping, pcbddc->benign_n, pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx));
3228: }
3229: *zerodiaglocal = zerodiag;
3230: PetscFunctionReturn(PETSC_SUCCESS);
3231: }
3233: PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get)
3234: {
3235: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3236: PetscScalar *array;
3238: PetscFunctionBegin;
3239: if (!pcbddc->benign_sf) {
3240: PetscCall(PetscSFCreate(PetscObjectComm((PetscObject)pc), &pcbddc->benign_sf));
3241: PetscCall(PetscSFSetGraphLayout(pcbddc->benign_sf, pc->pmat->rmap, pcbddc->benign_n, NULL, PETSC_OWN_POINTER, pcbddc->benign_p0_gidx));
3242: }
3243: if (get) {
3244: PetscCall(VecGetArrayRead(v, (const PetscScalar **)&array));
3245: PetscCall(PetscSFBcastBegin(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE));
3246: PetscCall(PetscSFBcastEnd(pcbddc->benign_sf, MPIU_SCALAR, array, pcbddc->benign_p0, MPI_REPLACE));
3247: PetscCall(VecRestoreArrayRead(v, (const PetscScalar **)&array));
3248: } else {
3249: PetscCall(VecGetArray(v, &array));
3250: PetscCall(PetscSFReduceBegin(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE));
3251: PetscCall(PetscSFReduceEnd(pcbddc->benign_sf, MPIU_SCALAR, pcbddc->benign_p0, array, MPI_REPLACE));
3252: PetscCall(VecRestoreArray(v, &array));
3253: }
3254: PetscFunctionReturn(PETSC_SUCCESS);
3255: }
3257: PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop)
3258: {
3259: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3261: PetscFunctionBegin;
3262: /* TODO: add error checking
3263: - avoid nested pop (or push) calls.
3264: - cannot push before pop.
3265: - cannot call this if pcbddc->local_mat is NULL
3266: */
3267: if (!pcbddc->benign_n) PetscFunctionReturn(PETSC_SUCCESS);
3268: if (pop) {
3269: if (pcbddc->benign_change_explicit) {
3270: IS is_p0;
3271: MatReuse reuse;
3273: /* extract B_0 */
3274: reuse = MAT_INITIAL_MATRIX;
3275: if (pcbddc->benign_B0) reuse = MAT_REUSE_MATRIX;
3276: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->benign_n, pcbddc->benign_p0_lidx, PETSC_COPY_VALUES, &is_p0));
3277: PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_p0, NULL, reuse, &pcbddc->benign_B0));
3278: /* remove rows and cols from local problem */
3279: PetscCall(MatSetOption(pcbddc->local_mat, MAT_KEEP_NONZERO_PATTERN, PETSC_TRUE));
3280: PetscCall(MatSetOption(pcbddc->local_mat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
3281: PetscCall(MatZeroRowsColumnsIS(pcbddc->local_mat, is_p0, 1.0, NULL, NULL));
3282: PetscCall(ISDestroy(&is_p0));
3283: } else {
3284: Mat_IS *matis = (Mat_IS *)pc->pmat->data;
3285: PetscScalar *vals;
3286: PetscInt i, n, *idxs_ins;
3288: PetscCall(VecGetLocalSize(matis->y, &n));
3289: PetscCall(PetscMalloc2(n, &idxs_ins, n, &vals));
3290: if (!pcbddc->benign_B0) {
3291: PetscInt *nnz;
3292: PetscCall(MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat), &pcbddc->benign_B0));
3293: PetscCall(MatSetType(pcbddc->benign_B0, MATAIJ));
3294: PetscCall(MatSetSizes(pcbddc->benign_B0, pcbddc->benign_n, n, PETSC_DECIDE, PETSC_DECIDE));
3295: PetscCall(PetscMalloc1(pcbddc->benign_n, &nnz));
3296: for (i = 0; i < pcbddc->benign_n; i++) {
3297: PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nnz[i]));
3298: nnz[i] = n - nnz[i];
3299: }
3300: PetscCall(MatSeqAIJSetPreallocation(pcbddc->benign_B0, 0, nnz));
3301: PetscCall(MatSetOption(pcbddc->benign_B0, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
3302: PetscCall(PetscFree(nnz));
3303: }
3305: for (i = 0; i < pcbddc->benign_n; i++) {
3306: PetscScalar *array;
3307: PetscInt *idxs, j, nz, cum;
3309: PetscCall(VecSet(matis->x, 0.));
3310: PetscCall(ISGetLocalSize(pcbddc->benign_zerodiag_subs[i], &nz));
3311: PetscCall(ISGetIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs));
3312: for (j = 0; j < nz; j++) vals[j] = 1.;
3313: PetscCall(VecSetValues(matis->x, nz, idxs, vals, INSERT_VALUES));
3314: PetscCall(VecAssemblyBegin(matis->x));
3315: PetscCall(VecAssemblyEnd(matis->x));
3316: PetscCall(VecSet(matis->y, 0.));
3317: PetscCall(MatMult(matis->A, matis->x, matis->y));
3318: PetscCall(VecGetArray(matis->y, &array));
3319: cum = 0;
3320: for (j = 0; j < n; j++) {
3321: if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) {
3322: vals[cum] = array[j];
3323: idxs_ins[cum] = j;
3324: cum++;
3325: }
3326: }
3327: PetscCall(MatSetValues(pcbddc->benign_B0, 1, &i, cum, idxs_ins, vals, INSERT_VALUES));
3328: PetscCall(VecRestoreArray(matis->y, &array));
3329: PetscCall(ISRestoreIndices(pcbddc->benign_zerodiag_subs[i], (const PetscInt **)&idxs));
3330: }
3331: PetscCall(MatAssemblyBegin(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY));
3332: PetscCall(MatAssemblyEnd(pcbddc->benign_B0, MAT_FINAL_ASSEMBLY));
3333: PetscCall(PetscFree2(idxs_ins, vals));
3334: }
3335: } else { /* push */
3337: PetscCheck(pcbddc->benign_change_explicit, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot push B0!");
3338: for (PetscInt i = 0; i < pcbddc->benign_n; i++) {
3339: PetscScalar *B0_vals;
3340: PetscInt *B0_cols, B0_ncol;
3342: PetscCall(MatGetRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals));
3343: PetscCall(MatSetValues(pcbddc->local_mat, 1, pcbddc->benign_p0_lidx + i, B0_ncol, B0_cols, B0_vals, INSERT_VALUES));
3344: PetscCall(MatSetValues(pcbddc->local_mat, B0_ncol, B0_cols, 1, pcbddc->benign_p0_lidx + i, B0_vals, INSERT_VALUES));
3345: PetscCall(MatSetValue(pcbddc->local_mat, pcbddc->benign_p0_lidx[i], pcbddc->benign_p0_lidx[i], 0.0, INSERT_VALUES));
3346: PetscCall(MatRestoreRow(pcbddc->benign_B0, i, &B0_ncol, (const PetscInt **)&B0_cols, (const PetscScalar **)&B0_vals));
3347: }
3348: PetscCall(MatAssemblyBegin(pcbddc->local_mat, MAT_FINAL_ASSEMBLY));
3349: PetscCall(MatAssemblyEnd(pcbddc->local_mat, MAT_FINAL_ASSEMBLY));
3350: }
3351: PetscFunctionReturn(PETSC_SUCCESS);
3352: }
3354: PetscErrorCode PCBDDCAdaptiveSelection(PC pc)
3355: {
3356: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3357: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
3358: PetscBLASInt B_neigs, B_ierr, B_lwork;
3359: PetscBLASInt *B_iwork, *B_ifail;
3360: PetscScalar *work, lwork;
3361: PetscScalar *St, *S, *eigv;
3362: PetscScalar *Sarray, *Starray;
3363: PetscReal *eigs, thresh, lthresh, uthresh;
3364: PetscInt i, nmax, nmin, nv, cum, mss, cum2, cumarray, maxneigs;
3365: PetscBool allocated_S_St, upart;
3366: #if defined(PETSC_USE_COMPLEX)
3367: PetscReal *rwork;
3368: #endif
3370: PetscFunctionBegin;
3371: if (!pcbddc->adaptive_selection) PetscFunctionReturn(PETSC_SUCCESS);
3372: PetscCheck(sub_schurs, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Adaptive selection of constraints requires SubSchurs data");
3373: PetscCheck(sub_schurs->schur_explicit || !sub_schurs->n_subs, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO");
3374: PetscCheck(!sub_schurs->n_subs || sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)", sub_schurs->is_hermitian, sub_schurs->is_symmetric,
3375: sub_schurs->is_posdef);
3376: PetscCall(PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0));
3378: if (pcbddc->dbg_flag) {
3379: if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
3380: PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
3381: PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
3382: PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check adaptive selection of constraints\n"));
3383: PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
3384: }
3386: if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d cc %" PetscInt_FMT " (%d,%d).\n", PetscGlobalRank, sub_schurs->n_subs, sub_schurs->is_hermitian, sub_schurs->is_posdef));
3388: /* max size of subsets */
3389: mss = 0;
3390: for (i = 0; i < sub_schurs->n_subs; i++) {
3391: PetscInt subset_size;
3393: PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3394: mss = PetscMax(mss, subset_size);
3395: }
3397: /* min/max and threshold */
3398: nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss;
3399: nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0;
3400: nmax = PetscMax(nmin, nmax);
3401: allocated_S_St = PETSC_FALSE;
3402: if (nmin || !sub_schurs->is_posdef) { /* XXX */
3403: allocated_S_St = PETSC_TRUE;
3404: }
3406: /* allocate lapack workspace */
3407: cum = cum2 = 0;
3408: maxneigs = 0;
3409: for (i = 0; i < sub_schurs->n_subs; i++) {
3410: PetscInt n, subset_size;
3412: PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3413: n = PetscMin(subset_size, nmax);
3414: cum += subset_size;
3415: cum2 += subset_size * n;
3416: maxneigs = PetscMax(maxneigs, n);
3417: }
3418: lwork = 0;
3419: if (mss) {
3420: PetscScalar sdummy = 0.;
3421: PetscBLASInt B_itype = 1;
3422: PetscBLASInt B_N, idummy = 0;
3423: PetscReal rdummy = 0., zero = 0.0;
3424: PetscReal eps = 0.0; /* dlamch? */
3426: PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3427: PetscCall(PetscBLASIntCast(mss, &B_N));
3428: B_lwork = -1;
3429: /* some implementations may complain about NULL pointers, even if we are querying */
3430: S = &sdummy;
3431: St = &sdummy;
3432: eigs = &rdummy;
3433: eigv = &sdummy;
3434: B_iwork = &idummy;
3435: B_ifail = &idummy;
3436: #if defined(PETSC_USE_COMPLEX)
3437: rwork = &rdummy;
3438: #endif
3439: thresh = 1.0;
3440: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3441: #if defined(PETSC_USE_COMPLEX)
3442: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, B_iwork, B_iwork, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3443: #else
3444: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &zero, &thresh, B_iwork, B_iwork, &eps, &B_neigs, eigs, eigv, &B_N, &lwork, &B_lwork, B_iwork, B_ifail, &B_ierr));
3445: #endif
3446: PetscCheck(B_ierr == 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYGVX Lapack routine %" PetscBLASInt_FMT, B_ierr);
3447: PetscCall(PetscFPTrapPop());
3448: }
3450: nv = 0;
3451: if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */
3452: PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &nv));
3453: }
3454: PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lwork), &B_lwork));
3455: if (allocated_S_St) PetscCall(PetscMalloc2(mss * mss, &S, mss * mss, &St));
3456: PetscCall(PetscMalloc5(mss * mss, &eigv, mss, &eigs, B_lwork, &work, 5 * mss, &B_iwork, mss, &B_ifail));
3457: #if defined(PETSC_USE_COMPLEX)
3458: PetscCall(PetscMalloc1(7 * mss, &rwork));
3459: #endif
3460: PetscCall(PetscMalloc5(nv + sub_schurs->n_subs, &pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_idxs_ptr, nv + sub_schurs->n_subs + 1, &pcbddc->adaptive_constraints_data_ptr, nv + cum, &pcbddc->adaptive_constraints_idxs, nv + cum2,
3461: &pcbddc->adaptive_constraints_data));
3462: PetscCall(PetscArrayzero(pcbddc->adaptive_constraints_n, nv + sub_schurs->n_subs));
3464: maxneigs = 0;
3465: cum = cumarray = 0;
3466: pcbddc->adaptive_constraints_idxs_ptr[0] = 0;
3467: pcbddc->adaptive_constraints_data_ptr[0] = 0;
3468: if (sub_schurs->is_vertices && pcbddc->use_vertices) {
3469: const PetscInt *idxs;
3471: PetscCall(ISGetIndices(sub_schurs->is_vertices, &idxs));
3472: for (cum = 0; cum < nv; cum++) {
3473: pcbddc->adaptive_constraints_n[cum] = 1;
3474: pcbddc->adaptive_constraints_idxs[cum] = idxs[cum];
3475: pcbddc->adaptive_constraints_data[cum] = 1.0;
3476: pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + 1;
3477: pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + 1;
3478: }
3479: PetscCall(ISRestoreIndices(sub_schurs->is_vertices, &idxs));
3480: }
3482: if (mss) { /* multilevel */
3483: if (sub_schurs->gdsw) {
3484: PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray));
3485: PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3486: } else {
3487: PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all, &Sarray));
3488: PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3489: }
3490: }
3492: lthresh = pcbddc->adaptive_threshold[0];
3493: uthresh = pcbddc->adaptive_threshold[1];
3494: upart = pcbddc->use_deluxe_scaling;
3495: for (i = 0; i < sub_schurs->n_subs; i++) {
3496: const PetscInt *idxs;
3497: PetscReal upper, lower;
3498: PetscInt j, subset_size, eigs_start = 0;
3499: PetscBLASInt B_N;
3500: PetscBool same_data = PETSC_FALSE;
3501: PetscBool scal = PETSC_FALSE;
3503: if (upart) {
3504: upper = PETSC_MAX_REAL;
3505: lower = uthresh;
3506: } else {
3507: if (sub_schurs->gdsw) {
3508: upper = uthresh;
3509: lower = PETSC_MIN_REAL;
3510: } else {
3511: PetscCheck(sub_schurs->is_posdef, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented without deluxe scaling");
3512: upper = 1. / uthresh;
3513: lower = 0.;
3514: }
3515: }
3516: PetscCall(ISGetLocalSize(sub_schurs->is_subs[i], &subset_size));
3517: PetscCall(ISGetIndices(sub_schurs->is_subs[i], &idxs));
3518: PetscCall(PetscBLASIntCast(subset_size, &B_N));
3519: /* this is experimental: we assume the dofs have been properly grouped to have
3520: the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */
3521: if (!sub_schurs->is_posdef) {
3522: Mat T;
3524: for (j = 0; j < subset_size; j++) {
3525: if (PetscRealPart(*(Sarray + cumarray + j * (subset_size + 1))) < 0.0) {
3526: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Sarray + cumarray, &T));
3527: PetscCall(MatScale(T, -1.0));
3528: PetscCall(MatDestroy(&T));
3529: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, subset_size, Starray + cumarray, &T));
3530: PetscCall(MatScale(T, -1.0));
3531: PetscCall(MatDestroy(&T));
3532: if (sub_schurs->change_primal_sub) {
3533: PetscInt nz, k;
3534: const PetscInt *idxs;
3536: PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nz));
3537: PetscCall(ISGetIndices(sub_schurs->change_primal_sub[i], &idxs));
3538: for (k = 0; k < nz; k++) {
3539: *(Sarray + cumarray + idxs[k] * (subset_size + 1)) *= -1.0;
3540: *(Starray + cumarray + idxs[k] * (subset_size + 1)) = 0.0;
3541: }
3542: PetscCall(ISRestoreIndices(sub_schurs->change_primal_sub[i], &idxs));
3543: }
3544: scal = PETSC_TRUE;
3545: break;
3546: }
3547: }
3548: }
3550: if (allocated_S_St) { /* S and S_t should be copied since we could need them later */
3551: if (sub_schurs->is_symmetric) {
3552: PetscInt j, k;
3553: if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscArraycmp() later */
3554: PetscCall(PetscArrayzero(S, subset_size * subset_size));
3555: PetscCall(PetscArrayzero(St, subset_size * subset_size));
3556: }
3557: for (j = 0; j < subset_size; j++) {
3558: for (k = j; k < subset_size; k++) {
3559: S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k];
3560: St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3561: }
3562: }
3563: } else {
3564: PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3565: PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3566: }
3567: } else {
3568: S = Sarray + cumarray;
3569: St = Starray + cumarray;
3570: }
3571: /* see if we can save some work */
3572: if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) PetscCall(PetscArraycmp(S, St, subset_size * subset_size, &same_data));
3574: if (same_data && !sub_schurs->change) { /* there's no need of constraints here */
3575: B_neigs = 0;
3576: } else {
3577: PetscBLASInt B_itype = 1, B_IL = 1, B_IU = 0;
3578: PetscReal eps = -1.0; /* dlamch? */
3579: PetscInt nmin_s;
3580: PetscBool compute_range;
3582: PetscCheck(sub_schurs->is_symmetric, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
3583: B_neigs = 0;
3584: compute_range = (PetscBool)!same_data;
3585: if (nmin >= subset_size) compute_range = PETSC_FALSE;
3587: if (pcbddc->dbg_flag) {
3588: PetscInt nc = 0, c = pcbddc->mat_graph->nodes[idxs[0]].count, w = pcbddc->mat_graph->nodes[idxs[0]].which_dof;
3590: if (sub_schurs->change_primal_sub) PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nc));
3591: PetscCall(
3592: PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Computing for sub %" PetscInt_FMT "/%" PetscInt_FMT " size %" PetscInt_FMT " count %" PetscInt_FMT " fid %" PetscInt_FMT " (range %d) (change %" PetscInt_FMT ").\n", i, sub_schurs->n_subs, subset_size, c, w, compute_range, nc));
3593: }
3595: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3596: if (compute_range) {
3597: /* ask for eigenvalues larger than thresh */
3598: if (sub_schurs->is_posdef) {
3599: #if defined(PETSC_USE_COMPLEX)
3600: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3601: #else
3602: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3603: #endif
3604: PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3605: } else { /* no theory so far, but it works nicely */
3606: PetscInt recipe = 0, recipe_m = 1;
3607: PetscReal bb[2];
3609: PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe", &recipe, NULL));
3610: switch (recipe) {
3611: case 0:
3612: if (scal) {
3613: bb[0] = PETSC_MIN_REAL;
3614: bb[1] = lthresh;
3615: } else {
3616: bb[0] = uthresh;
3617: bb[1] = PETSC_MAX_REAL;
3618: }
3619: #if defined(PETSC_USE_COMPLEX)
3620: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3621: #else
3622: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3623: #endif
3624: PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3625: break;
3626: case 1:
3627: bb[0] = PETSC_MIN_REAL;
3628: bb[1] = lthresh * lthresh;
3629: #if defined(PETSC_USE_COMPLEX)
3630: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3631: #else
3632: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3633: #endif
3634: PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3635: if (!scal) {
3636: PetscBLASInt B_neigs2 = 0;
3638: bb[0] = PetscMax(lthresh * lthresh, uthresh);
3639: bb[1] = PETSC_MAX_REAL;
3640: PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3641: PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3642: #if defined(PETSC_USE_COMPLEX)
3643: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3644: #else
3645: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3646: #endif
3647: PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3648: B_neigs += B_neigs2;
3649: }
3650: break;
3651: case 2:
3652: if (scal) {
3653: bb[0] = PETSC_MIN_REAL;
3654: bb[1] = 0;
3655: #if defined(PETSC_USE_COMPLEX)
3656: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3657: #else
3658: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3659: #endif
3660: PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3661: } else {
3662: PetscBLASInt B_neigs2 = 0;
3663: PetscBool do_copy = PETSC_FALSE;
3665: lthresh = PetscMax(lthresh, 0.0);
3666: if (lthresh > 0.0) {
3667: bb[0] = PETSC_MIN_REAL;
3668: bb[1] = lthresh * lthresh;
3670: do_copy = PETSC_TRUE;
3671: #if defined(PETSC_USE_COMPLEX)
3672: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3673: #else
3674: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3675: #endif
3676: PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3677: }
3678: bb[0] = PetscMax(lthresh * lthresh, uthresh);
3679: bb[1] = PETSC_MAX_REAL;
3680: if (do_copy) {
3681: PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3682: PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3683: }
3684: #if defined(PETSC_USE_COMPLEX)
3685: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3686: #else
3687: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3688: #endif
3689: PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3690: B_neigs += B_neigs2;
3691: }
3692: break;
3693: case 3:
3694: if (scal) {
3695: PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min_scal", &recipe_m, NULL));
3696: } else {
3697: PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)pc)->prefix, "-pc_bddc_adaptive_recipe3_min", &recipe_m, NULL));
3698: }
3699: if (!scal) {
3700: bb[0] = uthresh;
3701: bb[1] = PETSC_MAX_REAL;
3702: #if defined(PETSC_USE_COMPLEX)
3703: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3704: #else
3705: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3706: #endif
3707: PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3708: }
3709: if (recipe_m > 0 && B_N - B_neigs > 0) {
3710: PetscBLASInt B_neigs2 = 0;
3712: PetscCall(PetscBLASIntCast(PetscMin(recipe_m, B_N - B_neigs), &B_IU));
3713: PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3714: PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3715: #if defined(PETSC_USE_COMPLEX)
3716: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3717: #else
3718: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3719: #endif
3720: PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3721: B_neigs += B_neigs2;
3722: }
3723: break;
3724: case 4:
3725: bb[0] = PETSC_MIN_REAL;
3726: bb[1] = lthresh;
3727: #if defined(PETSC_USE_COMPLEX)
3728: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3729: #else
3730: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3731: #endif
3732: PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3733: {
3734: PetscBLASInt B_neigs2 = 0;
3736: bb[0] = PetscMax(lthresh + PETSC_SMALL, uthresh);
3737: bb[1] = PETSC_MAX_REAL;
3738: PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3739: PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3740: #if defined(PETSC_USE_COMPLEX)
3741: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3742: #else
3743: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "V", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * B_N, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3744: #endif
3745: PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3746: B_neigs += B_neigs2;
3747: }
3748: break;
3749: case 5: /* same as before: first compute all eigenvalues, then filter */
3750: #if defined(PETSC_USE_COMPLEX)
3751: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3752: #else
3753: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "A", "L", &B_N, St, &B_N, S, &B_N, &bb[0], &bb[1], &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3754: #endif
3755: PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3756: {
3757: PetscInt e, k, ne;
3758: for (e = 0, ne = 0; e < B_neigs; e++) {
3759: if (eigs[e] < lthresh || eigs[e] > uthresh) {
3760: for (k = 0; k < B_N; k++) S[ne * B_N + k] = eigv[e * B_N + k];
3761: eigs[ne] = eigs[e];
3762: ne++;
3763: }
3764: }
3765: PetscCall(PetscArraycpy(eigv, S, B_N * ne));
3766: PetscCall(PetscBLASIntCast(ne, &B_neigs));
3767: }
3768: break;
3769: default:
3770: SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Unknown recipe %" PetscInt_FMT, recipe);
3771: }
3772: }
3773: } else if (!same_data) { /* this is just to see all the eigenvalues */
3774: PetscCall(PetscBLASIntCast(PetscMax(1, PetscMin(B_N, nmax)), &B_IU));
3775: #if defined(PETSC_USE_COMPLEX)
3776: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3777: #else
3778: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs, eigs, eigv, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3779: #endif
3780: PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3781: } else { /* same_data is true, so just get the adaptive functional requested by the user */
3782: PetscInt k;
3783: PetscCheck(sub_schurs->change_primal_sub, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
3784: PetscCall(ISGetLocalSize(sub_schurs->change_primal_sub[i], &nmax));
3785: PetscCall(PetscBLASIntCast(nmax, &B_neigs));
3786: nmin = nmax;
3787: PetscCall(PetscArrayzero(eigv, subset_size * nmax));
3788: for (k = 0; k < nmax; k++) {
3789: eigs[k] = 1. / PETSC_SMALL;
3790: eigv[k * (subset_size + 1)] = 1.0;
3791: }
3792: }
3793: PetscCall(PetscFPTrapPop());
3794: if (B_ierr) {
3795: PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr);
3796: PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr);
3797: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3798: }
3800: if (B_neigs > nmax) {
3801: if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, more than maximum required %" PetscInt_FMT ".\n", B_neigs, nmax));
3802: if (upart) eigs_start = scal ? 0 : B_neigs - nmax;
3803: PetscCall(PetscBLASIntCast(nmax, &B_neigs));
3804: }
3806: nmin_s = PetscMin(nmin, B_N);
3807: if (B_neigs < nmin_s) {
3808: PetscBLASInt B_neigs2 = 0;
3810: if (upart) {
3811: if (scal) {
3812: PetscCall(PetscBLASIntCast(nmin_s, &B_IU));
3813: B_IL = B_neigs + 1;
3814: } else {
3815: PetscCall(PetscBLASIntCast(B_N - nmin_s + 1, &B_IL));
3816: B_IU = B_N - B_neigs;
3817: }
3818: } else {
3819: B_IL = B_neigs + 1;
3820: PetscCall(PetscBLASIntCast(nmin_s, &B_IU));
3821: }
3822: if (pcbddc->dbg_flag) {
3823: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " found %" PetscBLASInt_FMT " eigs, less than minimum required %" PetscInt_FMT ". Asking for %" PetscBLASInt_FMT " to %" PetscBLASInt_FMT " incl (fortran like)\n", B_neigs, nmin, B_IL, B_IU));
3824: }
3825: if (sub_schurs->is_symmetric) {
3826: PetscInt j, k;
3827: for (j = 0; j < subset_size; j++) {
3828: for (k = j; k < subset_size; k++) {
3829: S[j * subset_size + k] = Sarray[cumarray + j * subset_size + k];
3830: St[j * subset_size + k] = Starray[cumarray + j * subset_size + k];
3831: }
3832: }
3833: } else {
3834: PetscCall(PetscArraycpy(S, Sarray + cumarray, subset_size * subset_size));
3835: PetscCall(PetscArraycpy(St, Starray + cumarray, subset_size * subset_size));
3836: }
3837: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
3838: #if defined(PETSC_USE_COMPLEX)
3839: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, rwork, B_iwork, B_ifail, &B_ierr));
3840: #else
3841: PetscCallBLAS("LAPACKsygvx", LAPACKsygvx_(&B_itype, "V", "I", "L", &B_N, St, &B_N, S, &B_N, &lower, &upper, &B_IL, &B_IU, &eps, &B_neigs2, eigs + B_neigs, eigv + B_neigs * subset_size, &B_N, work, &B_lwork, B_iwork, B_ifail, &B_ierr));
3842: #endif
3843: PetscCall(PetscLogFlops((4.0 * subset_size * subset_size * subset_size) / 3.0));
3844: PetscCall(PetscFPTrapPop());
3845: B_neigs += B_neigs2;
3846: }
3847: if (B_ierr) {
3848: PetscCheck(B_ierr >= 0, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: illegal value for argument %" PetscBLASInt_FMT, -B_ierr);
3849: PetscCheck(B_ierr > B_N, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: %" PetscBLASInt_FMT " eigenvalues failed to converge", B_ierr);
3850: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYGVX Lapack routine: leading minor of order %" PetscBLASInt_FMT " is not positive definite", B_ierr - B_N - 1);
3851: }
3852: if (pcbddc->dbg_flag) {
3853: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Got %" PetscBLASInt_FMT " eigs\n", B_neigs));
3854: for (j = 0; j < B_neigs; j++) {
3855: if (!sub_schurs->gdsw) {
3856: if (eigs[j] == 0.0) {
3857: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " Inf\n"));
3858: } else {
3859: if (upart) {
3860: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)eigs[j + eigs_start]));
3861: } else {
3862: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", (double)(1 / eigs[j + eigs_start])));
3863: }
3864: }
3865: } else {
3866: double pg = (double)eigs[j + eigs_start];
3867: if (pg < 2 * PETSC_SMALL) pg = 0.0;
3868: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.6e\n", pg));
3869: }
3870: }
3871: }
3872: }
3873: /* change the basis back to the original one */
3874: if (sub_schurs->change) {
3875: Mat change, phi, phit;
3877: if (pcbddc->dbg_flag > 2) {
3878: PetscInt ii;
3879: for (ii = 0; ii < B_neigs; ii++) {
3880: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector (old basis) %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N));
3881: for (j = 0; j < B_N; j++) {
3882: #if defined(PETSC_USE_COMPLEX)
3883: PetscReal r = PetscRealPart(eigv[(ii + eigs_start) * subset_size + j]);
3884: PetscReal c = PetscImaginaryPart(eigv[(ii + eigs_start) * subset_size + j]);
3885: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c));
3886: #else
3887: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)(eigv[(ii + eigs_start) * subset_size + j])));
3888: #endif
3889: }
3890: }
3891: }
3892: PetscCall(KSPGetOperators(sub_schurs->change[i], &change, NULL));
3893: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, subset_size, B_neigs, eigv + eigs_start * subset_size, &phit));
3894: PetscCall(MatMatMult(change, phit, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &phi));
3895: PetscCall(MatCopy(phi, phit, SAME_NONZERO_PATTERN));
3896: PetscCall(MatDestroy(&phit));
3897: PetscCall(MatDestroy(&phi));
3898: }
3899: maxneigs = PetscMax(B_neigs, maxneigs);
3900: pcbddc->adaptive_constraints_n[i + nv] = B_neigs;
3901: if (B_neigs) {
3902: PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_data + pcbddc->adaptive_constraints_data_ptr[cum], eigv + eigs_start * subset_size, B_neigs * subset_size));
3904: if (pcbddc->dbg_flag > 1) {
3905: PetscInt ii;
3906: for (ii = 0; ii < B_neigs; ii++) {
3907: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " -> Eigenvector %" PetscInt_FMT "/%" PetscBLASInt_FMT " (%" PetscBLASInt_FMT ")\n", ii, B_neigs, B_N));
3908: for (j = 0; j < B_N; j++) {
3909: #if defined(PETSC_USE_COMPLEX)
3910: PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3911: PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]]);
3912: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e + %1.4e i\n", (double)r, (double)c));
3913: #else
3914: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, " %1.4e\n", (double)PetscRealPart(pcbddc->adaptive_constraints_data[ii * subset_size + j + pcbddc->adaptive_constraints_data_ptr[cum]])));
3915: #endif
3916: }
3917: }
3918: }
3919: PetscCall(PetscArraycpy(pcbddc->adaptive_constraints_idxs + pcbddc->adaptive_constraints_idxs_ptr[cum], idxs, subset_size));
3920: pcbddc->adaptive_constraints_idxs_ptr[cum + 1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size;
3921: pcbddc->adaptive_constraints_data_ptr[cum + 1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size * B_neigs;
3922: cum++;
3923: }
3924: PetscCall(ISRestoreIndices(sub_schurs->is_subs[i], &idxs));
3925: /* shift for next computation */
3926: cumarray += subset_size * subset_size;
3927: }
3928: if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
3930: if (mss) {
3931: if (sub_schurs->gdsw) {
3932: PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all, &Sarray));
3933: PetscCall(MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3934: } else {
3935: PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all, &Sarray));
3936: PetscCall(MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all, &Starray));
3937: /* destroy matrices (junk) */
3938: PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_inv_all));
3939: PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_tilda_all));
3940: }
3941: }
3942: if (allocated_S_St) PetscCall(PetscFree2(S, St));
3943: PetscCall(PetscFree5(eigv, eigs, work, B_iwork, B_ifail));
3944: #if defined(PETSC_USE_COMPLEX)
3945: PetscCall(PetscFree(rwork));
3946: #endif
3947: if (pcbddc->dbg_flag) {
3948: PetscInt maxneigs_r;
3949: PetscCallMPI(MPIU_Allreduce(&maxneigs, &maxneigs_r, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)pc)));
3950: PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of constraints per cc %" PetscInt_FMT "\n", maxneigs_r));
3951: }
3952: PetscCall(PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level], pc, 0, 0, 0));
3953: PetscFunctionReturn(PETSC_SUCCESS);
3954: }
3956: PetscErrorCode PCBDDCSetUpSolvers(PC pc)
3957: {
3958: Mat coarse_submat;
3960: PetscFunctionBegin;
3961: /* Setup local scatters R_to_B and (optionally) R_to_D */
3962: /* PCBDDCSetUpLocalWorkVectors should be called first! */
3963: PetscCall(PCBDDCSetUpLocalScatters(pc));
3965: /* Setup local neumann solver ksp_R */
3966: /* PCBDDCSetUpLocalScatters should be called first! */
3967: PetscCall(PCBDDCSetUpLocalSolvers(pc, PETSC_FALSE, PETSC_TRUE));
3969: /*
3970: Setup local correction and local part of coarse basis.
3971: Gives back the dense local part of the coarse matrix in column major ordering
3972: */
3973: PetscCall(PCBDDCSetUpCorrection(pc, &coarse_submat));
3975: /* Compute total number of coarse nodes and setup coarse solver */
3976: PetscCall(PCBDDCSetUpCoarseSolver(pc, coarse_submat));
3977: PetscCall(MatDestroy(&coarse_submat));
3978: PetscFunctionReturn(PETSC_SUCCESS);
3979: }
3981: PetscErrorCode PCBDDCResetCustomization(PC pc)
3982: {
3983: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
3985: PetscFunctionBegin;
3986: PetscCall(ISDestroy(&pcbddc->user_primal_vertices));
3987: PetscCall(ISDestroy(&pcbddc->user_primal_vertices_local));
3988: PetscCall(ISDestroy(&pcbddc->NeumannBoundaries));
3989: PetscCall(ISDestroy(&pcbddc->NeumannBoundariesLocal));
3990: PetscCall(ISDestroy(&pcbddc->DirichletBoundaries));
3991: PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace));
3992: PetscCall(PetscFree(pcbddc->onearnullvecs_state));
3993: PetscCall(ISDestroy(&pcbddc->DirichletBoundariesLocal));
3994: PetscCall(PCBDDCSetDofsSplitting(pc, 0, NULL));
3995: PetscCall(PCBDDCSetDofsSplittingLocal(pc, 0, NULL));
3996: PetscFunctionReturn(PETSC_SUCCESS);
3997: }
3999: PetscErrorCode PCBDDCResetTopography(PC pc)
4000: {
4001: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
4002: PetscInt i;
4004: PetscFunctionBegin;
4005: PetscCall(MatDestroy(&pcbddc->nedcG));
4006: PetscCall(ISDestroy(&pcbddc->nedclocal));
4007: PetscCall(MatDestroy(&pcbddc->discretegradient));
4008: PetscCall(MatDestroy(&pcbddc->user_ChangeOfBasisMatrix));
4009: PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix));
4010: PetscCall(MatDestroy(&pcbddc->switch_static_change));
4011: PetscCall(VecDestroy(&pcbddc->work_change));
4012: PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
4013: PetscCall(MatDestroy(&pcbddc->divudotp));
4014: PetscCall(ISDestroy(&pcbddc->divudotp_vl2l));
4015: PetscCall(PCBDDCGraphDestroy(&pcbddc->mat_graph));
4016: for (i = 0; i < pcbddc->n_local_subs; i++) PetscCall(ISDestroy(&pcbddc->local_subs[i]));
4017: pcbddc->n_local_subs = 0;
4018: PetscCall(PetscFree(pcbddc->local_subs));
4019: PetscCall(PCBDDCSubSchursDestroy(&pcbddc->sub_schurs));
4020: pcbddc->graphanalyzed = PETSC_FALSE;
4021: pcbddc->recompute_topography = PETSC_TRUE;
4022: pcbddc->corner_selected = PETSC_FALSE;
4023: PetscFunctionReturn(PETSC_SUCCESS);
4024: }
4026: PetscErrorCode PCBDDCResetSolvers(PC pc)
4027: {
4028: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
4030: PetscFunctionBegin;
4031: PetscCall(VecDestroy(&pcbddc->coarse_vec));
4032: PetscCall(MatDestroy(&pcbddc->coarse_phi_B));
4033: PetscCall(MatDestroy(&pcbddc->coarse_phi_D));
4034: PetscCall(MatDestroy(&pcbddc->coarse_psi_B));
4035: PetscCall(MatDestroy(&pcbddc->coarse_psi_D));
4036: PetscCall(VecDestroy(&pcbddc->vec1_P));
4037: PetscCall(VecDestroy(&pcbddc->vec1_C));
4038: PetscCall(MatDestroy(&pcbddc->local_auxmat2));
4039: PetscCall(MatDestroy(&pcbddc->local_auxmat1));
4040: PetscCall(VecDestroy(&pcbddc->vec1_R));
4041: PetscCall(VecDestroy(&pcbddc->vec2_R));
4042: PetscCall(ISDestroy(&pcbddc->is_R_local));
4043: PetscCall(VecScatterDestroy(&pcbddc->R_to_B));
4044: PetscCall(VecScatterDestroy(&pcbddc->R_to_D));
4045: PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob));
4046: PetscCall(KSPReset(pcbddc->ksp_D));
4047: PetscCall(KSPReset(pcbddc->ksp_R));
4048: PetscCall(KSPReset(pcbddc->coarse_ksp));
4049: PetscCall(MatDestroy(&pcbddc->local_mat));
4050: PetscCall(PetscFree(pcbddc->primal_indices_local_idxs));
4051: PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult));
4052: PetscCall(PetscFree(pcbddc->global_primal_indices));
4053: PetscCall(ISDestroy(&pcbddc->coarse_subassembling));
4054: PetscCall(MatDestroy(&pcbddc->benign_change));
4055: PetscCall(VecDestroy(&pcbddc->benign_vec));
4056: PetscCall(PCBDDCBenignShellMat(pc, PETSC_TRUE));
4057: PetscCall(MatDestroy(&pcbddc->benign_B0));
4058: PetscCall(PetscSFDestroy(&pcbddc->benign_sf));
4059: if (pcbddc->benign_zerodiag_subs) {
4060: PetscInt i;
4061: for (i = 0; i < pcbddc->benign_n; i++) PetscCall(ISDestroy(&pcbddc->benign_zerodiag_subs[i]));
4062: PetscCall(PetscFree(pcbddc->benign_zerodiag_subs));
4063: }
4064: PetscCall(PetscFree3(pcbddc->benign_p0_lidx, pcbddc->benign_p0_gidx, pcbddc->benign_p0));
4065: PetscFunctionReturn(PETSC_SUCCESS);
4066: }
4068: PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc)
4069: {
4070: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
4071: PC_IS *pcis = (PC_IS *)pc->data;
4072: VecType impVecType;
4073: PetscInt n_constraints, n_R, old_size;
4075: PetscFunctionBegin;
4076: n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices;
4077: n_R = pcis->n - pcbddc->n_vertices;
4078: PetscCall(VecGetType(pcis->vec1_N, &impVecType));
4079: /* local work vectors (try to avoid unneeded work)*/
4080: /* R nodes */
4081: old_size = -1;
4082: if (pcbddc->vec1_R) PetscCall(VecGetSize(pcbddc->vec1_R, &old_size));
4083: if (n_R != old_size) {
4084: PetscCall(VecDestroy(&pcbddc->vec1_R));
4085: PetscCall(VecDestroy(&pcbddc->vec2_R));
4086: PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_R));
4087: PetscCall(VecSetSizes(pcbddc->vec1_R, PETSC_DECIDE, n_R));
4088: PetscCall(VecSetType(pcbddc->vec1_R, impVecType));
4089: PetscCall(VecDuplicate(pcbddc->vec1_R, &pcbddc->vec2_R));
4090: }
4091: /* local primal dofs */
4092: old_size = -1;
4093: if (pcbddc->vec1_P) PetscCall(VecGetSize(pcbddc->vec1_P, &old_size));
4094: if (pcbddc->local_primal_size != old_size) {
4095: PetscCall(VecDestroy(&pcbddc->vec1_P));
4096: PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_P));
4097: PetscCall(VecSetSizes(pcbddc->vec1_P, PETSC_DECIDE, pcbddc->local_primal_size));
4098: PetscCall(VecSetType(pcbddc->vec1_P, impVecType));
4099: }
4100: /* local explicit constraints */
4101: old_size = -1;
4102: if (pcbddc->vec1_C) PetscCall(VecGetSize(pcbddc->vec1_C, &old_size));
4103: if (n_constraints && n_constraints != old_size) {
4104: PetscCall(VecDestroy(&pcbddc->vec1_C));
4105: PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &pcbddc->vec1_C));
4106: PetscCall(VecSetSizes(pcbddc->vec1_C, PETSC_DECIDE, n_constraints));
4107: PetscCall(VecSetType(pcbddc->vec1_C, impVecType));
4108: }
4109: PetscFunctionReturn(PETSC_SUCCESS);
4110: }
4112: static PetscErrorCode MatSetValuesSubMat(Mat A, Mat S, PetscInt nr, const PetscInt rows[], PetscInt nc, const PetscInt cols[], InsertMode imode)
4113: {
4114: PetscBool flg;
4115: const PetscScalar *a;
4117: PetscFunctionBegin;
4118: PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQDENSE, &flg));
4119: if (flg) {
4120: PetscCall(MatDenseGetArrayRead(S, &a));
4121: PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_FALSE));
4122: PetscCall(MatSetValues(A, nr, rows, nc, cols, a, imode));
4123: PetscCall(MatSetOption(A, MAT_ROW_ORIENTED, PETSC_TRUE));
4124: PetscCall(MatDenseRestoreArrayRead(S, &a));
4125: } else {
4126: const PetscInt *ii, *jj;
4127: PetscInt n;
4128: PetscInt buf[8192], *bufc = NULL;
4129: PetscBool freeb = PETSC_FALSE;
4130: Mat Sm = S;
4132: PetscCall(PetscObjectBaseTypeCompare((PetscObject)S, MATSEQAIJ, &flg));
4133: if (!flg) PetscCall(MatConvert(S, MATSEQAIJ, MAT_INITIAL_MATRIX, &Sm));
4134: else PetscCall(PetscObjectReference((PetscObject)S));
4135: PetscCall(MatSeqAIJGetArrayRead(Sm, &a));
4136: PetscCall(MatGetRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg));
4137: PetscCheck(flg, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Cannot get IJ structure");
4138: if (nc <= (PetscInt)PETSC_STATIC_ARRAY_LENGTH(buf)) {
4139: bufc = buf;
4140: } else {
4141: PetscCall(PetscMalloc1(nc, &bufc));
4142: freeb = PETSC_TRUE;
4143: }
4145: for (PetscInt i = 0; i < n; i++) {
4146: const PetscInt nci = ii[i + 1] - ii[i];
4148: for (PetscInt j = 0; j < nci; j++) bufc[j] = cols[jj[ii[i] + j]];
4149: PetscCall(MatSetValues(A, 1, rows + i, nci, bufc, a + ii[i], imode));
4150: }
4151: PetscCall(MatRestoreRowIJ(Sm, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg));
4152: PetscCall(MatSeqAIJRestoreArrayRead(Sm, &a));
4153: PetscCall(MatDestroy(&Sm));
4154: if (freeb) PetscCall(PetscFree(bufc));
4155: }
4156: PetscCall(MatAssemblyBegin(A, MAT_FLUSH_ASSEMBLY));
4157: PetscCall(MatAssemblyEnd(A, MAT_FLUSH_ASSEMBLY));
4158: PetscFunctionReturn(PETSC_SUCCESS);
4159: }
4161: static PetscErrorCode MatCreateSeqAIJFromDenseExpand(Mat D, PetscInt n, const PetscInt j[], Mat *mat)
4162: {
4163: Mat_SeqAIJ *aij;
4164: PetscInt *ii, *jj;
4165: PetscScalar *aa;
4166: PetscInt nnz = 0, m, nc;
4167: const PetscScalar *a;
4168: const PetscScalar zero = 0.0;
4170: PetscFunctionBegin;
4171: PetscCall(MatGetLocalSize(D, &m, &nc));
4172: PetscCall(MatDenseGetArrayRead(D, &a));
4173: PetscCall(PetscMalloc1(m + 1, &ii));
4174: PetscCall(PetscMalloc1(m * nc, &jj));
4175: PetscCall(PetscMalloc1(m * nc, &aa));
4176: ii[0] = 0;
4177: for (PetscInt k = 0; k < m; k++) {
4178: for (PetscInt s = 0; s < nc; s++) {
4179: const PetscInt c = s + k * nc;
4180: const PetscScalar v = a[k + s * m];
4182: if (PetscUnlikely(j[c] < 0 || v == zero)) continue;
4183: jj[nnz] = j[c];
4184: aa[nnz] = a[k + s * m];
4185: nnz++;
4186: }
4187: ii[k + 1] = nnz;
4188: }
4190: PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)D), m, n, ii, jj, aa, mat));
4191: PetscCall(MatDenseRestoreArrayRead(D, &a));
4193: aij = (Mat_SeqAIJ *)(*mat)->data;
4194: aij->free_a = PETSC_TRUE;
4195: aij->free_ij = PETSC_TRUE;
4196: PetscFunctionReturn(PETSC_SUCCESS);
4197: }
4199: /* adapted from MatInvertVariableBlockDiagonal_SeqAIJ */
4200: static PetscErrorCode MatSeqAIJInvertVariableBlockDiagonalMat(Mat A, PetscInt nblocks, const PetscInt *bsizes, Mat *B)
4201: {
4202: PetscInt n = A->rmap->n, ncnt = 0, ncnt2 = 0, bsizemax = 0, *v_pivots = NULL;
4203: const PetscBool allowzeropivot = PETSC_FALSE;
4204: PetscBool zeropivotdetected = PETSC_FALSE;
4205: const PetscReal shift = 0.0;
4206: PetscInt ipvt[5], *ii, *jj, *indi, *indj;
4207: PetscScalar work[25], *v_work = NULL, *aa, *diag;
4208: PetscLogDouble flops = 0.0;
4210: PetscFunctionBegin;
4211: PetscCheck(A->rmap->n == A->cmap->n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Not for rectangular matrices");
4212: for (PetscInt i = 0; i < nblocks; i++) {
4213: ncnt += bsizes[i];
4214: ncnt2 += PetscSqr(bsizes[i]);
4215: }
4216: PetscCheck(ncnt == n, PETSC_COMM_SELF, PETSC_ERR_ARG_SIZ, "Total blocksizes %" PetscInt_FMT " doesn't match number matrix rows %" PetscInt_FMT, ncnt, n);
4217: for (PetscInt i = 0; i < nblocks; i++) bsizemax = PetscMax(bsizemax, bsizes[i]);
4218: if (bsizemax > 7) PetscCall(PetscMalloc2(bsizemax, &v_work, bsizemax, &v_pivots));
4220: PetscCall(PetscMalloc1(n + 1, &ii));
4221: PetscCall(PetscMalloc1(ncnt2, &jj));
4222: PetscCall(PetscCalloc1(ncnt2, &aa));
4224: ncnt = 0;
4225: ii[0] = 0;
4226: indi = ii;
4227: indj = jj;
4228: diag = aa;
4229: for (PetscInt i = 0; i < nblocks; i++) {
4230: const PetscInt bs = bsizes[i];
4232: for (PetscInt k = 0; k < bs; k++) {
4233: indi[k + 1] = indi[k] + bs;
4234: for (PetscInt j = 0; j < bs; j++) indj[k * bs + j] = ncnt + j;
4235: }
4236: PetscCall(MatGetValues(A, bs, indj, bs, indj, diag));
4237: switch (bs) {
4238: case 1:
4239: *diag = 1.0 / (*diag);
4240: break;
4241: case 2:
4242: PetscCall(PetscKernel_A_gets_inverse_A_2(diag, shift, allowzeropivot, &zeropivotdetected));
4243: break;
4244: case 3:
4245: PetscCall(PetscKernel_A_gets_inverse_A_3(diag, shift, allowzeropivot, &zeropivotdetected));
4246: break;
4247: case 4:
4248: PetscCall(PetscKernel_A_gets_inverse_A_4(diag, shift, allowzeropivot, &zeropivotdetected));
4249: break;
4250: case 5:
4251: PetscCall(PetscKernel_A_gets_inverse_A_5(diag, ipvt, work, shift, allowzeropivot, &zeropivotdetected));
4252: break;
4253: case 6:
4254: PetscCall(PetscKernel_A_gets_inverse_A_6(diag, shift, allowzeropivot, &zeropivotdetected));
4255: break;
4256: case 7:
4257: PetscCall(PetscKernel_A_gets_inverse_A_7(diag, shift, allowzeropivot, &zeropivotdetected));
4258: break;
4259: default:
4260: PetscCall(PetscKernel_A_gets_inverse_A(bs, diag, v_pivots, v_work, allowzeropivot, &zeropivotdetected));
4261: }
4262: ncnt += bs;
4263: flops += 2.0 * PetscPowInt(bs, 3) / 3.0;
4264: diag += bs * bs;
4265: indj += bs * bs;
4266: indi += bs;
4267: }
4268: PetscCall(PetscLogFlops(flops));
4269: PetscCall(PetscFree2(v_work, v_pivots));
4270: PetscCall(MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A), n, n, ii, jj, aa, B));
4271: {
4272: Mat_SeqAIJ *aij = (Mat_SeqAIJ *)(*B)->data;
4273: aij->free_a = PETSC_TRUE;
4274: aij->free_ij = PETSC_TRUE;
4275: }
4276: PetscFunctionReturn(PETSC_SUCCESS);
4277: }
4279: static PetscErrorCode MatDenseScatter(Mat A, PetscSF sf, Mat B)
4280: {
4281: const PetscScalar *rarr;
4282: PetscScalar *larr;
4283: PetscSF vsf;
4284: PetscInt n, rld, lld;
4286: PetscFunctionBegin;
4287: PetscCall(MatGetSize(A, NULL, &n));
4288: PetscCall(MatDenseGetLDA(A, &rld));
4289: PetscCall(MatDenseGetLDA(B, &lld));
4290: PetscCall(MatDenseGetArrayRead(A, &rarr));
4291: PetscCall(MatDenseGetArrayWrite(B, &larr));
4292: PetscCall(PetscSFCreateStridedSF(sf, n, rld, lld, &vsf));
4293: PetscCall(PetscSFBcastBegin(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE));
4294: PetscCall(PetscSFBcastEnd(vsf, MPIU_SCALAR, rarr, larr, MPI_REPLACE));
4295: PetscCall(MatDenseRestoreArrayRead(A, &rarr));
4296: PetscCall(MatDenseRestoreArrayWrite(B, &larr));
4297: PetscCall(PetscSFDestroy(&vsf));
4298: PetscFunctionReturn(PETSC_SUCCESS);
4299: }
4301: PetscErrorCode PCBDDCSetUpCorrection(PC pc, Mat *coarse_submat)
4302: {
4303: PC_IS *pcis = (PC_IS *)pc->data;
4304: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
4305: PCBDDCGraph graph = pcbddc->mat_graph;
4306: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
4307: /* submatrices of local problem */
4308: Mat A_RV = NULL, A_VR, A_VV, local_auxmat2_R = NULL;
4309: /* submatrices of local coarse problem */
4310: Mat S_CV = NULL, S_VC = NULL, S_CC = NULL;
4311: /* working matrices */
4312: Mat C_CR;
4314: /* additional working stuff */
4315: PC pc_R;
4316: IS is_R, is_V, is_C;
4317: const PetscInt *idx_V, *idx_C;
4318: Mat F, Brhs = NULL;
4319: Vec dummy_vec;
4320: PetscBool isLU, isCHOL, need_benign_correction, sparserhs;
4321: PetscInt *idx_V_B;
4322: PetscInt lda_rhs, n_vertices, n_constraints, *p0_lidx_I;
4323: PetscInt n_eff_vertices, n_eff_constraints;
4324: PetscInt i, n_R, n_D, n_B;
4325: PetscScalar one = 1.0, m_one = -1.0;
4327: /* Multi-element support */
4328: PetscBool multi_element = graph->multi_element;
4329: PetscInt *V_to_eff_V = NULL, *C_to_eff_C = NULL;
4330: PetscInt *B_eff_V_J = NULL, *R_eff_V_J = NULL, *B_eff_C_J = NULL, *R_eff_C_J = NULL;
4331: IS is_C_perm = NULL;
4332: PetscInt n_C_bss = 0, *C_bss = NULL;
4333: Mat coarse_phi_multi;
4335: PetscFunctionBegin;
4336: PetscCheck(pcbddc->symmetric_primal || !pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Non-symmetric primal basis computation with benign trick not yet implemented");
4337: PetscCall(PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0));
4339: /* Set Non-overlapping dimensions */
4340: n_vertices = pcbddc->n_vertices;
4341: n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices;
4342: n_B = pcis->n_B;
4343: n_D = pcis->n - n_B;
4344: n_R = pcis->n - n_vertices;
4346: /* vertices in boundary numbering */
4347: PetscCall(PetscMalloc1(n_vertices, &idx_V_B));
4348: PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, n_vertices, pcbddc->local_primal_ref_node, &i, idx_V_B));
4349: PetscCheck(i == n_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, n_vertices, i);
4351: /* these two cases still need to be optimized */
4352: if (pcbddc->benign_saddle_point || !pcbddc->symmetric_primal) multi_element = PETSC_FALSE;
4354: /* Subdomain contribution (Non-overlapping) to coarse matrix */
4355: if (multi_element) {
4356: PetscCheck(!pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
4358: PetscCall(MatCreate(PETSC_COMM_SELF, coarse_submat));
4359: PetscCall(MatSetSizes(*coarse_submat, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size, pcbddc->local_primal_size));
4360: PetscCall(MatSetType(*coarse_submat, MATSEQAIJ));
4361: PetscCall(MatSetOption(*coarse_submat, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE));
4362: PetscCall(MatSetOption(*coarse_submat, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_TRUE));
4364: /* group vertices and constraints by subdomain id */
4365: const PetscInt *vidxs = pcbddc->primal_indices_local_idxs;
4366: const PetscInt *cidxs = pcbddc->primal_indices_local_idxs + n_vertices;
4367: PetscInt *count_eff, *V_eff_to_V, *C_eff_to_C, *nnz;
4368: PetscInt n_el = PetscMax(graph->n_local_subs, 1);
4370: PetscCall(PetscCalloc1(2 * n_el, &count_eff));
4371: PetscCall(PetscMalloc1(n_vertices, &V_to_eff_V));
4372: PetscCall(PetscMalloc1(n_constraints, &C_to_eff_C));
4373: for (PetscInt i = 0; i < n_vertices; i++) {
4374: PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub;
4376: V_to_eff_V[i] = count_eff[s];
4377: count_eff[s] += 1;
4378: }
4379: for (PetscInt i = 0; i < n_constraints; i++) {
4380: PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub + 1;
4382: C_to_eff_C[i] = count_eff[s];
4383: count_eff[s] += 1;
4384: }
4386: /* preallocation */
4387: PetscCall(PetscMalloc1(n_vertices + n_constraints, &nnz));
4388: for (PetscInt i = 0; i < n_vertices; i++) {
4389: PetscInt s = 2 * graph->nodes[vidxs[i]].local_sub;
4391: nnz[i] = count_eff[s] + count_eff[s + 1];
4392: }
4393: for (PetscInt i = 0; i < n_constraints; i++) {
4394: PetscInt s = 2 * graph->nodes[cidxs[i]].local_sub;
4396: nnz[i + n_vertices] = count_eff[s] + count_eff[s + 1];
4397: }
4398: PetscCall(MatSeqAIJSetPreallocation(*coarse_submat, 0, nnz));
4399: PetscCall(PetscFree(nnz));
4401: n_eff_vertices = 0;
4402: n_eff_constraints = 0;
4403: for (PetscInt i = 0; i < n_el; i++) {
4404: n_eff_vertices = PetscMax(n_eff_vertices, count_eff[2 * i]);
4405: n_eff_constraints = PetscMax(n_eff_constraints, count_eff[2 * i + 1]);
4406: count_eff[2 * i] = 0;
4407: count_eff[2 * i + 1] = 0;
4408: }
4410: const PetscInt *idx;
4411: PetscCall(PetscMalloc2(n_el * n_eff_vertices, &V_eff_to_V, n_el * n_eff_constraints, &C_eff_to_C));
4413: for (PetscInt i = 0; i < n_vertices; i++) {
4414: const PetscInt e = graph->nodes[vidxs[i]].local_sub;
4415: const PetscInt s = 2 * e;
4417: V_eff_to_V[e * n_eff_vertices + count_eff[s]] = i;
4418: count_eff[s] += 1;
4419: }
4420: for (PetscInt i = 0; i < n_constraints; i++) {
4421: const PetscInt e = graph->nodes[cidxs[i]].local_sub;
4422: const PetscInt s = 2 * e + 1;
4424: C_eff_to_C[e * n_eff_constraints + count_eff[s]] = i;
4425: count_eff[s] += 1;
4426: }
4428: PetscCall(PetscMalloc1(n_R * n_eff_vertices, &R_eff_V_J));
4429: PetscCall(PetscMalloc1(n_R * n_eff_constraints, &R_eff_C_J));
4430: PetscCall(PetscMalloc1(n_B * n_eff_vertices, &B_eff_V_J));
4431: PetscCall(PetscMalloc1(n_B * n_eff_constraints, &B_eff_C_J));
4432: for (PetscInt i = 0; i < n_R * n_eff_vertices; i++) R_eff_V_J[i] = -1;
4433: for (PetscInt i = 0; i < n_R * n_eff_constraints; i++) R_eff_C_J[i] = -1;
4434: for (PetscInt i = 0; i < n_B * n_eff_vertices; i++) B_eff_V_J[i] = -1;
4435: for (PetscInt i = 0; i < n_B * n_eff_constraints; i++) B_eff_C_J[i] = -1;
4437: PetscCall(ISGetIndices(pcbddc->is_R_local, &idx));
4438: for (PetscInt i = 0; i < n_R; i++) {
4439: const PetscInt e = graph->nodes[idx[i]].local_sub;
4440: const PetscInt s = 2 * e;
4441: PetscInt j;
4443: for (j = 0; j < count_eff[s]; j++) R_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j];
4444: for (j = 0; j < count_eff[s + 1]; j++) R_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j];
4445: }
4446: PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idx));
4447: PetscCall(ISGetIndices(pcis->is_B_local, &idx));
4448: for (PetscInt i = 0; i < n_B; i++) {
4449: const PetscInt e = graph->nodes[idx[i]].local_sub;
4450: const PetscInt s = 2 * e;
4451: PetscInt j;
4453: for (j = 0; j < count_eff[s]; j++) B_eff_V_J[i * n_eff_vertices + j] = V_eff_to_V[e * n_eff_vertices + j];
4454: for (j = 0; j < count_eff[s + 1]; j++) B_eff_C_J[i * n_eff_constraints + j] = C_eff_to_C[e * n_eff_constraints + j];
4455: }
4456: PetscCall(ISRestoreIndices(pcis->is_B_local, &idx));
4458: /* permutation and blocksizes for block invert of S_CC */
4459: PetscInt *idxp;
4461: PetscCall(PetscMalloc1(n_constraints, &idxp));
4462: PetscCall(PetscMalloc1(n_el, &C_bss));
4463: n_C_bss = 0;
4464: for (PetscInt e = 0, cnt = 0; e < n_el; e++) {
4465: const PetscInt nc = count_eff[2 * e + 1];
4467: if (nc) C_bss[n_C_bss++] = nc;
4468: for (PetscInt c = 0; c < nc; c++) idxp[cnt + c] = C_eff_to_C[e * n_eff_constraints + c];
4469: cnt += nc;
4470: }
4472: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_constraints, idxp, PETSC_OWN_POINTER, &is_C_perm));
4474: PetscCall(PetscFree2(V_eff_to_V, C_eff_to_C));
4475: PetscCall(PetscFree(count_eff));
4476: } else {
4477: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcbddc->local_primal_size, pcbddc->local_primal_size, NULL, coarse_submat));
4478: n_eff_constraints = n_constraints;
4479: n_eff_vertices = n_vertices;
4480: }
4482: /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */
4483: PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_R));
4484: PetscCall(PCSetUp(pc_R));
4485: PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCLU, &isLU));
4486: PetscCall(PetscObjectTypeCompare((PetscObject)pc_R, PCCHOLESKY, &isCHOL));
4487: lda_rhs = n_R;
4488: need_benign_correction = PETSC_FALSE;
4489: if (isLU || isCHOL) {
4490: PetscCall(PCFactorGetMatrix(pc_R, &F));
4491: } else if (sub_schurs && sub_schurs->reuse_solver) {
4492: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4493: MatFactorType type;
4495: F = reuse_solver->F;
4496: PetscCall(MatGetFactorType(F, &type));
4497: if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE;
4498: if (type == MAT_FACTOR_LU) isLU = PETSC_TRUE;
4499: PetscCall(MatGetSize(F, &lda_rhs, NULL));
4500: need_benign_correction = (PetscBool)(!!reuse_solver->benign_n);
4501: } else F = NULL;
4503: /* determine if we can use a sparse right-hand side */
4504: sparserhs = PETSC_FALSE;
4505: if (F && !multi_element) {
4506: MatSolverType solver;
4508: PetscCall(MatFactorGetSolverType(F, &solver));
4509: PetscCall(PetscStrcmp(solver, MATSOLVERMUMPS, &sparserhs));
4510: }
4512: /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */
4513: dummy_vec = NULL;
4514: if (need_benign_correction && lda_rhs != n_R && F) {
4515: PetscCall(VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N), &dummy_vec));
4516: PetscCall(VecSetSizes(dummy_vec, lda_rhs, PETSC_DECIDE));
4517: PetscCall(VecSetType(dummy_vec, ((PetscObject)pcis->vec1_N)->type_name));
4518: }
4520: PetscCall(MatDestroy(&pcbddc->local_auxmat1));
4521: PetscCall(MatDestroy(&pcbddc->local_auxmat2));
4523: PetscCall(ISCreateStride(PETSC_COMM_SELF, n_R, 0, 1, &is_R));
4524: PetscCall(ISCreateStride(PETSC_COMM_SELF, n_vertices, 0, 1, &is_V));
4525: PetscCall(ISCreateStride(PETSC_COMM_SELF, n_constraints, n_vertices, 1, &is_C));
4526: PetscCall(ISGetIndices(is_V, &idx_V));
4527: PetscCall(ISGetIndices(is_C, &idx_C));
4529: /* Precompute stuffs needed for preprocessing and application of BDDC*/
4530: if (n_constraints) {
4531: Mat C_B;
4533: /* Extract constraints on R nodes: C_{CR} */
4534: PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &C_CR));
4535: PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_C, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B));
4537: /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */
4538: /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */
4539: if (!sparserhs) {
4540: PetscScalar *marr;
4542: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &Brhs));
4543: PetscCall(MatDenseGetArrayWrite(Brhs, &marr));
4544: for (i = 0; i < n_constraints; i++) {
4545: const PetscScalar *row_cmat_values;
4546: const PetscInt *row_cmat_indices;
4547: PetscInt size_of_constraint, j, col = C_to_eff_C ? C_to_eff_C[i] : i;
4549: PetscCall(MatGetRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values));
4550: for (j = 0; j < size_of_constraint; j++) marr[row_cmat_indices[j] + col * lda_rhs] = -row_cmat_values[j];
4551: PetscCall(MatRestoreRow(C_CR, i, &size_of_constraint, &row_cmat_indices, &row_cmat_values));
4552: }
4553: PetscCall(MatDenseRestoreArrayWrite(Brhs, &marr));
4554: } else {
4555: Mat tC_CR;
4557: PetscCall(MatScale(C_CR, -1.0));
4558: if (lda_rhs != n_R) {
4559: PetscScalar *aa;
4560: PetscInt r, *ii, *jj;
4561: PetscBool done;
4563: PetscCall(MatGetRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4564: PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed");
4565: PetscCall(MatSeqAIJGetArray(C_CR, &aa));
4566: PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_constraints, lda_rhs, ii, jj, aa, &tC_CR));
4567: PetscCall(MatRestoreRowIJ(C_CR, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4568: PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed");
4569: } else {
4570: PetscCall(PetscObjectReference((PetscObject)C_CR));
4571: tC_CR = C_CR;
4572: }
4573: PetscCall(MatCreateTranspose(tC_CR, &Brhs));
4574: PetscCall(MatDestroy(&tC_CR));
4575: }
4576: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_constraints, NULL, &local_auxmat2_R));
4577: if (F) {
4578: if (need_benign_correction) {
4579: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4581: /* rhs is already zero on interior dofs, no need to change the rhs */
4582: PetscCall(PetscArrayzero(reuse_solver->benign_save_vals, pcbddc->benign_n));
4583: }
4584: PetscCall(MatMatSolve(F, Brhs, local_auxmat2_R));
4585: if (need_benign_correction) {
4586: PetscScalar *marr;
4587: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4589: /* XXX multi_element? */
4590: PetscCall(MatDenseGetArray(local_auxmat2_R, &marr));
4591: if (lda_rhs != n_R) {
4592: for (i = 0; i < n_eff_constraints; i++) {
4593: PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4594: PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE));
4595: PetscCall(VecResetArray(dummy_vec));
4596: }
4597: } else {
4598: for (i = 0; i < n_eff_constraints; i++) {
4599: PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4600: PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE));
4601: PetscCall(VecResetArray(pcbddc->vec1_R));
4602: }
4603: }
4604: PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr));
4605: }
4606: } else {
4607: const PetscScalar *barr;
4608: PetscScalar *marr;
4610: PetscCall(MatDenseGetArrayRead(Brhs, &barr));
4611: PetscCall(MatDenseGetArray(local_auxmat2_R, &marr));
4612: for (i = 0; i < n_eff_constraints; i++) {
4613: PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs));
4614: PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs));
4615: PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
4616: PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
4617: PetscCall(VecResetArray(pcbddc->vec1_R));
4618: PetscCall(VecResetArray(pcbddc->vec2_R));
4619: }
4620: PetscCall(MatDenseRestoreArrayRead(Brhs, &barr));
4621: PetscCall(MatDenseRestoreArray(local_auxmat2_R, &marr));
4622: }
4623: if (sparserhs) PetscCall(MatScale(C_CR, -1.0));
4624: PetscCall(MatDestroy(&Brhs));
4625: /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR})^{-1} */
4626: if (!pcbddc->switch_static) {
4627: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_constraints, NULL, &pcbddc->local_auxmat2));
4628: for (i = 0; i < n_eff_constraints; i++) {
4629: Vec r, b;
4630: PetscCall(MatDenseGetColumnVecRead(local_auxmat2_R, i, &r));
4631: PetscCall(MatDenseGetColumnVec(pcbddc->local_auxmat2, i, &b));
4632: PetscCall(VecScatterBegin(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD));
4633: PetscCall(VecScatterEnd(pcbddc->R_to_B, r, b, INSERT_VALUES, SCATTER_FORWARD));
4634: PetscCall(MatDenseRestoreColumnVec(pcbddc->local_auxmat2, i, &b));
4635: PetscCall(MatDenseRestoreColumnVecRead(local_auxmat2_R, i, &r));
4636: }
4637: if (multi_element) {
4638: Mat T;
4640: PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T));
4641: PetscCall(MatDestroy(&local_auxmat2_R));
4642: local_auxmat2_R = T;
4643: PetscCall(MatCreateSeqAIJFromDenseExpand(pcbddc->local_auxmat2, n_constraints, B_eff_C_J, &T));
4644: PetscCall(MatDestroy(&pcbddc->local_auxmat2));
4645: pcbddc->local_auxmat2 = T;
4646: }
4647: PetscCall(MatMatMult(C_B, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC));
4648: } else {
4649: if (multi_element) {
4650: Mat T;
4652: PetscCall(MatCreateSeqAIJFromDenseExpand(local_auxmat2_R, n_constraints, R_eff_C_J, &T));
4653: PetscCall(MatDestroy(&local_auxmat2_R));
4654: local_auxmat2_R = T;
4655: }
4656: if (lda_rhs != n_R) {
4657: PetscCall(MatCreateSubMatrix(local_auxmat2_R, is_R, NULL, MAT_INITIAL_MATRIX, &pcbddc->local_auxmat2));
4658: } else {
4659: PetscCall(PetscObjectReference((PetscObject)local_auxmat2_R));
4660: pcbddc->local_auxmat2 = local_auxmat2_R;
4661: }
4662: PetscCall(MatMatMult(C_CR, pcbddc->local_auxmat2, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_CC));
4663: }
4664: PetscCall(MatScale(S_CC, m_one));
4665: if (multi_element) {
4666: Mat T, T2;
4667: IS isp, ispi;
4669: isp = is_C_perm;
4671: PetscCall(ISInvertPermutation(isp, PETSC_DECIDE, &ispi));
4672: PetscCall(MatPermute(S_CC, isp, isp, &T));
4673: PetscCall(MatSeqAIJInvertVariableBlockDiagonalMat(T, n_C_bss, C_bss, &T2));
4674: PetscCall(MatDestroy(&T));
4675: PetscCall(MatDestroy(&S_CC));
4676: PetscCall(MatPermute(T2, ispi, ispi, &S_CC));
4677: PetscCall(MatDestroy(&T2));
4678: PetscCall(ISDestroy(&ispi));
4679: } else {
4680: if (isCHOL) {
4681: PetscCall(MatCholeskyFactor(S_CC, NULL, NULL));
4682: } else {
4683: PetscCall(MatLUFactor(S_CC, NULL, NULL, NULL));
4684: }
4685: PetscCall(MatSeqDenseInvertFactors_Private(S_CC));
4686: }
4687: /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */
4688: PetscCall(MatMatMult(S_CC, C_B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->local_auxmat1));
4689: PetscCall(MatDestroy(&C_B));
4690: PetscCall(MatSetValuesSubMat(*coarse_submat, S_CC, n_constraints, idx_C, n_constraints, idx_C, INSERT_VALUES));
4691: }
4693: /* Get submatrices from subdomain matrix */
4694: if (n_vertices) {
4695: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4696: PetscBool oldpin;
4697: #endif
4698: IS is_aux;
4700: if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */
4701: IS tis;
4703: PetscCall(ISDuplicate(pcbddc->is_R_local, &tis));
4704: PetscCall(ISSort(tis));
4705: PetscCall(ISComplement(tis, 0, pcis->n, &is_aux));
4706: PetscCall(ISDestroy(&tis));
4707: } else {
4708: PetscCall(ISComplement(pcbddc->is_R_local, 0, pcis->n, &is_aux));
4709: }
4710: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4711: oldpin = pcbddc->local_mat->boundtocpu;
4712: #endif
4713: PetscCall(MatBindToCPU(pcbddc->local_mat, PETSC_TRUE));
4714: PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, is_aux, MAT_INITIAL_MATRIX, &A_RV));
4715: PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_VR));
4716: /* TODO REMOVE: MatMatMult(A_VR,A_RRmA_RV) below may raise an error */
4717: PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR));
4718: PetscCall(MatCreateSubMatrix(pcbddc->local_mat, is_aux, is_aux, MAT_INITIAL_MATRIX, &A_VV));
4719: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
4720: PetscCall(MatBindToCPU(pcbddc->local_mat, oldpin));
4721: #endif
4722: PetscCall(ISDestroy(&is_aux));
4723: }
4724: PetscCall(ISDestroy(&is_C_perm));
4725: PetscCall(PetscFree(C_bss));
4727: p0_lidx_I = NULL;
4728: if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) {
4729: const PetscInt *idxs;
4731: PetscCall(ISGetIndices(pcis->is_I_local, &idxs));
4732: PetscCall(PetscMalloc1(pcbddc->benign_n, &p0_lidx_I));
4733: for (i = 0; i < pcbddc->benign_n; i++) PetscCall(PetscFindInt(pcbddc->benign_p0_lidx[i], pcis->n - pcis->n_B, idxs, &p0_lidx_I[i]));
4734: PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs));
4735: }
4737: /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */
4739: /* Matrices of coarse basis functions (local) */
4740: PetscCall(MatDestroy(&pcbddc->coarse_phi_B));
4741: PetscCall(MatDestroy(&pcbddc->coarse_psi_B));
4742: PetscCall(MatDestroy(&pcbddc->coarse_phi_D));
4743: PetscCall(MatDestroy(&pcbddc->coarse_psi_D));
4744: if (!multi_element) {
4745: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_B));
4746: if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_phi_D));
4747: coarse_phi_multi = NULL;
4748: } else { /* Create temporary NEST matrix to hold coarse basis functions blocks */
4749: IS is_rows[2] = {pcbddc->is_R_local, NULL};
4750: IS is_cols[2] = {is_V, is_C};
4752: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n_vertices, pcbddc->local_primal_ref_node, PETSC_USE_POINTER, &is_rows[1]));
4753: PetscCall(MatCreateNest(PETSC_COMM_SELF, 2, is_rows, 2, is_cols, NULL, &coarse_phi_multi));
4754: PetscCall(ISDestroy(&is_rows[1]));
4755: }
4757: /* vertices */
4758: if (n_vertices) {
4759: PetscBool restoreavr = PETSC_FALSE;
4760: Mat A_RRmA_RV = NULL;
4762: PetscCall(MatSetValuesSubMat(*coarse_submat, A_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES));
4763: PetscCall(MatDestroy(&A_VV));
4765: if (n_R) {
4766: Mat A_RV_bcorr = NULL, S_VV;
4768: PetscCall(MatScale(A_RV, m_one));
4769: if (need_benign_correction) {
4770: ISLocalToGlobalMapping RtoN;
4771: IS is_p0;
4772: PetscInt *idxs_p0, n;
4774: PetscCall(PetscMalloc1(pcbddc->benign_n, &idxs_p0));
4775: PetscCall(ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local, &RtoN));
4776: PetscCall(ISGlobalToLocalMappingApply(RtoN, IS_GTOLM_DROP, pcbddc->benign_n, pcbddc->benign_p0_lidx, &n, idxs_p0));
4777: PetscCheck(n == pcbddc->benign_n, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in R numbering for benign p0! %" PetscInt_FMT " != %" PetscInt_FMT, n, pcbddc->benign_n);
4778: PetscCall(ISLocalToGlobalMappingDestroy(&RtoN));
4779: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, n, idxs_p0, PETSC_OWN_POINTER, &is_p0));
4780: PetscCall(MatCreateSubMatrix(A_RV, is_p0, NULL, MAT_INITIAL_MATRIX, &A_RV_bcorr));
4781: PetscCall(ISDestroy(&is_p0));
4782: }
4784: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &A_RRmA_RV));
4785: if (!sparserhs || need_benign_correction) {
4786: if (lda_rhs == n_R && !multi_element) {
4787: PetscCall(MatConvert(A_RV, MATDENSE, MAT_INPLACE_MATRIX, &A_RV));
4788: } else {
4789: Mat T;
4790: PetscScalar *av, *array;
4791: const PetscInt *xadj, *adjncy;
4792: PetscInt n;
4793: PetscBool flg_row;
4795: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, lda_rhs, n_eff_vertices, NULL, &T));
4796: PetscCall(MatDenseGetArrayWrite(T, &array));
4797: PetscCall(MatConvert(A_RV, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_RV));
4798: PetscCall(MatGetRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4799: PetscCall(MatSeqAIJGetArray(A_RV, &av));
4800: for (i = 0; i < n; i++) {
4801: PetscInt j;
4802: for (j = xadj[i]; j < xadj[i + 1]; j++) array[lda_rhs * (V_to_eff_V ? V_to_eff_V[adjncy[j]] : adjncy[j]) + i] = av[j];
4803: }
4804: PetscCall(MatRestoreRowIJ(A_RV, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
4805: PetscCall(MatDenseRestoreArrayWrite(T, &array));
4806: PetscCall(MatDestroy(&A_RV));
4807: A_RV = T;
4808: }
4809: if (need_benign_correction) {
4810: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4811: PetscScalar *marr;
4813: /* XXX multi_element */
4814: PetscCall(MatDenseGetArray(A_RV, &marr));
4815: /* need \Phi^T A_RV = (I+L)A_RV, L given by
4817: | 0 0 0 | (V)
4818: L = | 0 0 -1 | (P-p0)
4819: | 0 0 -1 | (p0)
4821: */
4822: for (i = 0; i < reuse_solver->benign_n; i++) {
4823: const PetscScalar *vals;
4824: const PetscInt *idxs, *idxs_zero;
4825: PetscInt n, j, nz;
4827: PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz));
4828: PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4829: PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals));
4830: for (j = 0; j < n; j++) {
4831: PetscScalar val = vals[j];
4832: PetscInt k, col = idxs[j];
4833: for (k = 0; k < nz; k++) marr[idxs_zero[k] + lda_rhs * col] -= val;
4834: }
4835: PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals));
4836: PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
4837: }
4838: PetscCall(MatDenseRestoreArray(A_RV, &marr));
4839: }
4840: PetscCall(PetscObjectReference((PetscObject)A_RV));
4841: Brhs = A_RV;
4842: } else {
4843: Mat tA_RVT, A_RVT;
4845: if (!pcbddc->symmetric_primal) {
4846: /* A_RV already scaled by -1 */
4847: PetscCall(MatTranspose(A_RV, MAT_INITIAL_MATRIX, &A_RVT));
4848: } else {
4849: restoreavr = PETSC_TRUE;
4850: PetscCall(MatScale(A_VR, -1.0));
4851: PetscCall(PetscObjectReference((PetscObject)A_VR));
4852: A_RVT = A_VR;
4853: }
4854: if (lda_rhs != n_R) {
4855: PetscScalar *aa;
4856: PetscInt r, *ii, *jj;
4857: PetscBool done;
4859: PetscCall(MatGetRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4860: PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "GetRowIJ failed");
4861: PetscCall(MatSeqAIJGetArray(A_RVT, &aa));
4862: PetscCall(MatCreateSeqAIJWithArrays(PETSC_COMM_SELF, n_vertices, lda_rhs, ii, jj, aa, &tA_RVT));
4863: PetscCall(MatRestoreRowIJ(A_RVT, 0, PETSC_FALSE, PETSC_FALSE, &r, (const PetscInt **)&ii, (const PetscInt **)&jj, &done));
4864: PetscCheck(done, PETSC_COMM_SELF, PETSC_ERR_PLIB, "RestoreRowIJ failed");
4865: } else {
4866: PetscCall(PetscObjectReference((PetscObject)A_RVT));
4867: tA_RVT = A_RVT;
4868: }
4869: PetscCall(MatCreateTranspose(tA_RVT, &Brhs));
4870: PetscCall(MatDestroy(&tA_RVT));
4871: PetscCall(MatDestroy(&A_RVT));
4872: }
4873: if (F) {
4874: /* need to correct the rhs */
4875: if (need_benign_correction) {
4876: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4877: PetscScalar *marr;
4879: PetscCall(MatDenseGetArray(Brhs, &marr));
4880: if (lda_rhs != n_R) {
4881: for (i = 0; i < n_eff_vertices; i++) {
4882: PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4883: PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_FALSE, PETSC_TRUE));
4884: PetscCall(VecResetArray(dummy_vec));
4885: }
4886: } else {
4887: for (i = 0; i < n_eff_vertices; i++) {
4888: PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4889: PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_FALSE, PETSC_TRUE));
4890: PetscCall(VecResetArray(pcbddc->vec1_R));
4891: }
4892: }
4893: PetscCall(MatDenseRestoreArray(Brhs, &marr));
4894: }
4895: PetscCall(MatMatSolve(F, Brhs, A_RRmA_RV));
4896: if (restoreavr) PetscCall(MatScale(A_VR, -1.0));
4897: /* need to correct the solution */
4898: if (need_benign_correction) {
4899: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4900: PetscScalar *marr;
4902: PetscCall(MatDenseGetArray(A_RRmA_RV, &marr));
4903: if (lda_rhs != n_R) {
4904: for (i = 0; i < n_eff_vertices; i++) {
4905: PetscCall(VecPlaceArray(dummy_vec, marr + i * lda_rhs));
4906: PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, dummy_vec, NULL, PETSC_TRUE, PETSC_TRUE));
4907: PetscCall(VecResetArray(dummy_vec));
4908: }
4909: } else {
4910: for (i = 0; i < n_eff_vertices; i++) {
4911: PetscCall(VecPlaceArray(pcbddc->vec1_R, marr + i * lda_rhs));
4912: PetscCall(PCBDDCReuseSolversBenignAdapt(reuse_solver, pcbddc->vec1_R, NULL, PETSC_TRUE, PETSC_TRUE));
4913: PetscCall(VecResetArray(pcbddc->vec1_R));
4914: }
4915: }
4916: PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr));
4917: }
4918: } else {
4919: const PetscScalar *barr;
4920: PetscScalar *marr;
4922: PetscCall(MatDenseGetArrayRead(Brhs, &barr));
4923: PetscCall(MatDenseGetArray(A_RRmA_RV, &marr));
4924: for (i = 0; i < n_eff_vertices; i++) {
4925: PetscCall(VecPlaceArray(pcbddc->vec1_R, barr + i * lda_rhs));
4926: PetscCall(VecPlaceArray(pcbddc->vec2_R, marr + i * lda_rhs));
4927: PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
4928: PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
4929: PetscCall(VecResetArray(pcbddc->vec1_R));
4930: PetscCall(VecResetArray(pcbddc->vec2_R));
4931: }
4932: PetscCall(MatDenseRestoreArrayRead(Brhs, &barr));
4933: PetscCall(MatDenseRestoreArray(A_RRmA_RV, &marr));
4934: }
4935: PetscCall(MatDestroy(&A_RV));
4936: PetscCall(MatDestroy(&Brhs));
4937: /* S_VV and S_CV */
4938: if (n_constraints) {
4939: Mat B;
4941: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, n_eff_vertices, NULL, &B));
4942: PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, B));
4944: /* S_CV = pcbddc->local_auxmat1 * B */
4945: if (multi_element) {
4946: Mat T;
4948: PetscCall(MatCreateSeqAIJFromDenseExpand(B, n_vertices, B_eff_V_J, &T));
4949: PetscCall(MatDestroy(&B));
4950: B = T;
4951: }
4952: PetscCall(MatProductCreate(pcbddc->local_auxmat1, B, NULL, &S_CV));
4953: PetscCall(MatProductSetType(S_CV, MATPRODUCT_AB));
4954: PetscCall(MatProductSetFromOptions(S_CV));
4955: PetscCall(MatProductSymbolic(S_CV));
4956: PetscCall(MatProductNumeric(S_CV));
4957: PetscCall(MatProductClear(S_CV));
4958: PetscCall(MatDestroy(&B));
4960: /* B = local_auxmat2_R * S_CV */
4961: PetscCall(MatProductCreate(local_auxmat2_R, S_CV, NULL, &B));
4962: PetscCall(MatProductSetType(B, MATPRODUCT_AB));
4963: PetscCall(MatProductSetFromOptions(B));
4964: PetscCall(MatProductSymbolic(B));
4965: PetscCall(MatProductNumeric(B));
4967: PetscCall(MatScale(S_CV, m_one));
4968: PetscCall(MatSetValuesSubMat(*coarse_submat, S_CV, n_constraints, idx_C, n_vertices, idx_V, INSERT_VALUES));
4970: if (multi_element) {
4971: Mat T;
4973: PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T));
4974: PetscCall(MatDestroy(&A_RRmA_RV));
4975: A_RRmA_RV = T;
4976: }
4977: PetscCall(MatAXPY(A_RRmA_RV, 1.0, B, UNKNOWN_NONZERO_PATTERN)); /* XXX ? */
4978: PetscCall(MatDestroy(&B));
4979: } else if (multi_element) {
4980: Mat T;
4982: PetscCall(MatCreateSeqAIJFromDenseExpand(A_RRmA_RV, n_vertices, R_eff_V_J, &T));
4983: PetscCall(MatDestroy(&A_RRmA_RV));
4984: A_RRmA_RV = T;
4985: }
4987: if (lda_rhs != n_R) {
4988: Mat T;
4990: PetscCall(MatCreateSubMatrix(A_RRmA_RV, is_R, NULL, MAT_INITIAL_MATRIX, &T));
4991: PetscCall(MatDestroy(&A_RRmA_RV));
4992: A_RRmA_RV = T;
4993: }
4995: /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */
4996: if (need_benign_correction) { /* XXX SPARSE */
4997: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
4998: PetscScalar *sums;
4999: const PetscScalar *marr;
5001: PetscCall(MatDenseGetArrayRead(A_RRmA_RV, &marr));
5002: PetscCall(PetscMalloc1(n_vertices, &sums));
5003: for (i = 0; i < reuse_solver->benign_n; i++) {
5004: const PetscScalar *vals;
5005: const PetscInt *idxs, *idxs_zero;
5006: PetscInt n, j, nz;
5008: PetscCall(ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i], &nz));
5009: PetscCall(ISGetIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
5010: for (j = 0; j < n_vertices; j++) {
5011: sums[j] = 0.;
5012: for (PetscInt k = 0; k < nz; k++) sums[j] += marr[idxs_zero[k] + j * n_R];
5013: }
5014: PetscCall(MatGetRow(A_RV_bcorr, i, &n, &idxs, &vals));
5015: for (j = 0; j < n; j++) {
5016: PetscScalar val = vals[j];
5017: for (PetscInt k = 0; k < n_vertices; k++) PetscCall(MatSetValue(*coarse_submat, idx_V[idxs[j]], idx_V[k], val * sums[k], ADD_VALUES));
5018: }
5019: PetscCall(MatRestoreRow(A_RV_bcorr, i, &n, &idxs, &vals));
5020: PetscCall(ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i], &idxs_zero));
5021: }
5022: PetscCall(PetscFree(sums));
5023: PetscCall(MatDestroy(&A_RV_bcorr));
5024: PetscCall(MatDenseRestoreArrayRead(A_RRmA_RV, &marr));
5025: }
5027: PetscCall(MatMatMult(A_VR, A_RRmA_RV, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VV));
5028: PetscCall(MatSetValuesSubMat(*coarse_submat, S_VV, n_vertices, idx_V, n_vertices, idx_V, ADD_VALUES));
5029: PetscCall(MatDestroy(&S_VV));
5030: }
5032: /* coarse basis functions */
5033: if (coarse_phi_multi) {
5034: Mat Vid;
5036: PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, n_vertices, n_vertices, 1, NULL, &Vid));
5037: PetscCall(MatShift_Basic(Vid, 1.0));
5038: PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 0, A_RRmA_RV));
5039: PetscCall(MatNestSetSubMat(coarse_phi_multi, 1, 0, Vid));
5040: PetscCall(MatDestroy(&Vid));
5041: } else {
5042: if (A_RRmA_RV) {
5043: PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_B, pcbddc->coarse_phi_B));
5044: if (pcbddc->switch_static || pcbddc->dbg_flag) {
5045: PetscCall(MatDenseScatter(A_RRmA_RV, pcbddc->R_to_D, pcbddc->coarse_phi_D));
5046: if (pcbddc->benign_n) {
5047: for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_D, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES));
5048: PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_D, MAT_FINAL_ASSEMBLY));
5049: PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_D, MAT_FINAL_ASSEMBLY));
5050: }
5051: }
5052: }
5053: for (i = 0; i < n_vertices; i++) PetscCall(MatSetValues(pcbddc->coarse_phi_B, 1, &idx_V_B[i], 1, &i, &one, INSERT_VALUES));
5054: PetscCall(MatAssemblyBegin(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY));
5055: PetscCall(MatAssemblyEnd(pcbddc->coarse_phi_B, MAT_FINAL_ASSEMBLY));
5056: }
5057: PetscCall(MatDestroy(&A_RRmA_RV));
5058: }
5059: PetscCall(MatDestroy(&A_RV));
5060: PetscCall(VecDestroy(&dummy_vec));
5062: if (n_constraints) {
5063: Mat B, B2;
5065: PetscCall(MatScale(S_CC, m_one));
5066: PetscCall(MatProductCreate(local_auxmat2_R, S_CC, NULL, &B));
5067: PetscCall(MatProductSetType(B, MATPRODUCT_AB));
5068: PetscCall(MatProductSetFromOptions(B));
5069: PetscCall(MatProductSymbolic(B));
5070: PetscCall(MatProductNumeric(B));
5072: if (n_vertices) {
5073: if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */
5074: PetscCall(MatTranspose(S_CV, MAT_INITIAL_MATRIX, &S_VC));
5075: } else {
5076: if (lda_rhs != n_R) {
5077: Mat tB;
5079: PetscCall(MatCreateSubMatrix(B, is_R, NULL, MAT_INITIAL_MATRIX, &tB));
5080: PetscCall(MatDestroy(&B));
5081: B = tB;
5082: }
5083: PetscCall(MatMatMult(A_VR, B, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &S_VC));
5084: }
5085: PetscCall(MatSetValuesSubMat(*coarse_submat, S_VC, n_vertices, idx_V, n_constraints, idx_C, INSERT_VALUES));
5086: }
5088: /* coarse basis functions */
5089: if (coarse_phi_multi) {
5090: PetscCall(MatNestSetSubMat(coarse_phi_multi, 0, 1, B));
5091: } else {
5092: PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_B, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2));
5093: PetscCall(MatDenseScatter(B, pcbddc->R_to_B, B2));
5094: PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_B, &B2));
5095: if (pcbddc->switch_static || pcbddc->dbg_flag) {
5096: PetscCall(MatDenseGetSubMatrix(pcbddc->coarse_phi_D, PETSC_DECIDE, PETSC_DECIDE, n_vertices, n_vertices + n_constraints, &B2));
5097: PetscCall(MatDenseScatter(B, pcbddc->R_to_D, B2));
5098: if (pcbddc->benign_n) {
5099: for (i = 0; i < n_constraints; i++) PetscCall(MatSetValues(B2, pcbddc->benign_n, p0_lidx_I, 1, &i, NULL, INSERT_VALUES));
5100: }
5101: PetscCall(MatDenseRestoreSubMatrix(pcbddc->coarse_phi_D, &B2));
5102: }
5103: }
5104: PetscCall(MatDestroy(&B));
5105: }
5107: /* assemble sparse coarse basis functions */
5108: if (coarse_phi_multi) {
5109: Mat T;
5111: PetscCall(MatConvert(coarse_phi_multi, MATSEQAIJ, MAT_INITIAL_MATRIX, &T));
5112: PetscCall(MatDestroy(&coarse_phi_multi));
5113: PetscCall(MatCreateSubMatrix(T, pcis->is_B_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_B));
5114: if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSubMatrix(T, pcis->is_I_local, NULL, MAT_INITIAL_MATRIX, &pcbddc->coarse_phi_D));
5115: PetscCall(MatDestroy(&T));
5116: }
5117: PetscCall(MatDestroy(&local_auxmat2_R));
5118: PetscCall(PetscFree(p0_lidx_I));
5120: /* coarse matrix entries relative to B_0 */
5121: if (pcbddc->benign_n) {
5122: Mat B0_B, B0_BPHI;
5123: IS is_dummy;
5124: const PetscScalar *data;
5125: PetscInt j;
5127: PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
5128: PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
5129: PetscCall(ISDestroy(&is_dummy));
5130: PetscCall(MatMatMult(B0_B, pcbddc->coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI));
5131: PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI));
5132: PetscCall(MatDenseGetArrayRead(B0_BPHI, &data));
5133: for (j = 0; j < pcbddc->benign_n; j++) {
5134: PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
5135: for (i = 0; i < pcbddc->local_primal_size; i++) {
5136: PetscCall(MatSetValue(*coarse_submat, primal_idx, i, data[i * pcbddc->benign_n + j], INSERT_VALUES));
5137: PetscCall(MatSetValue(*coarse_submat, i, primal_idx, data[i * pcbddc->benign_n + j], INSERT_VALUES));
5138: }
5139: }
5140: PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data));
5141: PetscCall(MatDestroy(&B0_B));
5142: PetscCall(MatDestroy(&B0_BPHI));
5143: }
5145: /* compute other basis functions for non-symmetric problems */
5146: if (!pcbddc->symmetric_primal) {
5147: Mat B_V = NULL, B_C = NULL;
5148: PetscScalar *marray, *work;
5150: /* TODO multi_element MatDenseScatter */
5151: if (n_constraints) {
5152: Mat S_CCT, C_CRT;
5154: PetscCall(MatScale(S_CC, m_one));
5155: PetscCall(MatTranspose(C_CR, MAT_INITIAL_MATRIX, &C_CRT));
5156: PetscCall(MatTranspose(S_CC, MAT_INITIAL_MATRIX, &S_CCT));
5157: PetscCall(MatMatMult(C_CRT, S_CCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_C));
5158: PetscCall(MatConvert(B_C, MATDENSE, MAT_INPLACE_MATRIX, &B_C));
5159: PetscCall(MatDestroy(&S_CCT));
5160: if (n_vertices) {
5161: Mat S_VCT;
5163: PetscCall(MatTranspose(S_VC, MAT_INITIAL_MATRIX, &S_VCT));
5164: PetscCall(MatMatMult(C_CRT, S_VCT, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &B_V));
5165: PetscCall(MatDestroy(&S_VCT));
5166: PetscCall(MatConvert(B_V, MATDENSE, MAT_INPLACE_MATRIX, &B_V));
5167: }
5168: PetscCall(MatDestroy(&C_CRT));
5169: } else {
5170: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_R, n_vertices, NULL, &B_V));
5171: }
5172: if (n_vertices && n_R) {
5173: PetscScalar *av, *marray;
5174: const PetscInt *xadj, *adjncy;
5175: PetscInt n;
5176: PetscBool flg_row;
5178: /* B_V = B_V - A_VR^T */
5179: PetscCall(MatConvert(A_VR, MATSEQAIJ, MAT_INPLACE_MATRIX, &A_VR));
5180: PetscCall(MatGetRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
5181: PetscCall(MatSeqAIJGetArray(A_VR, &av));
5182: PetscCall(MatDenseGetArray(B_V, &marray));
5183: for (i = 0; i < n; i++) {
5184: PetscInt j;
5185: for (j = xadj[i]; j < xadj[i + 1]; j++) marray[i * n_R + adjncy[j]] -= av[j];
5186: }
5187: PetscCall(MatDenseRestoreArray(B_V, &marray));
5188: PetscCall(MatRestoreRowIJ(A_VR, 0, PETSC_FALSE, PETSC_FALSE, &n, &xadj, &adjncy, &flg_row));
5189: PetscCall(MatDestroy(&A_VR));
5190: }
5192: /* currently there's no support for MatTransposeMatSolve(F,B,X) */
5193: PetscCall(PetscMalloc1(n_R * pcbddc->local_primal_size, &work));
5194: if (n_vertices) {
5195: PetscCall(MatDenseGetArray(B_V, &marray));
5196: for (i = 0; i < n_vertices; i++) {
5197: PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + i * n_R));
5198: PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R));
5199: PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
5200: PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
5201: PetscCall(VecResetArray(pcbddc->vec1_R));
5202: PetscCall(VecResetArray(pcbddc->vec2_R));
5203: }
5204: PetscCall(MatDenseRestoreArray(B_V, &marray));
5205: }
5206: if (B_C) {
5207: PetscCall(MatDenseGetArray(B_C, &marray));
5208: for (i = n_vertices; i < n_constraints + n_vertices; i++) {
5209: PetscCall(VecPlaceArray(pcbddc->vec1_R, marray + (i - n_vertices) * n_R));
5210: PetscCall(VecPlaceArray(pcbddc->vec2_R, work + i * n_R));
5211: PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec2_R));
5212: PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
5213: PetscCall(VecResetArray(pcbddc->vec1_R));
5214: PetscCall(VecResetArray(pcbddc->vec2_R));
5215: }
5216: PetscCall(MatDenseRestoreArray(B_C, &marray));
5217: }
5218: /* coarse basis functions */
5219: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_B, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_B));
5220: if (pcbddc->switch_static || pcbddc->dbg_flag) PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, n_D, pcbddc->local_primal_size, NULL, &pcbddc->coarse_psi_D));
5221: for (i = 0; i < pcbddc->local_primal_size; i++) {
5222: Vec v;
5224: PetscCall(VecPlaceArray(pcbddc->vec1_R, work + i * n_R));
5225: PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_B, i, &v));
5226: PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5227: PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5228: if (i < n_vertices) {
5229: PetscScalar one = 1.0;
5230: PetscCall(VecSetValues(v, 1, &idx_V_B[i], &one, INSERT_VALUES));
5231: PetscCall(VecAssemblyBegin(v));
5232: PetscCall(VecAssemblyEnd(v));
5233: }
5234: PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_B, i, &v));
5236: if (pcbddc->switch_static || pcbddc->dbg_flag) {
5237: PetscCall(MatDenseGetColumnVec(pcbddc->coarse_psi_D, i, &v));
5238: PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5239: PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, v, INSERT_VALUES, SCATTER_FORWARD));
5240: PetscCall(MatDenseRestoreColumnVec(pcbddc->coarse_psi_D, i, &v));
5241: }
5242: PetscCall(VecResetArray(pcbddc->vec1_R));
5243: }
5244: PetscCall(MatDestroy(&B_V));
5245: PetscCall(MatDestroy(&B_C));
5246: PetscCall(PetscFree(work));
5247: } else {
5248: PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_B));
5249: pcbddc->coarse_psi_B = pcbddc->coarse_phi_B;
5250: PetscCall(PetscObjectReference((PetscObject)pcbddc->coarse_phi_D));
5251: pcbddc->coarse_psi_D = pcbddc->coarse_phi_D;
5252: }
5253: PetscCall(MatAssemblyBegin(*coarse_submat, MAT_FINAL_ASSEMBLY));
5254: PetscCall(MatAssemblyEnd(*coarse_submat, MAT_FINAL_ASSEMBLY));
5256: /* free memory */
5257: PetscCall(PetscFree(V_to_eff_V));
5258: PetscCall(PetscFree(C_to_eff_C));
5259: PetscCall(PetscFree(R_eff_V_J));
5260: PetscCall(PetscFree(R_eff_C_J));
5261: PetscCall(PetscFree(B_eff_V_J));
5262: PetscCall(PetscFree(B_eff_C_J));
5263: PetscCall(ISDestroy(&is_R));
5264: PetscCall(ISRestoreIndices(is_V, &idx_V));
5265: PetscCall(ISRestoreIndices(is_C, &idx_C));
5266: PetscCall(ISDestroy(&is_V));
5267: PetscCall(ISDestroy(&is_C));
5268: PetscCall(PetscFree(idx_V_B));
5269: PetscCall(MatDestroy(&S_CV));
5270: PetscCall(MatDestroy(&S_VC));
5271: PetscCall(MatDestroy(&S_CC));
5272: if (n_vertices) PetscCall(MatDestroy(&A_VR));
5273: if (n_constraints) PetscCall(MatDestroy(&C_CR));
5274: PetscCall(PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level], pc, 0, 0, 0));
5276: /* Checking coarse_sub_mat and coarse basis functions */
5277: /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
5278: /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */
5279: if (pcbddc->dbg_flag) {
5280: Mat AUXMAT, TM1, TM2, TM3, TM4;
5281: Mat coarse_phi_D, coarse_phi_B;
5282: Mat coarse_psi_D, coarse_psi_B;
5283: Mat A_II, A_BB, A_IB, A_BI;
5284: Mat C_B, CPHI;
5285: IS is_dummy;
5286: Vec mones;
5287: MatType checkmattype = MATSEQAIJ;
5288: PetscReal real_value;
5290: if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
5291: Mat A;
5292: PetscCall(PCBDDCBenignProject(pc, NULL, NULL, &A));
5293: PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_II));
5294: PetscCall(MatCreateSubMatrix(A, pcis->is_I_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_IB));
5295: PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_I_local, MAT_INITIAL_MATRIX, &A_BI));
5296: PetscCall(MatCreateSubMatrix(A, pcis->is_B_local, pcis->is_B_local, MAT_INITIAL_MATRIX, &A_BB));
5297: PetscCall(MatDestroy(&A));
5298: } else {
5299: PetscCall(MatConvert(pcis->A_II, checkmattype, MAT_INITIAL_MATRIX, &A_II));
5300: PetscCall(MatConvert(pcis->A_IB, checkmattype, MAT_INITIAL_MATRIX, &A_IB));
5301: PetscCall(MatConvert(pcis->A_BI, checkmattype, MAT_INITIAL_MATRIX, &A_BI));
5302: PetscCall(MatConvert(pcis->A_BB, checkmattype, MAT_INITIAL_MATRIX, &A_BB));
5303: }
5304: PetscCall(MatConvert(pcbddc->coarse_phi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_D));
5305: PetscCall(MatConvert(pcbddc->coarse_phi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_phi_B));
5306: if (!pcbddc->symmetric_primal) {
5307: PetscCall(MatConvert(pcbddc->coarse_psi_D, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_D));
5308: PetscCall(MatConvert(pcbddc->coarse_psi_B, checkmattype, MAT_INITIAL_MATRIX, &coarse_psi_B));
5309: }
5310: PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
5311: PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Check coarse sub mat computation (symmetric %d)\n", pcbddc->symmetric_primal));
5312: PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5313: if (!pcbddc->symmetric_primal) {
5314: PetscCall(MatMatMult(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5315: PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM1));
5316: PetscCall(MatDestroy(&AUXMAT));
5317: PetscCall(MatMatMult(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5318: PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM2));
5319: PetscCall(MatDestroy(&AUXMAT));
5320: PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5321: PetscCall(MatTransposeMatMult(coarse_psi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3));
5322: PetscCall(MatDestroy(&AUXMAT));
5323: PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5324: PetscCall(MatTransposeMatMult(coarse_psi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4));
5325: PetscCall(MatDestroy(&AUXMAT));
5326: } else {
5327: PetscCall(MatPtAP(A_II, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &TM1));
5328: PetscCall(MatPtAP(A_BB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &TM2));
5329: PetscCall(MatMatMult(A_IB, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5330: PetscCall(MatTransposeMatMult(coarse_phi_D, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM3));
5331: PetscCall(MatDestroy(&AUXMAT));
5332: PetscCall(MatMatMult(A_BI, coarse_phi_D, MAT_INITIAL_MATRIX, 1.0, &AUXMAT));
5333: PetscCall(MatTransposeMatMult(coarse_phi_B, AUXMAT, MAT_INITIAL_MATRIX, 1.0, &TM4));
5334: PetscCall(MatDestroy(&AUXMAT));
5335: }
5336: PetscCall(MatAXPY(TM1, one, TM2, DIFFERENT_NONZERO_PATTERN));
5337: PetscCall(MatAXPY(TM1, one, TM3, DIFFERENT_NONZERO_PATTERN));
5338: PetscCall(MatAXPY(TM1, one, TM4, DIFFERENT_NONZERO_PATTERN));
5339: PetscCall(MatConvert(TM1, MATSEQDENSE, MAT_INPLACE_MATRIX, &TM1));
5340: if (pcbddc->benign_n) {
5341: Mat B0_B, B0_BPHI;
5342: const PetscScalar *data2;
5343: PetscScalar *data;
5344: PetscInt j;
5346: PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->benign_n, 0, 1, &is_dummy));
5347: PetscCall(MatCreateSubMatrix(pcbddc->benign_B0, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B0_B));
5348: PetscCall(MatMatMult(B0_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &B0_BPHI));
5349: PetscCall(MatConvert(B0_BPHI, MATSEQDENSE, MAT_INPLACE_MATRIX, &B0_BPHI));
5350: PetscCall(MatDenseGetArray(TM1, &data));
5351: PetscCall(MatDenseGetArrayRead(B0_BPHI, &data2));
5352: for (j = 0; j < pcbddc->benign_n; j++) {
5353: PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j;
5354: for (i = 0; i < pcbddc->local_primal_size; i++) {
5355: data[primal_idx * pcbddc->local_primal_size + i] += data2[i * pcbddc->benign_n + j];
5356: data[i * pcbddc->local_primal_size + primal_idx] += data2[i * pcbddc->benign_n + j];
5357: }
5358: }
5359: PetscCall(MatDenseRestoreArray(TM1, &data));
5360: PetscCall(MatDenseRestoreArrayRead(B0_BPHI, &data2));
5361: PetscCall(MatDestroy(&B0_B));
5362: PetscCall(ISDestroy(&is_dummy));
5363: PetscCall(MatDestroy(&B0_BPHI));
5364: }
5365: PetscCall(MatAXPY(TM1, m_one, *coarse_submat, DIFFERENT_NONZERO_PATTERN));
5366: PetscCall(MatNorm(TM1, NORM_FROBENIUS, &real_value));
5367: PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5368: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d matrix error % 1.14e\n", PetscGlobalRank, (double)real_value));
5370: /* check constraints */
5371: PetscCall(ISCreateStride(PETSC_COMM_SELF, pcbddc->local_primal_size - pcbddc->benign_n, 0, 1, &is_dummy));
5372: PetscCall(MatCreateSubMatrix(pcbddc->ConstraintMatrix, is_dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &C_B));
5373: if (!pcbddc->benign_n) { /* TODO: add benign case */
5374: PetscCall(MatMatMult(C_B, coarse_phi_B, MAT_INITIAL_MATRIX, 1.0, &CPHI));
5375: } else {
5376: PetscScalar *data;
5377: Mat tmat;
5378: PetscCall(MatDenseGetArray(pcbddc->coarse_phi_B, &data));
5379: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, pcis->n_B, pcbddc->local_primal_size - pcbddc->benign_n, data, &tmat));
5380: PetscCall(MatDenseRestoreArray(pcbddc->coarse_phi_B, &data));
5381: PetscCall(MatMatMult(C_B, tmat, MAT_INITIAL_MATRIX, 1.0, &CPHI));
5382: PetscCall(MatDestroy(&tmat));
5383: }
5384: PetscCall(MatCreateVecs(CPHI, &mones, NULL));
5385: PetscCall(VecSet(mones, -1.0));
5386: PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES));
5387: PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value));
5388: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d phi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value));
5389: if (!pcbddc->symmetric_primal) {
5390: PetscCall(MatMatMult(C_B, coarse_psi_B, MAT_REUSE_MATRIX, 1.0, &CPHI));
5391: PetscCall(VecSet(mones, -1.0));
5392: PetscCall(MatDiagonalSet(CPHI, mones, ADD_VALUES));
5393: PetscCall(MatNorm(CPHI, NORM_FROBENIUS, &real_value));
5394: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d psi constraints error % 1.14e\n", PetscGlobalRank, (double)real_value));
5395: }
5396: PetscCall(MatDestroy(&C_B));
5397: PetscCall(MatDestroy(&CPHI));
5398: PetscCall(ISDestroy(&is_dummy));
5399: PetscCall(VecDestroy(&mones));
5400: PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5401: PetscCall(MatDestroy(&A_II));
5402: PetscCall(MatDestroy(&A_BB));
5403: PetscCall(MatDestroy(&A_IB));
5404: PetscCall(MatDestroy(&A_BI));
5405: PetscCall(MatDestroy(&TM1));
5406: PetscCall(MatDestroy(&TM2));
5407: PetscCall(MatDestroy(&TM3));
5408: PetscCall(MatDestroy(&TM4));
5409: PetscCall(MatDestroy(&coarse_phi_D));
5410: PetscCall(MatDestroy(&coarse_phi_B));
5411: if (!pcbddc->symmetric_primal) {
5412: PetscCall(MatDestroy(&coarse_psi_D));
5413: PetscCall(MatDestroy(&coarse_psi_B));
5414: }
5415: }
5417: #if 0
5418: {
5419: PetscViewer viewer;
5420: char filename[256];
5422: PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level));
5423: PetscCall(PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer));
5424: PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB));
5425: PetscCall(PetscObjectSetName((PetscObject)*coarse_submat,"coarse submat"));
5426: PetscCall(MatView(*coarse_submat,viewer));
5427: if (pcbddc->coarse_phi_B) {
5428: PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B"));
5429: PetscCall(MatView(pcbddc->coarse_phi_B,viewer));
5430: }
5431: if (pcbddc->coarse_phi_D) {
5432: PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D"));
5433: PetscCall(MatView(pcbddc->coarse_phi_D,viewer));
5434: }
5435: if (pcbddc->coarse_psi_B) {
5436: PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B"));
5437: PetscCall(MatView(pcbddc->coarse_psi_B,viewer));
5438: }
5439: if (pcbddc->coarse_psi_D) {
5440: PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D"));
5441: PetscCall(MatView(pcbddc->coarse_psi_D,viewer));
5442: }
5443: PetscCall(PetscObjectSetName((PetscObject)pcbddc->local_mat,"A"));
5444: PetscCall(MatView(pcbddc->local_mat,viewer));
5445: PetscCall(PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C"));
5446: PetscCall(MatView(pcbddc->ConstraintMatrix,viewer));
5447: PetscCall(PetscObjectSetName((PetscObject)pcis->is_I_local,"I"));
5448: PetscCall(ISView(pcis->is_I_local,viewer));
5449: PetscCall(PetscObjectSetName((PetscObject)pcis->is_B_local,"B"));
5450: PetscCall(ISView(pcis->is_B_local,viewer));
5451: PetscCall(PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R"));
5452: PetscCall(ISView(pcbddc->is_R_local,viewer));
5453: PetscCall(PetscViewerDestroy(&viewer));
5454: }
5455: #endif
5457: /* device support */
5458: {
5459: PetscBool iscuda, iship, iskokkos;
5460: MatType mtype = NULL;
5462: PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iscuda, VECCUDA, VECMPICUDA, VECSEQCUDA, ""));
5463: PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iship, VECHIP, VECMPIHIP, VECSEQHIP, ""));
5464: PetscCall(PetscObjectTypeCompareAny((PetscObject)pcis->vec1_N, &iskokkos, VECKOKKOS, VECMPIKOKKOS, VECSEQKOKKOS, ""));
5465: if (iskokkos) {
5466: if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_CUDA)) iscuda = PETSC_TRUE;
5467: else if (PetscDefined(HAVE_MACRO_KOKKOS_ENABLE_HIP)) iship = PETSC_TRUE;
5468: }
5469: if (iskokkos) mtype = multi_element ? MATSEQAIJKOKKOS : (iscuda ? MATSEQDENSECUDA : MATSEQDENSEHIP);
5470: else if (iship) mtype = multi_element ? MATSEQAIJHIPSPARSE : MATSEQDENSEHIP;
5471: else if (iscuda) mtype = multi_element ? MATSEQAIJCUSPARSE : MATSEQDENSECUDA;
5472: if (mtype) {
5473: if (pcbddc->local_auxmat1) PetscCall(MatConvert(pcbddc->local_auxmat1, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat1));
5474: if (pcbddc->local_auxmat2) PetscCall(MatConvert(pcbddc->local_auxmat2, mtype, MAT_INPLACE_MATRIX, &pcbddc->local_auxmat2));
5475: if (pcbddc->coarse_phi_B) PetscCall(MatConvert(pcbddc->coarse_phi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_B));
5476: if (pcbddc->coarse_phi_D) PetscCall(MatConvert(pcbddc->coarse_phi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_phi_D));
5477: if (pcbddc->coarse_psi_B) PetscCall(MatConvert(pcbddc->coarse_psi_B, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_B));
5478: if (pcbddc->coarse_psi_D) PetscCall(MatConvert(pcbddc->coarse_psi_D, mtype, MAT_INPLACE_MATRIX, &pcbddc->coarse_psi_D));
5479: }
5480: }
5481: PetscFunctionReturn(PETSC_SUCCESS);
5482: }
5484: PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat *B)
5485: {
5486: Mat *work_mat;
5487: IS isrow_s, iscol_s;
5488: PetscBool rsorted, csorted;
5489: PetscInt rsize, *idxs_perm_r = NULL, csize, *idxs_perm_c = NULL;
5491: PetscFunctionBegin;
5492: PetscCall(ISSorted(isrow, &rsorted));
5493: PetscCall(ISSorted(iscol, &csorted));
5494: PetscCall(ISGetLocalSize(isrow, &rsize));
5495: PetscCall(ISGetLocalSize(iscol, &csize));
5497: if (!rsorted) {
5498: const PetscInt *idxs;
5499: PetscInt *idxs_sorted, i;
5501: PetscCall(PetscMalloc1(rsize, &idxs_perm_r));
5502: PetscCall(PetscMalloc1(rsize, &idxs_sorted));
5503: for (i = 0; i < rsize; i++) idxs_perm_r[i] = i;
5504: PetscCall(ISGetIndices(isrow, &idxs));
5505: PetscCall(PetscSortIntWithPermutation(rsize, idxs, idxs_perm_r));
5506: for (i = 0; i < rsize; i++) idxs_sorted[i] = idxs[idxs_perm_r[i]];
5507: PetscCall(ISRestoreIndices(isrow, &idxs));
5508: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_sorted, PETSC_OWN_POINTER, &isrow_s));
5509: } else {
5510: PetscCall(PetscObjectReference((PetscObject)isrow));
5511: isrow_s = isrow;
5512: }
5514: if (!csorted) {
5515: if (isrow == iscol) {
5516: PetscCall(PetscObjectReference((PetscObject)isrow_s));
5517: iscol_s = isrow_s;
5518: } else {
5519: const PetscInt *idxs;
5520: PetscInt *idxs_sorted, i;
5522: PetscCall(PetscMalloc1(csize, &idxs_perm_c));
5523: PetscCall(PetscMalloc1(csize, &idxs_sorted));
5524: for (i = 0; i < csize; i++) idxs_perm_c[i] = i;
5525: PetscCall(ISGetIndices(iscol, &idxs));
5526: PetscCall(PetscSortIntWithPermutation(csize, idxs, idxs_perm_c));
5527: for (i = 0; i < csize; i++) idxs_sorted[i] = idxs[idxs_perm_c[i]];
5528: PetscCall(ISRestoreIndices(iscol, &idxs));
5529: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_sorted, PETSC_OWN_POINTER, &iscol_s));
5530: }
5531: } else {
5532: PetscCall(PetscObjectReference((PetscObject)iscol));
5533: iscol_s = iscol;
5534: }
5536: PetscCall(MatCreateSubMatrices(A, 1, &isrow_s, &iscol_s, MAT_INITIAL_MATRIX, &work_mat));
5538: if (!rsorted || !csorted) {
5539: Mat new_mat;
5540: IS is_perm_r, is_perm_c;
5542: if (!rsorted) {
5543: PetscInt *idxs_r, i;
5544: PetscCall(PetscMalloc1(rsize, &idxs_r));
5545: for (i = 0; i < rsize; i++) idxs_r[idxs_perm_r[i]] = i;
5546: PetscCall(PetscFree(idxs_perm_r));
5547: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, rsize, idxs_r, PETSC_OWN_POINTER, &is_perm_r));
5548: } else {
5549: PetscCall(ISCreateStride(PETSC_COMM_SELF, rsize, 0, 1, &is_perm_r));
5550: }
5551: PetscCall(ISSetPermutation(is_perm_r));
5553: if (!csorted) {
5554: if (isrow_s == iscol_s) {
5555: PetscCall(PetscObjectReference((PetscObject)is_perm_r));
5556: is_perm_c = is_perm_r;
5557: } else {
5558: PetscInt *idxs_c, i;
5559: PetscCheck(idxs_perm_c, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Permutation array not present");
5560: PetscCall(PetscMalloc1(csize, &idxs_c));
5561: for (i = 0; i < csize; i++) idxs_c[idxs_perm_c[i]] = i;
5562: PetscCall(PetscFree(idxs_perm_c));
5563: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, csize, idxs_c, PETSC_OWN_POINTER, &is_perm_c));
5564: }
5565: } else {
5566: PetscCall(ISCreateStride(PETSC_COMM_SELF, csize, 0, 1, &is_perm_c));
5567: }
5568: PetscCall(ISSetPermutation(is_perm_c));
5570: PetscCall(MatPermute(work_mat[0], is_perm_r, is_perm_c, &new_mat));
5571: PetscCall(MatDestroy(&work_mat[0]));
5572: work_mat[0] = new_mat;
5573: PetscCall(ISDestroy(&is_perm_r));
5574: PetscCall(ISDestroy(&is_perm_c));
5575: }
5577: PetscCall(PetscObjectReference((PetscObject)work_mat[0]));
5578: *B = work_mat[0];
5579: PetscCall(MatDestroyMatrices(1, &work_mat));
5580: PetscCall(ISDestroy(&isrow_s));
5581: PetscCall(ISDestroy(&iscol_s));
5582: PetscFunctionReturn(PETSC_SUCCESS);
5583: }
5585: static PetscErrorCode MatPtAPWithPrefix_Private(Mat A, Mat P, PetscReal fill, const char *prefix, Mat *C)
5586: {
5587: PetscFunctionBegin;
5588: PetscCall(MatProductCreate(A, P, NULL, C));
5589: PetscCall(MatProductSetType(*C, MATPRODUCT_PtAP));
5590: PetscCall(MatProductSetAlgorithm(*C, "default"));
5591: PetscCall(MatProductSetFill(*C, fill));
5592: PetscCall(MatSetOptionsPrefix(*C, prefix));
5593: PetscCall(MatProductSetFromOptions(*C));
5594: PetscCall(MatProductSymbolic(*C));
5595: PetscCall(MatProductNumeric(*C));
5596: (*C)->symmetric = A->symmetric;
5597: (*C)->spd = A->spd;
5598: PetscFunctionReturn(PETSC_SUCCESS);
5599: }
5601: PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix)
5602: {
5603: Mat_IS *matis = (Mat_IS *)pc->pmat->data;
5604: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
5605: Mat new_mat, lA;
5606: IS is_local, is_global;
5607: PetscInt local_size;
5608: PetscBool isseqaij, issym, isset;
5609: char ptapprefix[256];
5611: PetscFunctionBegin;
5612: PetscCall(MatDestroy(&pcbddc->local_mat));
5613: PetscCall(MatGetSize(matis->A, &local_size, NULL));
5614: if (pcbddc->mat_graph->multi_element) {
5615: Mat *mats, *bdiags;
5616: IS *gsubs;
5617: PetscInt nsubs = pcbddc->n_local_subs;
5619: PetscCall(PetscCalloc1(nsubs * nsubs, &mats));
5620: #if 1
5621: PetscCall(PetscMalloc1(nsubs, &gsubs));
5622: for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, pcbddc->local_subs[i], &gsubs[i]));
5623: PetscCall(MatCreateSubMatrices(ChangeOfBasisMatrix, nsubs, gsubs, gsubs, MAT_INITIAL_MATRIX, &bdiags));
5624: for (PetscInt i = 0; i < nsubs; i++) PetscCall(ISDestroy(&gsubs[i]));
5625: PetscCall(PetscFree(gsubs));
5626: #else /* this does not work since MatCreateSubMatrices does not support repeated indices */
5627: Mat *tmats;
5628: PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local));
5629: PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global));
5630: PetscCall(ISDestroy(&is_local));
5631: PetscCall(MatSetOption(ChangeOfBasisMatrix, MAT_SUBMAT_SINGLEIS, PETSC_TRUE));
5632: PetscCall(MatCreateSubMatrices(ChangeOfBasisMatrix, 1, &is_global, &is_global, MAT_INITIAL_MATRIX, &tmats));
5633: PetscCall(ISDestroy(&is_global));
5634: PetscCall(MatCreateSubMatrices(tmats[0], nsubs, pcbddc->local_subs, pcbddc->local_subs, MAT_INITIAL_MATRIX, &bdiags));
5635: PetscCall(MatDestroySubMatrices(1, &tmats));
5636: #endif
5637: for (PetscInt i = 0; i < nsubs; i++) mats[i * (1 + nsubs)] = bdiags[i];
5638: PetscCall(MatCreateNest(PETSC_COMM_SELF, nsubs, pcbddc->local_subs, nsubs, pcbddc->local_subs, mats, &new_mat));
5639: PetscCall(MatConvert(new_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &new_mat));
5640: PetscCall(MatDestroySubMatrices(nsubs, &bdiags));
5641: PetscCall(PetscFree(mats));
5642: } else {
5643: PetscCall(ISCreateStride(PetscObjectComm((PetscObject)matis->A), local_size, 0, 1, &is_local));
5644: PetscCall(ISLocalToGlobalMappingApplyIS(matis->rmapping, is_local, &is_global));
5645: PetscCall(ISDestroy(&is_local));
5646: PetscCall(MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix, is_global, is_global, &new_mat));
5647: PetscCall(ISDestroy(&is_global));
5648: }
5649: if (pcbddc->dbg_flag) {
5650: Vec x, x_change;
5651: PetscReal error;
5653: PetscCall(MatCreateVecs(ChangeOfBasisMatrix, &x, &x_change));
5654: PetscCall(VecSetRandom(x, NULL));
5655: PetscCall(MatMult(ChangeOfBasisMatrix, x, x_change));
5656: PetscCall(VecScatterBegin(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD));
5657: PetscCall(VecScatterEnd(matis->cctx, x, matis->x, INSERT_VALUES, SCATTER_FORWARD));
5658: PetscCall(MatMult(new_mat, matis->x, matis->y));
5659: if (!pcbddc->change_interior) {
5660: const PetscScalar *x, *y, *v;
5661: PetscReal lerror = 0.;
5662: PetscInt i;
5664: PetscCall(VecGetArrayRead(matis->x, &x));
5665: PetscCall(VecGetArrayRead(matis->y, &y));
5666: PetscCall(VecGetArrayRead(matis->counter, &v));
5667: for (i = 0; i < local_size; i++)
5668: if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i] - y[i]) > lerror) lerror = PetscAbsScalar(x[i] - y[i]);
5669: PetscCall(VecRestoreArrayRead(matis->x, &x));
5670: PetscCall(VecRestoreArrayRead(matis->y, &y));
5671: PetscCall(VecRestoreArrayRead(matis->counter, &v));
5672: PetscCallMPI(MPIU_Allreduce(&lerror, &error, 1, MPIU_REAL, MPIU_MAX, PetscObjectComm((PetscObject)pc)));
5673: if (error > PETSC_SMALL) {
5674: if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5675: SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on I: %1.6e", (double)error);
5676: } else {
5677: SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on I: %1.6e", (double)error);
5678: }
5679: }
5680: }
5681: PetscCall(VecScatterBegin(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE));
5682: PetscCall(VecScatterEnd(matis->rctx, matis->y, x, INSERT_VALUES, SCATTER_REVERSE));
5683: PetscCall(VecAXPY(x, -1.0, x_change));
5684: PetscCall(VecNorm(x, NORM_INFINITY, &error));
5685: if (error > PETSC_SMALL) {
5686: if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) {
5687: SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error);
5688: } else {
5689: SETERRQ(PetscObjectComm((PetscObject)pc), PETSC_ERR_USER, "Error global vs local change on N: %1.6e", (double)error);
5690: }
5691: }
5692: PetscCall(VecDestroy(&x));
5693: PetscCall(VecDestroy(&x_change));
5694: }
5696: /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */
5697: PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject *)&lA));
5699: /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */
5700: if (((PetscObject)pc)->prefix) PetscCall(PetscSNPrintf(ptapprefix, sizeof(ptapprefix), "%spc_bddc_change_", ((PetscObject)pc)->prefix));
5701: else PetscCall(PetscSNPrintf(ptapprefix, sizeof(ptapprefix), "pc_bddc_change_"));
5702: PetscCall(PetscObjectBaseTypeCompare((PetscObject)matis->A, MATSEQAIJ, &isseqaij));
5703: if (isseqaij) {
5704: PetscCall(MatDestroy(&pcbddc->local_mat));
5705: PetscCall(MatPtAPWithPrefix_Private(matis->A, new_mat, PETSC_DEFAULT, ptapprefix, &pcbddc->local_mat));
5706: if (lA) {
5707: Mat work;
5708: PetscCall(MatPtAPWithPrefix_Private(lA, new_mat, PETSC_DEFAULT, ptapprefix, &work));
5709: PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work));
5710: PetscCall(MatDestroy(&work));
5711: }
5712: } else {
5713: Mat work_mat;
5715: PetscCall(MatDestroy(&pcbddc->local_mat));
5716: PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat));
5717: PetscCall(MatPtAPWithPrefix_Private(work_mat, new_mat, PETSC_DEFAULT, ptapprefix, &pcbddc->local_mat));
5718: PetscCall(MatDestroy(&work_mat));
5719: if (lA) {
5720: Mat work;
5721: PetscCall(MatConvert(lA, MATSEQAIJ, MAT_INITIAL_MATRIX, &work_mat));
5722: PetscCall(MatPtAPWithPrefix_Private(work_mat, new_mat, PETSC_DEFAULT, ptapprefix, &work));
5723: PetscCall(PetscObjectCompose((PetscObject)pc, "__KSPFETIDP_lA", (PetscObject)work));
5724: PetscCall(MatDestroy(&work));
5725: }
5726: }
5727: PetscCall(MatIsSymmetricKnown(matis->A, &isset, &issym));
5728: if (isset) PetscCall(MatSetOption(pcbddc->local_mat, MAT_SYMMETRIC, issym));
5729: PetscCall(MatDestroy(&new_mat));
5730: PetscFunctionReturn(PETSC_SUCCESS);
5731: }
5733: PetscErrorCode PCBDDCSetUpLocalScatters(PC pc)
5734: {
5735: PC_IS *pcis = (PC_IS *)pc->data;
5736: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
5737: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
5738: PetscInt *idx_R_local = NULL;
5739: PetscInt n_vertices, i, j, n_R, n_D, n_B;
5740: PetscInt vbs, bs;
5741: PetscBT bitmask = NULL;
5743: PetscFunctionBegin;
5744: /*
5745: No need to setup local scatters if
5746: - primal space is unchanged
5747: AND
5748: - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains)
5749: AND
5750: - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine
5751: */
5752: if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) PetscFunctionReturn(PETSC_SUCCESS);
5753: /* destroy old objects */
5754: PetscCall(ISDestroy(&pcbddc->is_R_local));
5755: PetscCall(VecScatterDestroy(&pcbddc->R_to_B));
5756: PetscCall(VecScatterDestroy(&pcbddc->R_to_D));
5757: /* Set Non-overlapping dimensions */
5758: n_B = pcis->n_B;
5759: n_D = pcis->n - n_B;
5760: n_vertices = pcbddc->n_vertices;
5762: /* Dohrmann's notation: dofs split in R (Remaining: all dofs but the vertices) and V (Vertices) */
5764: /* create auxiliary bitmask and allocate workspace */
5765: if (!sub_schurs || !sub_schurs->reuse_solver) {
5766: PetscCall(PetscMalloc1(pcis->n - n_vertices, &idx_R_local));
5767: PetscCall(PetscBTCreate(pcis->n, &bitmask));
5768: for (i = 0; i < n_vertices; i++) PetscCall(PetscBTSet(bitmask, pcbddc->local_primal_ref_node[i]));
5770: for (i = 0, n_R = 0; i < pcis->n; i++) {
5771: if (!PetscBTLookup(bitmask, i)) idx_R_local[n_R++] = i;
5772: }
5773: } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */
5774: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5776: PetscCall(ISGetIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local));
5777: PetscCall(ISGetLocalSize(reuse_solver->is_R, &n_R));
5778: }
5780: /* Block code */
5781: vbs = 1;
5782: PetscCall(MatGetBlockSize(pcbddc->local_mat, &bs));
5783: if (bs > 1 && !(n_vertices % bs)) {
5784: PetscBool is_blocked = PETSC_TRUE;
5785: PetscInt *vary;
5786: if (!sub_schurs || !sub_schurs->reuse_solver) {
5787: PetscCall(PetscMalloc1(pcis->n / bs, &vary));
5788: PetscCall(PetscArrayzero(vary, pcis->n / bs));
5789: /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */
5790: /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */
5791: for (i = 0; i < n_vertices; i++) vary[pcbddc->local_primal_ref_node[i] / bs]++;
5792: for (i = 0; i < pcis->n / bs; i++) {
5793: if (vary[i] != 0 && vary[i] != bs) {
5794: is_blocked = PETSC_FALSE;
5795: break;
5796: }
5797: }
5798: PetscCall(PetscFree(vary));
5799: } else {
5800: /* Verify directly the R set */
5801: for (i = 0; i < n_R / bs; i++) {
5802: PetscInt j, node = idx_R_local[bs * i];
5803: for (j = 1; j < bs; j++) {
5804: if (node != idx_R_local[bs * i + j] - j) {
5805: is_blocked = PETSC_FALSE;
5806: break;
5807: }
5808: }
5809: }
5810: }
5811: if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */
5812: vbs = bs;
5813: for (i = 0; i < n_R / vbs; i++) idx_R_local[i] = idx_R_local[vbs * i] / vbs;
5814: }
5815: }
5816: PetscCall(ISCreateBlock(PETSC_COMM_SELF, vbs, n_R / vbs, idx_R_local, PETSC_COPY_VALUES, &pcbddc->is_R_local));
5817: if (sub_schurs && sub_schurs->reuse_solver) {
5818: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5820: PetscCall(ISRestoreIndices(reuse_solver->is_R, (const PetscInt **)&idx_R_local));
5821: PetscCall(ISDestroy(&reuse_solver->is_R));
5822: PetscCall(PetscObjectReference((PetscObject)pcbddc->is_R_local));
5823: reuse_solver->is_R = pcbddc->is_R_local;
5824: } else {
5825: PetscCall(PetscFree(idx_R_local));
5826: }
5828: /* print some info if requested */
5829: if (pcbddc->dbg_flag) {
5830: PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
5831: PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5832: PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
5833: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d local dimensions\n", PetscGlobalRank));
5834: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "local_size = %" PetscInt_FMT ", dirichlet_size = %" PetscInt_FMT ", boundary_size = %" PetscInt_FMT "\n", pcis->n, n_D, n_B));
5835: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "r_size = %" PetscInt_FMT ", v_size = %" PetscInt_FMT ", constraints = %" PetscInt_FMT ", local_primal_size = %" PetscInt_FMT "\n", n_R, n_vertices,
5836: pcbddc->local_primal_size - n_vertices - pcbddc->benign_n, pcbddc->local_primal_size));
5837: PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
5838: }
5840: /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */
5841: if (!sub_schurs || !sub_schurs->reuse_solver) {
5842: IS is_aux1, is_aux2;
5843: PetscInt *aux_array1, *aux_array2, *is_indices, *idx_R_local;
5845: PetscCall(ISGetIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local));
5846: PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array1));
5847: PetscCall(PetscMalloc1(pcis->n_B - n_vertices, &aux_array2));
5848: PetscCall(ISGetIndices(pcis->is_I_local, (const PetscInt **)&is_indices));
5849: for (i = 0; i < n_D; i++) PetscCall(PetscBTSet(bitmask, is_indices[i]));
5850: PetscCall(ISRestoreIndices(pcis->is_I_local, (const PetscInt **)&is_indices));
5851: for (i = 0, j = 0; i < n_R; i++) {
5852: if (!PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5853: }
5854: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1));
5855: PetscCall(ISGetIndices(pcis->is_B_local, (const PetscInt **)&is_indices));
5856: for (i = 0, j = 0; i < n_B; i++) {
5857: if (!PetscBTLookup(bitmask, is_indices[i])) aux_array2[j++] = i;
5858: }
5859: PetscCall(ISRestoreIndices(pcis->is_B_local, (const PetscInt **)&is_indices));
5860: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array2, PETSC_OWN_POINTER, &is_aux2));
5861: PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_B, is_aux2, &pcbddc->R_to_B));
5862: PetscCall(ISDestroy(&is_aux1));
5863: PetscCall(ISDestroy(&is_aux2));
5865: if (pcbddc->switch_static || pcbddc->dbg_flag) {
5866: PetscCall(PetscMalloc1(n_D, &aux_array1));
5867: for (i = 0, j = 0; i < n_R; i++) {
5868: if (PetscBTLookup(bitmask, idx_R_local[i])) aux_array1[j++] = i;
5869: }
5870: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, j, aux_array1, PETSC_OWN_POINTER, &is_aux1));
5871: PetscCall(VecScatterCreate(pcbddc->vec1_R, is_aux1, pcis->vec1_D, (IS)0, &pcbddc->R_to_D));
5872: PetscCall(ISDestroy(&is_aux1));
5873: }
5874: PetscCall(PetscBTDestroy(&bitmask));
5875: PetscCall(ISRestoreIndices(pcbddc->is_R_local, (const PetscInt **)&idx_R_local));
5876: } else {
5877: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
5878: IS tis;
5879: PetscInt schur_size;
5881: PetscCall(ISGetLocalSize(reuse_solver->is_B, &schur_size));
5882: PetscCall(ISCreateStride(PETSC_COMM_SELF, schur_size, n_D, 1, &tis));
5883: PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_B, reuse_solver->is_B, &pcbddc->R_to_B));
5884: PetscCall(ISDestroy(&tis));
5885: if (pcbddc->switch_static || pcbddc->dbg_flag) {
5886: PetscCall(ISCreateStride(PETSC_COMM_SELF, n_D, 0, 1, &tis));
5887: PetscCall(VecScatterCreate(pcbddc->vec1_R, tis, pcis->vec1_D, (IS)0, &pcbddc->R_to_D));
5888: PetscCall(ISDestroy(&tis));
5889: }
5890: }
5891: PetscFunctionReturn(PETSC_SUCCESS);
5892: }
5894: PetscErrorCode MatNullSpacePropagateAny_Private(Mat A, IS is, Mat B)
5895: {
5896: MatNullSpace NullSpace;
5897: Mat dmat;
5898: const Vec *nullvecs;
5899: Vec v, v2, *nullvecs2;
5900: VecScatter sct = NULL;
5901: PetscScalar *ddata;
5902: PetscInt k, nnsp_size, bsiz, bsiz2, n, N, bs;
5903: PetscBool nnsp_has_cnst;
5905: PetscFunctionBegin;
5906: if (!is && !B) { /* MATIS */
5907: Mat_IS *matis = (Mat_IS *)A->data;
5909: if (!B) PetscCall(MatISGetLocalMat(A, &B));
5910: sct = matis->cctx;
5911: PetscCall(PetscObjectReference((PetscObject)sct));
5912: } else {
5913: PetscCall(MatGetNullSpace(B, &NullSpace));
5914: if (!NullSpace) PetscCall(MatGetNearNullSpace(B, &NullSpace));
5915: if (NullSpace) PetscFunctionReturn(PETSC_SUCCESS);
5916: }
5917: PetscCall(MatGetNullSpace(A, &NullSpace));
5918: if (!NullSpace) PetscCall(MatGetNearNullSpace(A, &NullSpace));
5919: if (!NullSpace) PetscFunctionReturn(PETSC_SUCCESS);
5921: PetscCall(MatCreateVecs(A, &v, NULL));
5922: PetscCall(MatCreateVecs(B, &v2, NULL));
5923: if (!sct) PetscCall(VecScatterCreate(v, is, v2, NULL, &sct));
5924: PetscCall(MatNullSpaceGetVecs(NullSpace, &nnsp_has_cnst, &nnsp_size, &nullvecs));
5925: bsiz = bsiz2 = nnsp_size + !!nnsp_has_cnst;
5926: PetscCall(PetscMalloc1(bsiz, &nullvecs2));
5927: PetscCall(VecGetBlockSize(v2, &bs));
5928: PetscCall(VecGetSize(v2, &N));
5929: PetscCall(VecGetLocalSize(v2, &n));
5930: PetscCall(PetscMalloc1(n * bsiz, &ddata));
5931: for (k = 0; k < nnsp_size; k++) {
5932: PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * k, &nullvecs2[k]));
5933: PetscCall(VecScatterBegin(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD));
5934: PetscCall(VecScatterEnd(sct, nullvecs[k], nullvecs2[k], INSERT_VALUES, SCATTER_FORWARD));
5935: }
5936: if (nnsp_has_cnst) {
5937: PetscCall(VecCreateMPIWithArray(PetscObjectComm((PetscObject)B), bs, n, N, ddata + n * nnsp_size, &nullvecs2[nnsp_size]));
5938: PetscCall(VecSet(nullvecs2[nnsp_size], 1.0));
5939: }
5940: PetscCall(PCBDDCOrthonormalizeVecs(&bsiz2, nullvecs2));
5941: PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)B), PETSC_FALSE, bsiz2, nullvecs2, &NullSpace));
5943: PetscCall(MatCreateDense(PetscObjectComm((PetscObject)B), n, PETSC_DECIDE, N, bsiz2, ddata, &dmat));
5944: PetscCall(PetscObjectContainerCompose((PetscObject)dmat, "_PBDDC_Null_dmat_arr", ddata, PetscCtxDestroyDefault));
5945: PetscCall(PetscObjectCompose((PetscObject)NullSpace, "_PBDDC_Null_dmat", (PetscObject)dmat));
5946: PetscCall(MatDestroy(&dmat));
5948: for (k = 0; k < bsiz; k++) PetscCall(VecDestroy(&nullvecs2[k]));
5949: PetscCall(PetscFree(nullvecs2));
5950: PetscCall(MatSetNearNullSpace(B, NullSpace));
5951: PetscCall(MatNullSpaceDestroy(&NullSpace));
5952: PetscCall(VecDestroy(&v));
5953: PetscCall(VecDestroy(&v2));
5954: PetscCall(VecScatterDestroy(&sct));
5955: PetscFunctionReturn(PETSC_SUCCESS);
5956: }
5958: PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann)
5959: {
5960: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
5961: PC_IS *pcis = (PC_IS *)pc->data;
5962: PC pc_temp;
5963: Mat A_RR;
5964: MatNullSpace nnsp;
5965: MatReuse reuse;
5966: PetscScalar m_one = -1.0;
5967: PetscReal value;
5968: PetscInt n_D, n_R;
5969: PetscBool issbaij, opts, isset, issym;
5970: PetscBool f = PETSC_FALSE;
5971: char dir_prefix[256], neu_prefix[256], str_level[16];
5972: size_t len;
5974: PetscFunctionBegin;
5975: PetscCall(PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0));
5976: /* approximate solver, propagate NearNullSpace if needed */
5977: if (!pc->setupcalled && (pcbddc->NullSpace_corr[0] || pcbddc->NullSpace_corr[2])) {
5978: MatNullSpace gnnsp1, gnnsp2;
5979: PetscBool lhas, ghas;
5981: PetscCall(MatGetNearNullSpace(pcbddc->local_mat, &nnsp));
5982: PetscCall(MatGetNearNullSpace(pc->pmat, &gnnsp1));
5983: PetscCall(MatGetNullSpace(pc->pmat, &gnnsp2));
5984: lhas = nnsp ? PETSC_TRUE : PETSC_FALSE;
5985: PetscCallMPI(MPIU_Allreduce(&lhas, &ghas, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
5986: if (!ghas && (gnnsp1 || gnnsp2)) PetscCall(MatNullSpacePropagateAny_Private(pc->pmat, NULL, NULL));
5987: }
5989: /* compute prefixes */
5990: PetscCall(PetscStrncpy(dir_prefix, "", sizeof(dir_prefix)));
5991: PetscCall(PetscStrncpy(neu_prefix, "", sizeof(neu_prefix)));
5992: if (!pcbddc->current_level) {
5993: PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, sizeof(dir_prefix)));
5994: PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, sizeof(neu_prefix)));
5995: PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix)));
5996: PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix)));
5997: } else {
5998: PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%" PetscInt_FMT "_", pcbddc->current_level));
5999: PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len));
6000: len -= 15; /* remove "pc_bddc_coarse_" */
6001: if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */
6002: if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
6003: /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */
6004: PetscCall(PetscStrncpy(dir_prefix, ((PetscObject)pc)->prefix, len + 1));
6005: PetscCall(PetscStrncpy(neu_prefix, ((PetscObject)pc)->prefix, len + 1));
6006: PetscCall(PetscStrlcat(dir_prefix, "pc_bddc_dirichlet_", sizeof(dir_prefix)));
6007: PetscCall(PetscStrlcat(neu_prefix, "pc_bddc_neumann_", sizeof(neu_prefix)));
6008: PetscCall(PetscStrlcat(dir_prefix, str_level, sizeof(dir_prefix)));
6009: PetscCall(PetscStrlcat(neu_prefix, str_level, sizeof(neu_prefix)));
6010: }
6012: /* DIRICHLET PROBLEM */
6013: if (dirichlet) {
6014: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6015: if (pcbddc->benign_n && !pcbddc->benign_change_explicit) {
6016: PetscCheck(sub_schurs && sub_schurs->reuse_solver, PETSC_COMM_SELF, PETSC_ERR_SUP, "Not yet implemented");
6017: if (pcbddc->dbg_flag) {
6018: Mat A_IIn;
6020: PetscCall(PCBDDCBenignProject(pc, pcis->is_I_local, pcis->is_I_local, &A_IIn));
6021: PetscCall(MatDestroy(&pcis->A_II));
6022: pcis->A_II = A_IIn;
6023: }
6024: }
6025: PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym));
6026: if (isset) PetscCall(MatSetOption(pcis->A_II, MAT_SYMMETRIC, issym));
6028: /* Matrix for Dirichlet problem is pcis->A_II */
6029: n_D = pcis->n - pcis->n_B;
6030: opts = PETSC_FALSE;
6031: if (!pcbddc->ksp_D) { /* create object if not yet build */
6032: opts = PETSC_TRUE;
6033: PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_D));
6034: PetscCall(KSPSetNestLevel(pcbddc->ksp_D, pc->kspnestlevel));
6035: PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D, (PetscObject)pc, 1));
6036: /* default */
6037: PetscCall(KSPSetType(pcbddc->ksp_D, KSPPREONLY));
6038: PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_D, dir_prefix));
6039: PetscCall(PetscObjectTypeCompare((PetscObject)pcis->pA_II, MATSEQSBAIJ, &issbaij));
6040: PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
6041: if (issbaij) {
6042: PetscCall(PCSetType(pc_temp, PCCHOLESKY));
6043: } else {
6044: PetscCall(PCSetType(pc_temp, PCLU));
6045: }
6046: PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_D, pc->erroriffailure));
6047: }
6048: PetscCall(MatSetOptionsPrefix(pcis->pA_II, ((PetscObject)pcbddc->ksp_D)->prefix));
6049: PetscCall(MatViewFromOptions(pcis->pA_II, NULL, "-mat_view"));
6050: PetscCall(KSPSetOperators(pcbddc->ksp_D, pcis->A_II, pcis->pA_II));
6051: /* Allow user's customization */
6052: if (opts) PetscCall(KSPSetFromOptions(pcbddc->ksp_D));
6053: PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp));
6054: if (pcbddc->NullSpace_corr[0] && !nnsp) { /* approximate solver, propagate NearNullSpace */
6055: PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcis->is_I_local, pcis->pA_II));
6056: }
6057: PetscCall(MatGetNearNullSpace(pcis->pA_II, &nnsp));
6058: PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
6059: PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f));
6060: if (f && pcbddc->mat_graph->cloc && !nnsp) {
6061: PetscReal *coords = pcbddc->mat_graph->coords, *scoords;
6062: const PetscInt *idxs;
6063: PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d;
6065: PetscCall(ISGetLocalSize(pcis->is_I_local, &nl));
6066: PetscCall(ISGetIndices(pcis->is_I_local, &idxs));
6067: PetscCall(PetscMalloc1(nl * cdim, &scoords));
6068: for (i = 0; i < nl; i++) {
6069: for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
6070: }
6071: PetscCall(ISRestoreIndices(pcis->is_I_local, &idxs));
6072: PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords));
6073: PetscCall(PetscFree(scoords));
6074: }
6075: if (sub_schurs && sub_schurs->reuse_solver) {
6076: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6078: PetscCall(KSPSetPC(pcbddc->ksp_D, reuse_solver->interior_solver));
6079: }
6081: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
6082: if (!n_D) {
6083: PetscCall(KSPGetPC(pcbddc->ksp_D, &pc_temp));
6084: PetscCall(PCSetType(pc_temp, PCNONE));
6085: }
6086: PetscCall(KSPSetUp(pcbddc->ksp_D));
6087: /* set ksp_D into pcis data */
6088: PetscCall(PetscObjectReference((PetscObject)pcbddc->ksp_D));
6089: PetscCall(KSPDestroy(&pcis->ksp_D));
6090: pcis->ksp_D = pcbddc->ksp_D;
6091: }
6093: /* NEUMANN PROBLEM */
6094: A_RR = NULL;
6095: if (neumann) {
6096: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6097: PetscInt ibs, mbs;
6098: PetscBool issbaij, reuse_neumann_solver, isset, issym;
6099: Mat_IS *matis = (Mat_IS *)pc->pmat->data;
6101: reuse_neumann_solver = PETSC_FALSE;
6102: if (sub_schurs && sub_schurs->reuse_solver) {
6103: IS iP;
6105: reuse_neumann_solver = PETSC_TRUE;
6106: PetscCall(PetscObjectQuery((PetscObject)sub_schurs->A, "__KSPFETIDP_iP", (PetscObject *)&iP));
6107: if (iP) reuse_neumann_solver = PETSC_FALSE;
6108: }
6109: /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */
6110: PetscCall(ISGetSize(pcbddc->is_R_local, &n_R));
6111: if (pcbddc->ksp_R) { /* already created ksp */
6112: PetscInt nn_R;
6113: PetscCall(KSPGetOperators(pcbddc->ksp_R, NULL, &A_RR));
6114: PetscCall(PetscObjectReference((PetscObject)A_RR));
6115: PetscCall(MatGetSize(A_RR, &nn_R, NULL));
6116: if (nn_R != n_R) { /* old ksp is not reusable, so reset it */
6117: PetscCall(KSPReset(pcbddc->ksp_R));
6118: PetscCall(MatDestroy(&A_RR));
6119: reuse = MAT_INITIAL_MATRIX;
6120: } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */
6121: if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */
6122: PetscCall(MatDestroy(&A_RR));
6123: reuse = MAT_INITIAL_MATRIX;
6124: } else { /* safe to reuse the matrix */
6125: reuse = MAT_REUSE_MATRIX;
6126: }
6127: }
6128: /* last check */
6129: if (pc->flag == DIFFERENT_NONZERO_PATTERN) {
6130: PetscCall(MatDestroy(&A_RR));
6131: reuse = MAT_INITIAL_MATRIX;
6132: }
6133: } else { /* first time, so we need to create the matrix */
6134: reuse = MAT_INITIAL_MATRIX;
6135: }
6136: /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection
6137: TODO: Get Rid of these conversions */
6138: PetscCall(MatGetBlockSize(pcbddc->local_mat, &mbs));
6139: PetscCall(ISGetBlockSize(pcbddc->is_R_local, &ibs));
6140: PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->local_mat, MATSEQSBAIJ, &issbaij));
6141: if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */
6142: if (matis->A == pcbddc->local_mat) {
6143: PetscCall(MatDestroy(&pcbddc->local_mat));
6144: PetscCall(MatConvert(matis->A, MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat));
6145: } else {
6146: PetscCall(MatConvert(pcbddc->local_mat, MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat));
6147: }
6148: } else if (issbaij) { /* need to convert to BAIJ to get off-diagonal blocks */
6149: if (matis->A == pcbddc->local_mat) {
6150: PetscCall(MatDestroy(&pcbddc->local_mat));
6151: PetscCall(MatConvert(matis->A, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INITIAL_MATRIX, &pcbddc->local_mat));
6152: } else {
6153: PetscCall(MatConvert(pcbddc->local_mat, mbs > 1 ? MATSEQBAIJ : MATSEQAIJ, MAT_INPLACE_MATRIX, &pcbddc->local_mat));
6154: }
6155: }
6156: /* extract A_RR */
6157: if (reuse_neumann_solver) {
6158: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6160: if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */
6161: PetscCall(MatDestroy(&A_RR));
6162: if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */
6163: PetscCall(PCBDDCBenignProject(pc, pcbddc->is_R_local, pcbddc->is_R_local, &A_RR));
6164: } else {
6165: PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, MAT_INITIAL_MATRIX, &A_RR));
6166: }
6167: } else {
6168: PetscCall(MatDestroy(&A_RR));
6169: PetscCall(PCGetOperators(reuse_solver->correction_solver, &A_RR, NULL));
6170: PetscCall(PetscObjectReference((PetscObject)A_RR));
6171: }
6172: } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */
6173: PetscCall(MatCreateSubMatrix(pcbddc->local_mat, pcbddc->is_R_local, pcbddc->is_R_local, reuse, &A_RR));
6174: }
6175: PetscCall(MatIsSymmetricKnown(pcbddc->local_mat, &isset, &issym));
6176: if (isset) PetscCall(MatSetOption(A_RR, MAT_SYMMETRIC, issym));
6177: opts = PETSC_FALSE;
6178: if (!pcbddc->ksp_R) { /* create object if not present */
6179: opts = PETSC_TRUE;
6180: PetscCall(KSPCreate(PETSC_COMM_SELF, &pcbddc->ksp_R));
6181: PetscCall(KSPSetNestLevel(pcbddc->ksp_R, pc->kspnestlevel));
6182: PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R, (PetscObject)pc, 1));
6183: /* default */
6184: PetscCall(KSPSetType(pcbddc->ksp_R, KSPPREONLY));
6185: PetscCall(KSPSetOptionsPrefix(pcbddc->ksp_R, neu_prefix));
6186: PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6187: PetscCall(PetscObjectTypeCompare((PetscObject)A_RR, MATSEQSBAIJ, &issbaij));
6188: if (issbaij) {
6189: PetscCall(PCSetType(pc_temp, PCCHOLESKY));
6190: } else {
6191: PetscCall(PCSetType(pc_temp, PCLU));
6192: }
6193: PetscCall(KSPSetErrorIfNotConverged(pcbddc->ksp_R, pc->erroriffailure));
6194: }
6195: PetscCall(MatSetOptionsPrefix(A_RR, ((PetscObject)pcbddc->ksp_R)->prefix));
6196: PetscCall(MatViewFromOptions(A_RR, NULL, "-mat_view"));
6197: PetscCall(KSPSetOperators(pcbddc->ksp_R, A_RR, A_RR));
6198: if (opts) { /* Allow user's customization once */
6199: PetscCall(KSPSetFromOptions(pcbddc->ksp_R));
6200: }
6201: PetscCall(MatGetNearNullSpace(A_RR, &nnsp));
6202: if (pcbddc->NullSpace_corr[2] && !nnsp) { /* approximate solver, propagate NearNullSpace */
6203: PetscCall(MatNullSpacePropagateAny_Private(pcbddc->local_mat, pcbddc->is_R_local, A_RR));
6204: }
6205: PetscCall(MatGetNearNullSpace(A_RR, &nnsp));
6206: PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6207: PetscCall(PetscObjectHasFunction((PetscObject)pc_temp, "PCSetCoordinates_C", &f));
6208: if (f && pcbddc->mat_graph->cloc && !nnsp) {
6209: PetscReal *coords = pcbddc->mat_graph->coords, *scoords;
6210: const PetscInt *idxs;
6211: PetscInt cdim = pcbddc->mat_graph->cdim, nl, i, d;
6213: PetscCall(ISGetLocalSize(pcbddc->is_R_local, &nl));
6214: PetscCall(ISGetIndices(pcbddc->is_R_local, &idxs));
6215: PetscCall(PetscMalloc1(nl * cdim, &scoords));
6216: for (i = 0; i < nl; i++) {
6217: for (d = 0; d < cdim; d++) scoords[i * cdim + d] = coords[idxs[i] * cdim + d];
6218: }
6219: PetscCall(ISRestoreIndices(pcbddc->is_R_local, &idxs));
6220: PetscCall(PCSetCoordinates(pc_temp, cdim, nl, scoords));
6221: PetscCall(PetscFree(scoords));
6222: }
6224: /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */
6225: if (!n_R) {
6226: PetscCall(KSPGetPC(pcbddc->ksp_R, &pc_temp));
6227: PetscCall(PCSetType(pc_temp, PCNONE));
6228: }
6229: /* Reuse solver if it is present */
6230: if (reuse_neumann_solver) {
6231: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6233: PetscCall(KSPSetPC(pcbddc->ksp_R, reuse_solver->correction_solver));
6234: }
6235: PetscCall(KSPSetUp(pcbddc->ksp_R));
6236: }
6238: if (pcbddc->dbg_flag) {
6239: PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6240: PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
6241: PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
6242: }
6243: PetscCall(PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level], pc, 0, 0, 0));
6245: /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */
6246: if (pcbddc->NullSpace_corr[0]) PetscCall(PCBDDCSetUseExactDirichlet(pc, PETSC_FALSE));
6247: if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_TRUE, pcbddc->NullSpace_corr[1]));
6248: if (neumann && pcbddc->NullSpace_corr[2]) PetscCall(PCBDDCNullSpaceAssembleCorrection(pc, PETSC_FALSE, pcbddc->NullSpace_corr[3]));
6249: /* check Dirichlet and Neumann solvers */
6250: if (pcbddc->dbg_flag) {
6251: if (dirichlet) { /* Dirichlet */
6252: PetscCall(VecSetRandom(pcis->vec1_D, NULL));
6253: PetscCall(MatMult(pcis->A_II, pcis->vec1_D, pcis->vec2_D));
6254: PetscCall(KSPSolve(pcbddc->ksp_D, pcis->vec2_D, pcis->vec2_D));
6255: PetscCall(KSPCheckSolve(pcbddc->ksp_D, pc, pcis->vec2_D));
6256: PetscCall(VecAXPY(pcis->vec1_D, m_one, pcis->vec2_D));
6257: PetscCall(VecNorm(pcis->vec1_D, NORM_INFINITY, &value));
6258: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_D)->prefix, (double)value));
6259: PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6260: }
6261: if (neumann) { /* Neumann */
6262: PetscCall(VecSetRandom(pcbddc->vec1_R, NULL));
6263: PetscCall(MatMult(A_RR, pcbddc->vec1_R, pcbddc->vec2_R));
6264: PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec2_R, pcbddc->vec2_R));
6265: PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec2_R));
6266: PetscCall(VecAXPY(pcbddc->vec1_R, m_one, pcbddc->vec2_R));
6267: PetscCall(VecNorm(pcbddc->vec1_R, NORM_INFINITY, &value));
6268: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n", PetscGlobalRank, ((PetscObject)pcbddc->ksp_R)->prefix, (double)value));
6269: PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6270: }
6271: }
6272: /* free Neumann problem's matrix */
6273: PetscCall(MatDestroy(&A_RR));
6274: PetscFunctionReturn(PETSC_SUCCESS);
6275: }
6277: static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose)
6278: {
6279: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
6280: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6281: PetscBool reuse_solver = sub_schurs ? (sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE) : PETSC_FALSE;
6283: PetscFunctionBegin;
6284: if (!reuse_solver) PetscCall(VecSet(pcbddc->vec1_R, 0.));
6285: if (!pcbddc->switch_static) {
6286: if (applytranspose && pcbddc->local_auxmat1) {
6287: PetscCall(MatMultTranspose(pcbddc->local_auxmat2, inout_B, pcbddc->vec1_C));
6288: PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B));
6289: }
6290: if (!reuse_solver) {
6291: PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6292: PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6293: } else {
6294: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6296: PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD));
6297: PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, inout_B, reuse_solver->rhs_B, INSERT_VALUES, SCATTER_FORWARD));
6298: }
6299: } else {
6300: PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6301: PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6302: PetscCall(VecScatterBegin(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6303: PetscCall(VecScatterEnd(pcbddc->R_to_D, inout_D, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6304: if (applytranspose && pcbddc->local_auxmat1) {
6305: PetscCall(MatMultTranspose(pcbddc->local_auxmat2, pcbddc->vec1_R, pcbddc->vec1_C));
6306: PetscCall(MatMultTransposeAdd(pcbddc->local_auxmat1, pcbddc->vec1_C, inout_B, inout_B));
6307: PetscCall(VecScatterBegin(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6308: PetscCall(VecScatterEnd(pcbddc->R_to_B, inout_B, pcbddc->vec1_R, INSERT_VALUES, SCATTER_REVERSE));
6309: }
6310: }
6311: PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0));
6312: if (!reuse_solver || pcbddc->switch_static) {
6313: if (applytranspose) {
6314: PetscCall(KSPSolveTranspose(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R));
6315: } else {
6316: PetscCall(KSPSolve(pcbddc->ksp_R, pcbddc->vec1_R, pcbddc->vec1_R));
6317: }
6318: PetscCall(KSPCheckSolve(pcbddc->ksp_R, pc, pcbddc->vec1_R));
6319: } else {
6320: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6322: if (applytranspose) {
6323: PetscCall(MatFactorSolveSchurComplementTranspose(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B));
6324: } else {
6325: PetscCall(MatFactorSolveSchurComplement(reuse_solver->F, reuse_solver->rhs_B, reuse_solver->sol_B));
6326: }
6327: }
6328: PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][1], pc, 0, 0, 0));
6329: PetscCall(VecSet(inout_B, 0.));
6330: if (!pcbddc->switch_static) {
6331: if (!reuse_solver) {
6332: PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6333: PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6334: } else {
6335: PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver;
6337: PetscCall(VecScatterBegin(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE));
6338: PetscCall(VecScatterEnd(reuse_solver->correction_scatter_B, reuse_solver->sol_B, inout_B, INSERT_VALUES, SCATTER_REVERSE));
6339: }
6340: if (!applytranspose && pcbddc->local_auxmat1) {
6341: PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C));
6342: PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, inout_B, inout_B));
6343: }
6344: } else {
6345: PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6346: PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6347: PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6348: PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6349: if (!applytranspose && pcbddc->local_auxmat1) {
6350: PetscCall(MatMult(pcbddc->local_auxmat1, inout_B, pcbddc->vec1_C));
6351: PetscCall(MatMultAdd(pcbddc->local_auxmat2, pcbddc->vec1_C, pcbddc->vec1_R, pcbddc->vec1_R));
6352: }
6353: PetscCall(VecScatterBegin(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6354: PetscCall(VecScatterEnd(pcbddc->R_to_B, pcbddc->vec1_R, inout_B, INSERT_VALUES, SCATTER_FORWARD));
6355: PetscCall(VecScatterBegin(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6356: PetscCall(VecScatterEnd(pcbddc->R_to_D, pcbddc->vec1_R, inout_D, INSERT_VALUES, SCATTER_FORWARD));
6357: }
6358: PetscFunctionReturn(PETSC_SUCCESS);
6359: }
6361: /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */
6362: PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose)
6363: {
6364: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
6365: PC_IS *pcis = (PC_IS *)pc->data;
6366: const PetscScalar zero = 0.0;
6368: PetscFunctionBegin;
6369: /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */
6370: if (!pcbddc->benign_apply_coarse_only) {
6371: if (applytranspose) {
6372: PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, pcis->vec1_B, pcbddc->vec1_P));
6373: if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_phi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P));
6374: } else {
6375: PetscCall(MatMultTranspose(pcbddc->coarse_psi_B, pcis->vec1_B, pcbddc->vec1_P));
6376: if (pcbddc->switch_static) PetscCall(MatMultTransposeAdd(pcbddc->coarse_psi_D, pcis->vec1_D, pcbddc->vec1_P, pcbddc->vec1_P));
6377: }
6378: } else {
6379: PetscCall(VecSet(pcbddc->vec1_P, zero));
6380: }
6382: /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */
6383: if (pcbddc->benign_n) {
6384: PetscScalar *array;
6385: PetscInt j;
6387: PetscCall(VecGetArray(pcbddc->vec1_P, &array));
6388: for (j = 0; j < pcbddc->benign_n; j++) array[pcbddc->local_primal_size - pcbddc->benign_n + j] += pcbddc->benign_p0[j];
6389: PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
6390: }
6392: /* start communications from local primal nodes to rhs of coarse solver */
6393: PetscCall(VecSet(pcbddc->coarse_vec, zero));
6394: PetscCall(PCBDDCScatterCoarseDataBegin(pc, ADD_VALUES, SCATTER_FORWARD));
6395: PetscCall(PCBDDCScatterCoarseDataEnd(pc, ADD_VALUES, SCATTER_FORWARD));
6397: /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */
6398: PetscCall(PetscLogEventBegin(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
6399: if (pcbddc->coarse_ksp) {
6400: Mat coarse_mat;
6401: Vec rhs, sol;
6402: MatNullSpace nullsp;
6403: PetscBool isbddc = PETSC_FALSE;
6405: if (pcbddc->benign_have_null) {
6406: PC coarse_pc;
6408: PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6409: PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc));
6410: /* we need to propagate to coarser levels the need for a possible benign correction */
6411: if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) {
6412: PC_BDDC *coarsepcbddc = (PC_BDDC *)coarse_pc->data;
6413: coarsepcbddc->benign_skip_correction = PETSC_FALSE;
6414: coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE;
6415: }
6416: }
6417: PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &rhs));
6418: PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &sol));
6419: PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL));
6420: if (applytranspose) {
6421: PetscCheck(!pcbddc->benign_apply_coarse_only, PetscObjectComm((PetscObject)pcbddc->coarse_ksp), PETSC_ERR_SUP, "Not yet implemented");
6422: PetscCall(KSPSolveTranspose(pcbddc->coarse_ksp, rhs, sol));
6423: PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol));
6424: PetscCall(MatGetTransposeNullSpace(coarse_mat, &nullsp));
6425: if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol));
6426: } else {
6427: PetscCall(MatGetNullSpace(coarse_mat, &nullsp));
6428: if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */
6429: PC coarse_pc;
6431: if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, rhs));
6432: PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6433: PetscCall(PCPreSolve(coarse_pc, pcbddc->coarse_ksp));
6434: PetscCall(PCBDDCBenignRemoveInterior(coarse_pc, rhs, sol));
6435: PetscCall(PCPostSolve(coarse_pc, pcbddc->coarse_ksp));
6436: } else {
6437: PetscCall(KSPSolve(pcbddc->coarse_ksp, rhs, sol));
6438: PetscCall(KSPCheckSolve(pcbddc->coarse_ksp, pc, sol));
6439: if (nullsp) PetscCall(MatNullSpaceRemove(nullsp, sol));
6440: }
6441: }
6442: /* we don't need the benign correction at coarser levels anymore */
6443: if (pcbddc->benign_have_null && isbddc) {
6444: PC coarse_pc;
6445: PC_BDDC *coarsepcbddc;
6447: PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
6448: coarsepcbddc = (PC_BDDC *)coarse_pc->data;
6449: coarsepcbddc->benign_skip_correction = PETSC_TRUE;
6450: coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE;
6451: }
6452: }
6453: PetscCall(PetscLogEventEnd(PC_BDDC_Solves[pcbddc->current_level][2], pc, 0, 0, 0));
6455: /* Local solution on R nodes */
6456: if (!pcbddc->benign_apply_coarse_only) PetscCall(PCBDDCSolveSubstructureCorrection(pc, pcis->vec1_B, pcis->vec1_D, applytranspose));
6457: /* communications from coarse sol to local primal nodes */
6458: PetscCall(PCBDDCScatterCoarseDataBegin(pc, INSERT_VALUES, SCATTER_REVERSE));
6459: PetscCall(PCBDDCScatterCoarseDataEnd(pc, INSERT_VALUES, SCATTER_REVERSE));
6461: /* Sum contributions from the two levels */
6462: if (!pcbddc->benign_apply_coarse_only) {
6463: if (applytranspose) {
6464: PetscCall(MatMultAdd(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B));
6465: if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_psi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D));
6466: } else {
6467: PetscCall(MatMultAdd(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B, pcis->vec1_B));
6468: if (pcbddc->switch_static) PetscCall(MatMultAdd(pcbddc->coarse_phi_D, pcbddc->vec1_P, pcis->vec1_D, pcis->vec1_D));
6469: }
6470: /* store p0 */
6471: if (pcbddc->benign_n) {
6472: PetscScalar *array;
6473: PetscInt j;
6475: PetscCall(VecGetArray(pcbddc->vec1_P, &array));
6476: for (j = 0; j < pcbddc->benign_n; j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size - pcbddc->benign_n + j];
6477: PetscCall(VecRestoreArray(pcbddc->vec1_P, &array));
6478: }
6479: } else { /* expand the coarse solution */
6480: if (applytranspose) {
6481: PetscCall(MatMult(pcbddc->coarse_psi_B, pcbddc->vec1_P, pcis->vec1_B));
6482: } else {
6483: PetscCall(MatMult(pcbddc->coarse_phi_B, pcbddc->vec1_P, pcis->vec1_B));
6484: }
6485: }
6486: PetscFunctionReturn(PETSC_SUCCESS);
6487: }
6489: PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc, InsertMode imode, ScatterMode smode)
6490: {
6491: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
6492: Vec from, to;
6493: const PetscScalar *array;
6495: PetscFunctionBegin;
6496: if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
6497: from = pcbddc->coarse_vec;
6498: to = pcbddc->vec1_P;
6499: if (pcbddc->coarse_ksp) { /* get array from coarse processes */
6500: Vec tvec;
6502: PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec));
6503: PetscCall(VecResetArray(tvec));
6504: PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &tvec));
6505: PetscCall(VecGetArrayRead(tvec, &array));
6506: PetscCall(VecPlaceArray(from, array));
6507: PetscCall(VecRestoreArrayRead(tvec, &array));
6508: }
6509: } else { /* from local to global -> put data in coarse right-hand side */
6510: from = pcbddc->vec1_P;
6511: to = pcbddc->coarse_vec;
6512: }
6513: PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, from, to, imode, smode));
6514: PetscFunctionReturn(PETSC_SUCCESS);
6515: }
6517: PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode)
6518: {
6519: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
6520: Vec from, to;
6521: const PetscScalar *array;
6523: PetscFunctionBegin;
6524: if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */
6525: from = pcbddc->coarse_vec;
6526: to = pcbddc->vec1_P;
6527: } else { /* from local to global -> put data in coarse right-hand side */
6528: from = pcbddc->vec1_P;
6529: to = pcbddc->coarse_vec;
6530: }
6531: PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, from, to, imode, smode));
6532: if (smode == SCATTER_FORWARD) {
6533: if (pcbddc->coarse_ksp) { /* get array from coarse processes */
6534: Vec tvec;
6536: PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &tvec));
6537: PetscCall(VecGetArrayRead(to, &array));
6538: PetscCall(VecPlaceArray(tvec, array));
6539: PetscCall(VecRestoreArrayRead(to, &array));
6540: }
6541: } else {
6542: if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */
6543: PetscCall(VecResetArray(from));
6544: }
6545: }
6546: PetscFunctionReturn(PETSC_SUCCESS);
6547: }
6549: PetscErrorCode PCBDDCConstraintsSetUp(PC pc)
6550: {
6551: PC_IS *pcis = (PC_IS *)pc->data;
6552: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
6553: Mat_IS *matis = (Mat_IS *)pc->pmat->data;
6554: /* one and zero */
6555: PetscScalar one = 1.0, zero = 0.0;
6556: /* space to store constraints and their local indices */
6557: PetscScalar *constraints_data;
6558: PetscInt *constraints_idxs, *constraints_idxs_B;
6559: PetscInt *constraints_idxs_ptr, *constraints_data_ptr;
6560: PetscInt *constraints_n;
6561: /* iterators */
6562: PetscInt i, j, k, total_counts, total_counts_cc, cum;
6563: /* BLAS integers */
6564: PetscBLASInt lwork, lierr;
6565: PetscBLASInt Blas_N, Blas_M, Blas_K, Blas_one = 1;
6566: PetscBLASInt Blas_LDA, Blas_LDB, Blas_LDC;
6567: /* reuse */
6568: PetscInt olocal_primal_size, olocal_primal_size_cc;
6569: PetscInt *olocal_primal_ref_node, *olocal_primal_ref_mult;
6570: /* change of basis */
6571: PetscBool qr_needed;
6572: PetscBT change_basis, qr_needed_idx;
6573: /* auxiliary stuff */
6574: PetscInt *nnz, *is_indices;
6575: PetscInt ncc;
6576: /* some quantities */
6577: PetscInt n_vertices, total_primal_vertices, valid_constraints;
6578: PetscInt size_of_constraint, max_size_of_constraint = 0, max_constraints, temp_constraints;
6579: PetscReal tol; /* tolerance for retaining eigenmodes */
6581: PetscFunctionBegin;
6582: tol = PetscSqrtReal(PETSC_SMALL);
6583: /* Destroy Mat objects computed previously */
6584: PetscCall(MatDestroy(&pcbddc->ChangeOfBasisMatrix));
6585: PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
6586: PetscCall(MatDestroy(&pcbddc->switch_static_change));
6587: /* save info on constraints from previous setup (if any) */
6588: olocal_primal_size = pcbddc->local_primal_size;
6589: olocal_primal_size_cc = pcbddc->local_primal_size_cc;
6590: PetscCall(PetscMalloc2(olocal_primal_size_cc, &olocal_primal_ref_node, olocal_primal_size_cc, &olocal_primal_ref_mult));
6591: PetscCall(PetscArraycpy(olocal_primal_ref_node, pcbddc->local_primal_ref_node, olocal_primal_size_cc));
6592: PetscCall(PetscArraycpy(olocal_primal_ref_mult, pcbddc->local_primal_ref_mult, olocal_primal_size_cc));
6593: PetscCall(PetscFree2(pcbddc->local_primal_ref_node, pcbddc->local_primal_ref_mult));
6594: PetscCall(PetscFree(pcbddc->primal_indices_local_idxs));
6596: if (!pcbddc->adaptive_selection) {
6597: IS ISForVertices, *ISForFaces, *ISForEdges;
6598: MatNullSpace nearnullsp;
6599: const Vec *nearnullvecs;
6600: Vec *localnearnullsp;
6601: PetscScalar *array;
6602: PetscInt n_ISForFaces, n_ISForEdges, nnsp_size, o_nf, o_ne;
6603: PetscBool nnsp_has_cnst;
6604: /* LAPACK working arrays for SVD or POD */
6605: PetscBool skip_lapack, boolforchange;
6606: PetscScalar *work;
6607: PetscReal *singular_vals;
6608: #if defined(PETSC_USE_COMPLEX)
6609: PetscReal *rwork;
6610: #endif
6611: PetscScalar *temp_basis = NULL, *correlation_mat = NULL;
6612: PetscBLASInt dummy_int = 1;
6613: PetscScalar dummy_scalar = 1.;
6614: PetscBool use_pod = PETSC_FALSE;
6616: /* MKL SVD with same input gives different results on different processes! */
6617: #if defined(PETSC_MISSING_LAPACK_GESVD) || defined(PETSC_HAVE_MKL_LIBS)
6618: use_pod = PETSC_TRUE;
6619: #endif
6620: /* Get index sets for faces, edges and vertices from graph */
6621: PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, &n_ISForFaces, &ISForFaces, &n_ISForEdges, &ISForEdges, &ISForVertices));
6622: o_nf = n_ISForFaces;
6623: o_ne = n_ISForEdges;
6624: n_vertices = 0;
6625: if (ISForVertices) PetscCall(ISGetSize(ISForVertices, &n_vertices));
6626: /* print some info */
6627: if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) {
6628: if (!pcbddc->dbg_viewer) pcbddc->dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pc));
6629: PetscCall(PCBDDCGraphASCIIView(pcbddc->mat_graph, pcbddc->dbg_flag, pcbddc->dbg_viewer));
6630: PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
6631: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
6632: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, n_vertices, pcbddc->use_vertices));
6633: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, n_ISForEdges, pcbddc->use_edges));
6634: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, n_ISForFaces, pcbddc->use_faces));
6635: PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
6636: PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer));
6637: }
6639: if (!pcbddc->use_vertices) n_vertices = 0;
6640: if (!pcbddc->use_edges) n_ISForEdges = 0;
6641: if (!pcbddc->use_faces) n_ISForFaces = 0;
6643: /* check if near null space is attached to global mat */
6644: if (pcbddc->use_nnsp) PetscCall(MatGetNearNullSpace(pc->pmat, &nearnullsp));
6645: else nearnullsp = NULL;
6647: if (nearnullsp) {
6648: PetscCall(MatNullSpaceGetVecs(nearnullsp, &nnsp_has_cnst, &nnsp_size, &nearnullvecs));
6649: /* remove any stored info */
6650: PetscCall(MatNullSpaceDestroy(&pcbddc->onearnullspace));
6651: PetscCall(PetscFree(pcbddc->onearnullvecs_state));
6652: /* store information for BDDC solver reuse */
6653: PetscCall(PetscObjectReference((PetscObject)nearnullsp));
6654: pcbddc->onearnullspace = nearnullsp;
6655: PetscCall(PetscMalloc1(nnsp_size, &pcbddc->onearnullvecs_state));
6656: for (i = 0; i < nnsp_size; i++) PetscCall(PetscObjectStateGet((PetscObject)nearnullvecs[i], &pcbddc->onearnullvecs_state[i]));
6657: } else { /* if near null space is not provided BDDC uses constants by default */
6658: nnsp_size = 0;
6659: nnsp_has_cnst = PETSC_TRUE;
6660: }
6661: /* get max number of constraints on a single cc */
6662: max_constraints = nnsp_size;
6663: if (nnsp_has_cnst) max_constraints++;
6665: /*
6666: Evaluate maximum storage size needed by the procedure
6667: - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]"
6668: - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]"
6669: There can be multiple constraints per connected component
6670: */
6671: ncc = n_vertices + n_ISForFaces + n_ISForEdges;
6672: PetscCall(PetscMalloc3(ncc + 1, &constraints_idxs_ptr, ncc + 1, &constraints_data_ptr, ncc, &constraints_n));
6674: total_counts = n_ISForFaces + n_ISForEdges;
6675: total_counts *= max_constraints;
6676: total_counts += n_vertices;
6677: PetscCall(PetscBTCreate(total_counts, &change_basis));
6679: total_counts = 0;
6680: max_size_of_constraint = 0;
6681: for (i = 0; i < n_ISForEdges + n_ISForFaces; i++) {
6682: IS used_is;
6683: if (i < n_ISForEdges) {
6684: used_is = ISForEdges[i];
6685: } else {
6686: used_is = ISForFaces[i - n_ISForEdges];
6687: }
6688: PetscCall(ISGetSize(used_is, &j));
6689: total_counts += j;
6690: max_size_of_constraint = PetscMax(j, max_size_of_constraint);
6691: }
6692: PetscCall(PetscMalloc3(total_counts * max_constraints + n_vertices, &constraints_data, total_counts + n_vertices, &constraints_idxs, total_counts + n_vertices, &constraints_idxs_B));
6694: /* get local part of global near null space vectors */
6695: PetscCall(PetscMalloc1(nnsp_size, &localnearnullsp));
6696: for (k = 0; k < nnsp_size; k++) {
6697: PetscCall(VecDuplicate(pcis->vec1_N, &localnearnullsp[k]));
6698: PetscCall(VecScatterBegin(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD));
6699: PetscCall(VecScatterEnd(matis->rctx, nearnullvecs[k], localnearnullsp[k], INSERT_VALUES, SCATTER_FORWARD));
6700: }
6702: /* whether or not to skip lapack calls */
6703: skip_lapack = PETSC_TRUE;
6704: if (n_ISForFaces + n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE;
6706: /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */
6707: if (!skip_lapack) {
6708: PetscScalar temp_work;
6710: if (use_pod) {
6711: /* Proper Orthogonal Decomposition (POD) using the snapshot method */
6712: PetscCall(PetscMalloc1(max_constraints * max_constraints, &correlation_mat));
6713: PetscCall(PetscMalloc1(max_constraints, &singular_vals));
6714: PetscCall(PetscMalloc1(max_size_of_constraint * max_constraints, &temp_basis));
6715: #if defined(PETSC_USE_COMPLEX)
6716: PetscCall(PetscMalloc1(3 * max_constraints, &rwork));
6717: #endif
6718: /* now we evaluate the optimal workspace using query with lwork=-1 */
6719: PetscCall(PetscBLASIntCast(max_constraints, &Blas_N));
6720: PetscCall(PetscBLASIntCast(max_constraints, &Blas_LDA));
6721: lwork = -1;
6722: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6723: #if !defined(PETSC_USE_COMPLEX)
6724: PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, &lierr));
6725: #else
6726: PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, &temp_work, &lwork, rwork, &lierr));
6727: #endif
6728: PetscCall(PetscFPTrapPop());
6729: PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to SYEV Lapack routine %" PetscBLASInt_FMT, lierr);
6730: } else {
6731: #if !defined(PETSC_MISSING_LAPACK_GESVD)
6732: /* SVD */
6733: PetscInt max_n, min_n;
6734: max_n = max_size_of_constraint;
6735: min_n = max_constraints;
6736: if (max_size_of_constraint < max_constraints) {
6737: min_n = max_size_of_constraint;
6738: max_n = max_constraints;
6739: }
6740: PetscCall(PetscMalloc1(min_n, &singular_vals));
6741: #if defined(PETSC_USE_COMPLEX)
6742: PetscCall(PetscMalloc1(5 * min_n, &rwork));
6743: #endif
6744: /* now we evaluate the optimal workspace using query with lwork=-1 */
6745: lwork = -1;
6746: PetscCall(PetscBLASIntCast(max_n, &Blas_M));
6747: PetscCall(PetscBLASIntCast(min_n, &Blas_N));
6748: PetscCall(PetscBLASIntCast(max_n, &Blas_LDA));
6749: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6750: #if !defined(PETSC_USE_COMPLEX)
6751: PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, &lierr));
6752: #else
6753: PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, &constraints_data[0], &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, &temp_work, &lwork, rwork, &lierr));
6754: #endif
6755: PetscCall(PetscFPTrapPop());
6756: PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GESVD Lapack routine %" PetscBLASInt_FMT, lierr);
6757: #else
6758: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
6759: #endif /* on missing GESVD */
6760: }
6761: /* Allocate optimal workspace */
6762: PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(temp_work), &lwork));
6763: PetscCall(PetscMalloc1(lwork, &work));
6764: }
6765: /* Now we can loop on constraining sets */
6766: total_counts = 0;
6767: constraints_idxs_ptr[0] = 0;
6768: constraints_data_ptr[0] = 0;
6769: /* vertices */
6770: if (n_vertices) {
6771: PetscCall(ISGetIndices(ISForVertices, (const PetscInt **)&is_indices));
6772: PetscCall(PetscArraycpy(constraints_idxs, is_indices, n_vertices));
6773: for (i = 0; i < n_vertices; i++) {
6774: constraints_n[total_counts] = 1;
6775: constraints_data[total_counts] = 1.0;
6776: constraints_idxs_ptr[total_counts + 1] = constraints_idxs_ptr[total_counts] + 1;
6777: constraints_data_ptr[total_counts + 1] = constraints_data_ptr[total_counts] + 1;
6778: total_counts++;
6779: }
6780: PetscCall(ISRestoreIndices(ISForVertices, (const PetscInt **)&is_indices));
6781: }
6783: /* edges and faces */
6784: total_counts_cc = total_counts;
6785: for (ncc = 0; ncc < n_ISForEdges + n_ISForFaces; ncc++) {
6786: IS used_is;
6787: PetscBool idxs_copied = PETSC_FALSE;
6789: if (ncc < n_ISForEdges) {
6790: used_is = ISForEdges[ncc];
6791: boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */
6792: } else {
6793: used_is = ISForFaces[ncc - n_ISForEdges];
6794: boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */
6795: }
6796: temp_constraints = 0; /* zero the number of constraints I have on this conn comp */
6798: PetscCall(ISGetSize(used_is, &size_of_constraint));
6799: if (!size_of_constraint) continue;
6800: PetscCall(ISGetIndices(used_is, (const PetscInt **)&is_indices));
6801: if (nnsp_has_cnst) {
6802: PetscScalar quad_value;
6804: PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint));
6805: idxs_copied = PETSC_TRUE;
6807: if (!pcbddc->use_nnsp_true) {
6808: quad_value = (PetscScalar)(1.0 / PetscSqrtReal((PetscReal)size_of_constraint));
6809: } else {
6810: quad_value = 1.0;
6811: }
6812: for (j = 0; j < size_of_constraint; j++) constraints_data[constraints_data_ptr[total_counts_cc] + j] = quad_value;
6813: temp_constraints++;
6814: total_counts++;
6815: }
6816: for (k = 0; k < nnsp_size; k++) {
6817: PetscReal real_value;
6818: PetscScalar *ptr_to_data;
6820: PetscCall(VecGetArrayRead(localnearnullsp[k], (const PetscScalar **)&array));
6821: ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc] + temp_constraints * size_of_constraint];
6822: for (j = 0; j < size_of_constraint; j++) ptr_to_data[j] = array[is_indices[j]];
6823: PetscCall(VecRestoreArrayRead(localnearnullsp[k], (const PetscScalar **)&array));
6824: /* check if array is null on the connected component */
6825: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6826: PetscCallBLAS("BLASasum", real_value = BLASasum_(&Blas_N, ptr_to_data, &Blas_one));
6827: if (real_value > tol * size_of_constraint) { /* keep indices and values */
6828: temp_constraints++;
6829: total_counts++;
6830: if (!idxs_copied) {
6831: PetscCall(PetscArraycpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc], is_indices, size_of_constraint));
6832: idxs_copied = PETSC_TRUE;
6833: }
6834: }
6835: }
6836: PetscCall(ISRestoreIndices(used_is, (const PetscInt **)&is_indices));
6837: valid_constraints = temp_constraints;
6838: if (!pcbddc->use_nnsp_true && temp_constraints) {
6839: if (temp_constraints == 1) { /* just normalize the constraint */
6840: PetscScalar norm, *ptr_to_data;
6842: ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6843: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6844: PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, ptr_to_data, &Blas_one, ptr_to_data, &Blas_one));
6845: norm = 1.0 / PetscSqrtReal(PetscRealPart(norm));
6846: PetscCallBLAS("BLASscal", BLASscal_(&Blas_N, &norm, ptr_to_data, &Blas_one));
6847: } else { /* perform SVD */
6848: PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]];
6850: if (use_pod) {
6851: /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag
6852: POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2)
6853: -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined
6854: the constraints basis will differ (by a complex factor with absolute value equal to 1)
6855: from that computed using LAPACKgesvd
6856: -> This is due to a different computation of eigenvectors in LAPACKheev
6857: -> The quality of the POD-computed basis will be the same */
6858: PetscCall(PetscArrayzero(correlation_mat, temp_constraints * temp_constraints));
6859: /* Store upper triangular part of correlation matrix */
6860: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
6861: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6862: for (j = 0; j < temp_constraints; j++) {
6863: for (k = 0; k < j + 1; k++) PetscCallBLAS("BLASdot", correlation_mat[j * temp_constraints + k] = BLASdot_(&Blas_N, ptr_to_data + k * size_of_constraint, &Blas_one, ptr_to_data + j * size_of_constraint, &Blas_one));
6864: }
6865: /* compute eigenvalues and eigenvectors of correlation matrix */
6866: PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6867: PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDA));
6868: #if !defined(PETSC_USE_COMPLEX)
6869: PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, &lierr));
6870: #else
6871: PetscCallBLAS("LAPACKsyev", LAPACKsyev_("V", "U", &Blas_N, correlation_mat, &Blas_LDA, singular_vals, work, &lwork, rwork, &lierr));
6872: #endif
6873: PetscCall(PetscFPTrapPop());
6874: PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in SYEV Lapack routine %" PetscBLASInt_FMT, lierr);
6875: /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */
6876: j = 0;
6877: while (j < temp_constraints && singular_vals[j] / singular_vals[temp_constraints - 1] < tol) j++;
6878: total_counts = total_counts - j;
6879: valid_constraints = temp_constraints - j;
6880: /* scale and copy POD basis into used quadrature memory */
6881: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6882: PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6883: PetscCall(PetscBLASIntCast(temp_constraints, &Blas_K));
6884: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6885: PetscCall(PetscBLASIntCast(temp_constraints, &Blas_LDB));
6886: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC));
6887: if (j < temp_constraints) {
6888: PetscInt ii;
6889: for (k = j; k < temp_constraints; k++) singular_vals[k] = 1.0 / PetscSqrtReal(singular_vals[k]);
6890: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6891: PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, ptr_to_data, &Blas_LDA, correlation_mat, &Blas_LDB, &zero, temp_basis, &Blas_LDC));
6892: PetscCall(PetscFPTrapPop());
6893: for (k = 0; k < temp_constraints - j; k++) {
6894: for (ii = 0; ii < size_of_constraint; ii++) ptr_to_data[k * size_of_constraint + ii] = singular_vals[temp_constraints - 1 - k] * temp_basis[(temp_constraints - 1 - k) * size_of_constraint + ii];
6895: }
6896: }
6897: } else {
6898: #if !defined(PETSC_MISSING_LAPACK_GESVD)
6899: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
6900: PetscCall(PetscBLASIntCast(temp_constraints, &Blas_N));
6901: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
6902: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
6903: #if !defined(PETSC_USE_COMPLEX)
6904: PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, &lierr));
6905: #else
6906: PetscCallBLAS("LAPACKgesvd", LAPACKgesvd_("O", "N", &Blas_M, &Blas_N, ptr_to_data, &Blas_LDA, singular_vals, &dummy_scalar, &dummy_int, &dummy_scalar, &dummy_int, work, &lwork, rwork, &lierr));
6907: #endif
6908: PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GESVD Lapack routine %" PetscBLASInt_FMT, lierr);
6909: PetscCall(PetscFPTrapPop());
6910: /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */
6911: k = temp_constraints;
6912: if (k > size_of_constraint) k = size_of_constraint;
6913: j = 0;
6914: while (j < k && singular_vals[k - j - 1] / singular_vals[0] < tol) j++;
6915: valid_constraints = k - j;
6916: total_counts = total_counts - temp_constraints + valid_constraints;
6917: #else
6918: SETERRQ(PETSC_COMM_SELF, PETSC_ERR_LIB, "This should not happen");
6919: #endif /* on missing GESVD */
6920: }
6921: }
6922: }
6923: /* update pointers information */
6924: if (valid_constraints) {
6925: constraints_n[total_counts_cc] = valid_constraints;
6926: constraints_idxs_ptr[total_counts_cc + 1] = constraints_idxs_ptr[total_counts_cc] + size_of_constraint;
6927: constraints_data_ptr[total_counts_cc + 1] = constraints_data_ptr[total_counts_cc] + size_of_constraint * valid_constraints;
6928: /* set change_of_basis flag */
6929: if (boolforchange) PetscCall(PetscBTSet(change_basis, total_counts_cc));
6930: total_counts_cc++;
6931: }
6932: }
6933: /* free workspace */
6934: if (!skip_lapack) {
6935: PetscCall(PetscFree(work));
6936: #if defined(PETSC_USE_COMPLEX)
6937: PetscCall(PetscFree(rwork));
6938: #endif
6939: PetscCall(PetscFree(singular_vals));
6940: PetscCall(PetscFree(correlation_mat));
6941: PetscCall(PetscFree(temp_basis));
6942: }
6943: for (k = 0; k < nnsp_size; k++) PetscCall(VecDestroy(&localnearnullsp[k]));
6944: PetscCall(PetscFree(localnearnullsp));
6945: /* free index sets of faces, edges and vertices */
6946: PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, &o_nf, &ISForFaces, &o_ne, &ISForEdges, &ISForVertices));
6947: } else {
6948: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
6950: total_counts = 0;
6951: n_vertices = 0;
6952: if (sub_schurs->is_vertices && pcbddc->use_vertices) PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices));
6953: max_constraints = 0;
6954: total_counts_cc = 0;
6955: for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6956: total_counts += pcbddc->adaptive_constraints_n[i];
6957: if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++;
6958: max_constraints = PetscMax(max_constraints, pcbddc->adaptive_constraints_n[i]);
6959: }
6960: constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr;
6961: constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr;
6962: constraints_idxs = pcbddc->adaptive_constraints_idxs;
6963: constraints_data = pcbddc->adaptive_constraints_data;
6964: /* constraints_n differs from pcbddc->adaptive_constraints_n */
6965: PetscCall(PetscMalloc1(total_counts_cc, &constraints_n));
6966: total_counts_cc = 0;
6967: for (i = 0; i < sub_schurs->n_subs + n_vertices; i++) {
6968: if (pcbddc->adaptive_constraints_n[i]) constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i];
6969: }
6971: max_size_of_constraint = 0;
6972: for (i = 0; i < total_counts_cc; i++) max_size_of_constraint = PetscMax(max_size_of_constraint, constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i]);
6973: PetscCall(PetscMalloc1(constraints_idxs_ptr[total_counts_cc], &constraints_idxs_B));
6974: /* Change of basis */
6975: PetscCall(PetscBTCreate(total_counts_cc, &change_basis));
6976: if (pcbddc->use_change_of_basis) {
6977: for (i = 0; i < sub_schurs->n_subs; i++) {
6978: if (PetscBTLookup(sub_schurs->is_edge, i) || pcbddc->use_change_on_faces) PetscCall(PetscBTSet(change_basis, i + n_vertices));
6979: }
6980: }
6981: }
6982: pcbddc->local_primal_size = total_counts;
6983: PetscCall(PetscMalloc1(pcbddc->local_primal_size + pcbddc->benign_n, &pcbddc->primal_indices_local_idxs));
6985: /* map constraints_idxs in boundary numbering */
6986: if (pcbddc->use_change_of_basis) {
6987: PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, constraints_idxs_ptr[total_counts_cc], constraints_idxs, &i, constraints_idxs_B));
6988: PetscCheck(i == constraints_idxs_ptr[total_counts_cc], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for constraints indices %" PetscInt_FMT " != %" PetscInt_FMT, constraints_idxs_ptr[total_counts_cc], i);
6989: }
6991: /* Create constraint matrix */
6992: PetscCall(MatCreate(PETSC_COMM_SELF, &pcbddc->ConstraintMatrix));
6993: PetscCall(MatSetType(pcbddc->ConstraintMatrix, MATAIJ));
6994: PetscCall(MatSetSizes(pcbddc->ConstraintMatrix, pcbddc->local_primal_size, pcis->n, pcbddc->local_primal_size, pcis->n));
6996: /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */
6997: /* determine if a QR strategy is needed for change of basis */
6998: qr_needed = pcbddc->use_qr_single;
6999: PetscCall(PetscBTCreate(total_counts_cc, &qr_needed_idx));
7000: total_primal_vertices = 0;
7001: pcbddc->local_primal_size_cc = 0;
7002: for (i = 0; i < total_counts_cc; i++) {
7003: size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
7004: if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) {
7005: pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]];
7006: pcbddc->local_primal_size_cc += 1;
7007: } else if (PetscBTLookup(change_basis, i)) {
7008: for (k = 0; k < constraints_n[i]; k++) pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i] + k];
7009: pcbddc->local_primal_size_cc += constraints_n[i];
7010: if (constraints_n[i] > 1 || pcbddc->use_qr_single) {
7011: PetscCall(PetscBTSet(qr_needed_idx, i));
7012: qr_needed = PETSC_TRUE;
7013: }
7014: } else {
7015: pcbddc->local_primal_size_cc += 1;
7016: }
7017: }
7018: /* note that the local variable n_vertices used below stores the number of pointwise constraints */
7019: pcbddc->n_vertices = total_primal_vertices;
7020: /* permute indices in order to have a sorted set of vertices */
7021: PetscCall(PetscSortInt(total_primal_vertices, pcbddc->primal_indices_local_idxs));
7022: PetscCall(PetscMalloc2(pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_node, pcbddc->local_primal_size_cc + pcbddc->benign_n, &pcbddc->local_primal_ref_mult));
7023: PetscCall(PetscArraycpy(pcbddc->local_primal_ref_node, pcbddc->primal_indices_local_idxs, total_primal_vertices));
7024: for (i = 0; i < total_primal_vertices; i++) pcbddc->local_primal_ref_mult[i] = 1;
7026: /* nonzero structure of constraint matrix */
7027: /* and get reference dof for local constraints */
7028: PetscCall(PetscMalloc1(pcbddc->local_primal_size, &nnz));
7029: for (i = 0; i < total_primal_vertices; i++) nnz[i] = 1;
7031: j = total_primal_vertices;
7032: total_counts = total_primal_vertices;
7033: cum = total_primal_vertices;
7034: for (i = n_vertices; i < total_counts_cc; i++) {
7035: if (!PetscBTLookup(change_basis, i)) {
7036: pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]];
7037: pcbddc->local_primal_ref_mult[cum] = constraints_n[i];
7038: cum++;
7039: size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
7040: for (k = 0; k < constraints_n[i]; k++) {
7041: pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i] + k];
7042: nnz[j + k] = size_of_constraint;
7043: }
7044: j += constraints_n[i];
7045: }
7046: }
7047: PetscCall(MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix, 0, nnz));
7048: PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
7049: PetscCall(MatSetOption(pcbddc->ConstraintMatrix, MAT_IGNORE_ZERO_ENTRIES, PETSC_TRUE));
7050: PetscCall(PetscFree(nnz));
7052: /* set values in constraint matrix */
7053: for (i = 0; i < total_primal_vertices; i++) PetscCall(MatSetValue(pcbddc->ConstraintMatrix, i, pcbddc->local_primal_ref_node[i], 1.0, INSERT_VALUES));
7054: total_counts = total_primal_vertices;
7055: for (i = n_vertices; i < total_counts_cc; i++) {
7056: if (!PetscBTLookup(change_basis, i)) {
7057: PetscInt *cols;
7059: size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
7060: cols = constraints_idxs + constraints_idxs_ptr[i];
7061: for (k = 0; k < constraints_n[i]; k++) {
7062: PetscInt row = total_counts + k;
7063: PetscScalar *vals;
7065: vals = constraints_data + constraints_data_ptr[i] + k * size_of_constraint;
7066: PetscCall(MatSetValues(pcbddc->ConstraintMatrix, 1, &row, size_of_constraint, cols, vals, INSERT_VALUES));
7067: }
7068: total_counts += constraints_n[i];
7069: }
7070: }
7071: /* assembling */
7072: PetscCall(MatAssemblyBegin(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY));
7073: PetscCall(MatAssemblyEnd(pcbddc->ConstraintMatrix, MAT_FINAL_ASSEMBLY));
7074: PetscCall(MatViewFromOptions(pcbddc->ConstraintMatrix, (PetscObject)pc, "-pc_bddc_constraint_mat_view"));
7076: /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */
7077: if (pcbddc->use_change_of_basis) {
7078: /* dual and primal dofs on a single cc */
7079: PetscInt dual_dofs, primal_dofs;
7080: /* working stuff for GEQRF */
7081: PetscScalar *qr_basis = NULL, *qr_tau = NULL, *qr_work = NULL, lqr_work_t;
7082: PetscBLASInt lqr_work;
7083: /* working stuff for UNGQR */
7084: PetscScalar *gqr_work = NULL, lgqr_work_t = 0.0;
7085: PetscBLASInt lgqr_work;
7086: /* working stuff for TRTRS */
7087: PetscScalar *trs_rhs = NULL;
7088: PetscBLASInt Blas_NRHS;
7089: /* pointers for values insertion into change of basis matrix */
7090: PetscInt *start_rows, *start_cols;
7091: PetscScalar *start_vals;
7092: /* working stuff for values insertion */
7093: PetscBT is_primal;
7094: PetscInt *aux_primal_numbering_B;
7095: /* matrix sizes */
7096: PetscInt global_size, local_size;
7097: /* temporary change of basis */
7098: Mat localChangeOfBasisMatrix;
7099: /* extra space for debugging */
7100: PetscScalar *dbg_work = NULL;
7102: PetscCall(MatCreate(PETSC_COMM_SELF, &localChangeOfBasisMatrix));
7103: PetscCall(MatSetType(localChangeOfBasisMatrix, MATAIJ));
7104: PetscCall(MatSetSizes(localChangeOfBasisMatrix, pcis->n, pcis->n, pcis->n, pcis->n));
7105: /* nonzeros for local mat */
7106: PetscCall(PetscMalloc1(pcis->n, &nnz));
7107: if (!pcbddc->benign_change || pcbddc->fake_change) {
7108: for (i = 0; i < pcis->n; i++) nnz[i] = 1;
7109: } else {
7110: const PetscInt *ii;
7111: PetscInt n;
7112: PetscBool flg_row;
7113: PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row));
7114: for (i = 0; i < n; i++) nnz[i] = ii[i + 1] - ii[i];
7115: PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, NULL, &flg_row));
7116: }
7117: for (i = n_vertices; i < total_counts_cc; i++) {
7118: if (PetscBTLookup(change_basis, i)) {
7119: size_of_constraint = constraints_idxs_ptr[i + 1] - constraints_idxs_ptr[i];
7120: if (PetscBTLookup(qr_needed_idx, i)) {
7121: for (j = 0; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = size_of_constraint;
7122: } else {
7123: nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint;
7124: for (j = 1; j < size_of_constraint; j++) nnz[constraints_idxs[constraints_idxs_ptr[i] + j]] = 2;
7125: }
7126: }
7127: }
7128: PetscCall(MatSeqAIJSetPreallocation(localChangeOfBasisMatrix, 0, nnz));
7129: PetscCall(MatSetOption(localChangeOfBasisMatrix, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_TRUE));
7130: PetscCall(PetscFree(nnz));
7131: /* Set interior change in the matrix */
7132: if (!pcbddc->benign_change || pcbddc->fake_change) {
7133: for (i = 0; i < pcis->n; i++) PetscCall(MatSetValue(localChangeOfBasisMatrix, i, i, 1.0, INSERT_VALUES));
7134: } else {
7135: const PetscInt *ii, *jj;
7136: PetscScalar *aa;
7137: PetscInt n;
7138: PetscBool flg_row;
7139: PetscCall(MatGetRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row));
7140: PetscCall(MatSeqAIJGetArray(pcbddc->benign_change, &aa));
7141: for (i = 0; i < n; i++) PetscCall(MatSetValues(localChangeOfBasisMatrix, 1, &i, ii[i + 1] - ii[i], jj + ii[i], aa + ii[i], INSERT_VALUES));
7142: PetscCall(MatSeqAIJRestoreArray(pcbddc->benign_change, &aa));
7143: PetscCall(MatRestoreRowIJ(pcbddc->benign_change, 0, PETSC_FALSE, PETSC_FALSE, &n, &ii, &jj, &flg_row));
7144: }
7146: if (pcbddc->dbg_flag) {
7147: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
7148: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Checking change of basis computation for subdomain %04d\n", PetscGlobalRank));
7149: }
7151: /* Now we loop on the constraints which need a change of basis */
7152: /*
7153: Change of basis matrix is evaluated similarly to the FIRST APPROACH in
7154: Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1)
7156: Basic blocks of change of basis matrix T computed:
7158: - By using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified)
7160: | 1 0 ... 0 s_1/S |
7161: | 0 1 ... 0 s_2/S |
7162: | ... |
7163: | 0 ... 1 s_{n-1}/S |
7164: | -s_1/s_n ... -s_{n-1}/s_n s_n/S |
7166: with S = \sum_{i=1}^n s_i^2
7167: NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering
7168: in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering
7170: - QR decomposition of constraints otherwise
7171: */
7172: if (qr_needed && max_size_of_constraint) {
7173: /* space to store Q */
7174: PetscCall(PetscMalloc1(max_size_of_constraint * max_size_of_constraint, &qr_basis));
7175: /* array to store scaling factors for reflectors */
7176: PetscCall(PetscMalloc1(max_constraints, &qr_tau));
7177: /* first we issue queries for optimal work */
7178: PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M));
7179: PetscCall(PetscBLASIntCast(max_constraints, &Blas_N));
7180: PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA));
7181: lqr_work = -1;
7182: PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, &lqr_work_t, &lqr_work, &lierr));
7183: PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to GEQRF Lapack routine %" PetscBLASInt_FMT, lierr);
7184: PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t), &lqr_work));
7185: PetscCall(PetscMalloc1(lqr_work, &qr_work));
7186: lgqr_work = -1;
7187: PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_M));
7188: PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_N));
7189: PetscCall(PetscBLASIntCast(max_constraints, &Blas_K));
7190: PetscCall(PetscBLASIntCast(max_size_of_constraint, &Blas_LDA));
7191: if (Blas_K > Blas_M) Blas_K = Blas_M; /* adjust just for computing optimal work */
7192: PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, &lgqr_work_t, &lgqr_work, &lierr));
7193: PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in query to ORGQR/UNGQR Lapack routine %" PetscBLASInt_FMT, lierr);
7194: PetscCall(PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t), &lgqr_work));
7195: PetscCall(PetscMalloc1(lgqr_work, &gqr_work));
7196: /* array to store rhs and solution of triangular solver */
7197: PetscCall(PetscMalloc1(max_constraints * max_constraints, &trs_rhs));
7198: /* allocating workspace for check */
7199: if (pcbddc->dbg_flag) PetscCall(PetscMalloc1(max_size_of_constraint * (max_constraints + max_size_of_constraint), &dbg_work));
7200: }
7201: /* array to store whether a node is primal or not */
7202: PetscCall(PetscBTCreate(pcis->n_B, &is_primal));
7203: PetscCall(PetscMalloc1(total_primal_vertices, &aux_primal_numbering_B));
7204: PetscCall(ISGlobalToLocalMappingApply(pcis->BtoNmap, IS_GTOLM_DROP, total_primal_vertices, pcbddc->local_primal_ref_node, &i, aux_primal_numbering_B));
7205: PetscCheck(i == total_primal_vertices, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Error in boundary numbering for BDDC vertices! %" PetscInt_FMT " != %" PetscInt_FMT, total_primal_vertices, i);
7206: for (i = 0; i < total_primal_vertices; i++) PetscCall(PetscBTSet(is_primal, aux_primal_numbering_B[i]));
7207: PetscCall(PetscFree(aux_primal_numbering_B));
7209: /* loop on constraints and see whether or not they need a change of basis and compute it */
7210: for (total_counts = n_vertices; total_counts < total_counts_cc; total_counts++) {
7211: size_of_constraint = constraints_idxs_ptr[total_counts + 1] - constraints_idxs_ptr[total_counts];
7212: if (PetscBTLookup(change_basis, total_counts)) {
7213: /* get constraint info */
7214: primal_dofs = constraints_n[total_counts];
7215: dual_dofs = size_of_constraint - primal_dofs;
7217: if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraints %" PetscInt_FMT ": %" PetscInt_FMT " need a change of basis (size %" PetscInt_FMT ")\n", total_counts, primal_dofs, size_of_constraint));
7219: if (PetscBTLookup(qr_needed_idx, total_counts)) { /* QR */
7221: /* copy quadrature constraints for change of basis check */
7222: if (pcbddc->dbg_flag) PetscCall(PetscArraycpy(dbg_work, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
7223: /* copy temporary constraints into larger work vector (in order to store all columns of Q) */
7224: PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
7226: /* compute QR decomposition of constraints */
7227: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7228: PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7229: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7230: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7231: PetscCallBLAS("LAPACKgeqrf", LAPACKgeqrf_(&Blas_M, &Blas_N, qr_basis, &Blas_LDA, qr_tau, qr_work, &lqr_work, &lierr));
7232: PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in GEQRF Lapack routine %" PetscBLASInt_FMT, lierr);
7233: PetscCall(PetscFPTrapPop());
7235: /* explicitly compute R^-T */
7236: PetscCall(PetscArrayzero(trs_rhs, primal_dofs * primal_dofs));
7237: for (j = 0; j < primal_dofs; j++) trs_rhs[j * (primal_dofs + 1)] = 1.0;
7238: PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7239: PetscCall(PetscBLASIntCast(primal_dofs, &Blas_NRHS));
7240: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7241: PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB));
7242: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7243: PetscCallBLAS("LAPACKtrtrs", LAPACKtrtrs_("U", "T", "N", &Blas_N, &Blas_NRHS, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &lierr));
7244: PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in TRTRS Lapack routine %" PetscBLASInt_FMT, lierr);
7245: PetscCall(PetscFPTrapPop());
7247: /* explicitly compute all columns of Q (Q = [Q1 | Q2]) overwriting QR factorization in qr_basis */
7248: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7249: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7250: PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K));
7251: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7252: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7253: PetscCallBLAS("LAPACKorgqr", LAPACKorgqr_(&Blas_M, &Blas_N, &Blas_K, qr_basis, &Blas_LDA, qr_tau, gqr_work, &lgqr_work, &lierr));
7254: PetscCheck(!lierr, PETSC_COMM_SELF, PETSC_ERR_LIB, "Error in ORGQR/UNGQR Lapack routine %" PetscBLASInt_FMT, lierr);
7255: PetscCall(PetscFPTrapPop());
7257: /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints
7258: i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below)
7259: where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */
7260: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_M));
7261: PetscCall(PetscBLASIntCast(primal_dofs, &Blas_N));
7262: PetscCall(PetscBLASIntCast(primal_dofs, &Blas_K));
7263: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7264: PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDB));
7265: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDC));
7266: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7267: PetscCallBLAS("BLASgemm", BLASgemm_("N", "N", &Blas_M, &Blas_N, &Blas_K, &one, qr_basis, &Blas_LDA, trs_rhs, &Blas_LDB, &zero, constraints_data + constraints_data_ptr[total_counts], &Blas_LDC));
7268: PetscCall(PetscFPTrapPop());
7269: PetscCall(PetscArraycpy(qr_basis, &constraints_data[constraints_data_ptr[total_counts]], size_of_constraint * primal_dofs));
7271: /* insert values in change of basis matrix respecting global ordering of new primal dofs */
7272: start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]];
7273: /* insert cols for primal dofs */
7274: for (j = 0; j < primal_dofs; j++) {
7275: start_vals = &qr_basis[j * size_of_constraint];
7276: start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + j];
7277: PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES));
7278: }
7279: /* insert cols for dual dofs */
7280: for (j = 0, k = 0; j < dual_dofs; k++) {
7281: if (!PetscBTLookup(is_primal, constraints_idxs_B[constraints_idxs_ptr[total_counts] + k])) {
7282: start_vals = &qr_basis[(primal_dofs + j) * size_of_constraint];
7283: start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts] + k];
7284: PetscCall(MatSetValues(localChangeOfBasisMatrix, size_of_constraint, start_rows, 1, start_cols, start_vals, INSERT_VALUES));
7285: j++;
7286: }
7287: }
7289: /* check change of basis */
7290: if (pcbddc->dbg_flag) {
7291: PetscInt ii, jj;
7292: PetscBool valid_qr = PETSC_TRUE;
7293: PetscCall(PetscBLASIntCast(primal_dofs, &Blas_M));
7294: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7295: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_K));
7296: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDA));
7297: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_LDB));
7298: PetscCall(PetscBLASIntCast(primal_dofs, &Blas_LDC));
7299: PetscCall(PetscFPTrapPush(PETSC_FP_TRAP_OFF));
7300: PetscCallBLAS("BLASgemm", BLASgemm_("T", "N", &Blas_M, &Blas_N, &Blas_K, &one, dbg_work, &Blas_LDA, qr_basis, &Blas_LDB, &zero, &dbg_work[size_of_constraint * primal_dofs], &Blas_LDC));
7301: PetscCall(PetscFPTrapPop());
7302: for (jj = 0; jj < size_of_constraint; jj++) {
7303: for (ii = 0; ii < primal_dofs; ii++) {
7304: if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) valid_qr = PETSC_FALSE;
7305: if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE;
7306: }
7307: }
7308: if (!valid_qr) {
7309: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> wrong change of basis!\n"));
7310: for (jj = 0; jj < size_of_constraint; jj++) {
7311: for (ii = 0; ii < primal_dofs; ii++) {
7312: if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii]) > 1.e-12) {
7313: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not orthogonal to constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii])));
7314: }
7315: if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii] - (PetscReal)1) > 1.e-12) {
7316: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\tQr basis function %" PetscInt_FMT " is not unitary w.r.t constraint %" PetscInt_FMT " (%1.14e)!\n", jj, ii, (double)PetscAbsScalar(dbg_work[size_of_constraint * primal_dofs + jj * primal_dofs + ii])));
7317: }
7318: }
7319: }
7320: } else {
7321: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> right change of basis!\n"));
7322: }
7323: }
7324: } else { /* simple transformation block */
7325: PetscInt row, col;
7326: PetscScalar val, norm;
7328: PetscCall(PetscBLASIntCast(size_of_constraint, &Blas_N));
7329: PetscCallBLAS("BLASdot", norm = BLASdot_(&Blas_N, constraints_data + constraints_data_ptr[total_counts], &Blas_one, constraints_data + constraints_data_ptr[total_counts], &Blas_one));
7330: for (j = 0; j < size_of_constraint; j++) {
7331: PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts] + j];
7332: row = constraints_idxs[constraints_idxs_ptr[total_counts] + j];
7333: if (!PetscBTLookup(is_primal, row_B)) {
7334: col = constraints_idxs[constraints_idxs_ptr[total_counts]];
7335: PetscCall(MatSetValue(localChangeOfBasisMatrix, row, row, 1.0, INSERT_VALUES));
7336: PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, constraints_data[constraints_data_ptr[total_counts] + j] / norm, INSERT_VALUES));
7337: } else {
7338: for (k = 0; k < size_of_constraint; k++) {
7339: col = constraints_idxs[constraints_idxs_ptr[total_counts] + k];
7340: if (row != col) {
7341: val = -constraints_data[constraints_data_ptr[total_counts] + k] / constraints_data[constraints_data_ptr[total_counts]];
7342: } else {
7343: val = constraints_data[constraints_data_ptr[total_counts]] / norm;
7344: }
7345: PetscCall(MatSetValue(localChangeOfBasisMatrix, row, col, val, INSERT_VALUES));
7346: }
7347: }
7348: }
7349: if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "\t-> using standard change of basis\n"));
7350: }
7351: } else {
7352: if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Constraint %" PetscInt_FMT " does not need a change of basis (size %" PetscInt_FMT ")\n", total_counts, size_of_constraint));
7353: }
7354: }
7356: /* free workspace */
7357: if (qr_needed) {
7358: if (pcbddc->dbg_flag) PetscCall(PetscFree(dbg_work));
7359: PetscCall(PetscFree(trs_rhs));
7360: PetscCall(PetscFree(qr_tau));
7361: PetscCall(PetscFree(qr_work));
7362: PetscCall(PetscFree(gqr_work));
7363: PetscCall(PetscFree(qr_basis));
7364: }
7365: PetscCall(PetscBTDestroy(&is_primal));
7366: PetscCall(MatAssemblyBegin(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY));
7367: PetscCall(MatAssemblyEnd(localChangeOfBasisMatrix, MAT_FINAL_ASSEMBLY));
7369: /* assembling of global change of variable */
7370: if (!pcbddc->fake_change) {
7371: Mat tmat;
7373: PetscCall(VecGetSize(pcis->vec1_global, &global_size));
7374: PetscCall(VecGetLocalSize(pcis->vec1_global, &local_size));
7375: PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &tmat));
7376: PetscCall(MatISSetLocalMat(tmat, localChangeOfBasisMatrix));
7377: PetscCall(MatAssemblyBegin(tmat, MAT_FINAL_ASSEMBLY));
7378: PetscCall(MatAssemblyEnd(tmat, MAT_FINAL_ASSEMBLY));
7379: PetscCall(MatConvert(tmat, MATAIJ, MAT_INITIAL_MATRIX, &pcbddc->ChangeOfBasisMatrix));
7380: PetscCall(MatDestroy(&tmat));
7381: PetscCall(VecSet(pcis->vec1_global, 0.0));
7382: PetscCall(VecSet(pcis->vec1_N, 1.0));
7383: PetscCall(VecScatterBegin(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
7384: PetscCall(VecScatterEnd(matis->rctx, pcis->vec1_N, pcis->vec1_global, ADD_VALUES, SCATTER_REVERSE));
7385: PetscCall(VecReciprocal(pcis->vec1_global));
7386: PetscCall(MatDiagonalScale(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, NULL));
7388: /* check */
7389: if (pcbddc->dbg_flag) {
7390: PetscReal error;
7391: Vec x, x_change;
7393: PetscCall(VecDuplicate(pcis->vec1_global, &x));
7394: PetscCall(VecDuplicate(pcis->vec1_global, &x_change));
7395: PetscCall(VecSetRandom(x, NULL));
7396: PetscCall(VecCopy(x, pcis->vec1_global));
7397: PetscCall(VecScatterBegin(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
7398: PetscCall(VecScatterEnd(matis->rctx, x, pcis->vec1_N, INSERT_VALUES, SCATTER_FORWARD));
7399: PetscCall(MatMult(localChangeOfBasisMatrix, pcis->vec1_N, pcis->vec2_N));
7400: PetscCall(VecScatterBegin(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE));
7401: PetscCall(VecScatterEnd(matis->rctx, pcis->vec2_N, x, INSERT_VALUES, SCATTER_REVERSE));
7402: PetscCall(MatMult(pcbddc->ChangeOfBasisMatrix, pcis->vec1_global, x_change));
7403: PetscCall(VecAXPY(x, -1.0, x_change));
7404: PetscCall(VecNorm(x, NORM_INFINITY, &error));
7405: PetscCheck(error <= PETSC_SMALL, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "Error global vs local change on N: %1.6e", (double)error);
7406: PetscCall(VecDestroy(&x));
7407: PetscCall(VecDestroy(&x_change));
7408: }
7409: /* adapt sub_schurs computed (if any) */
7410: if (pcbddc->use_deluxe_scaling) {
7411: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
7413: PetscCheck(!pcbddc->use_change_of_basis || !pcbddc->adaptive_userdefined, PetscObjectComm((PetscObject)pc), PETSC_ERR_SUP, "Cannot mix automatic change of basis, adaptive selection and user-defined constraints");
7414: if (sub_schurs && sub_schurs->S_Ej_all) {
7415: Mat S_new, tmat;
7416: IS is_all_N, is_V_Sall = NULL;
7418: PetscCall(ISLocalToGlobalMappingApplyIS(pcis->BtoNmap, sub_schurs->is_Ej_all, &is_all_N));
7419: PetscCall(MatCreateSubMatrix(localChangeOfBasisMatrix, is_all_N, is_all_N, MAT_INITIAL_MATRIX, &tmat));
7420: if (pcbddc->deluxe_zerorows) {
7421: ISLocalToGlobalMapping NtoSall;
7422: IS is_V;
7423: PetscCall(ISCreateGeneral(PETSC_COMM_SELF, pcbddc->n_vertices, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &is_V));
7424: PetscCall(ISLocalToGlobalMappingCreateIS(is_all_N, &NtoSall));
7425: PetscCall(ISGlobalToLocalMappingApplyIS(NtoSall, IS_GTOLM_DROP, is_V, &is_V_Sall));
7426: PetscCall(ISLocalToGlobalMappingDestroy(&NtoSall));
7427: PetscCall(ISDestroy(&is_V));
7428: }
7429: PetscCall(ISDestroy(&is_all_N));
7430: PetscCall(MatPtAP(sub_schurs->S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new));
7431: PetscCall(MatDestroy(&sub_schurs->S_Ej_all));
7432: PetscCall(PetscObjectReference((PetscObject)S_new));
7433: if (pcbddc->deluxe_zerorows) {
7434: const PetscScalar *array;
7435: const PetscInt *idxs_V, *idxs_all;
7436: PetscInt i, n_V;
7438: PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL));
7439: PetscCall(ISGetLocalSize(is_V_Sall, &n_V));
7440: PetscCall(ISGetIndices(is_V_Sall, &idxs_V));
7441: PetscCall(ISGetIndices(sub_schurs->is_Ej_all, &idxs_all));
7442: PetscCall(VecGetArrayRead(pcis->D, &array));
7443: for (i = 0; i < n_V; i++) {
7444: PetscScalar val;
7445: PetscInt idx;
7447: idx = idxs_V[i];
7448: val = array[idxs_all[idxs_V[i]]];
7449: PetscCall(MatSetValue(S_new, idx, idx, val, INSERT_VALUES));
7450: }
7451: PetscCall(MatAssemblyBegin(S_new, MAT_FINAL_ASSEMBLY));
7452: PetscCall(MatAssemblyEnd(S_new, MAT_FINAL_ASSEMBLY));
7453: PetscCall(VecRestoreArrayRead(pcis->D, &array));
7454: PetscCall(ISRestoreIndices(sub_schurs->is_Ej_all, &idxs_all));
7455: PetscCall(ISRestoreIndices(is_V_Sall, &idxs_V));
7456: }
7457: sub_schurs->S_Ej_all = S_new;
7458: PetscCall(MatDestroy(&S_new));
7459: if (sub_schurs->sum_S_Ej_all) {
7460: PetscCall(MatPtAP(sub_schurs->sum_S_Ej_all, tmat, MAT_INITIAL_MATRIX, 1.0, &S_new));
7461: PetscCall(MatDestroy(&sub_schurs->sum_S_Ej_all));
7462: PetscCall(PetscObjectReference((PetscObject)S_new));
7463: if (pcbddc->deluxe_zerorows) PetscCall(MatZeroRowsColumnsIS(S_new, is_V_Sall, 1., NULL, NULL));
7464: sub_schurs->sum_S_Ej_all = S_new;
7465: PetscCall(MatDestroy(&S_new));
7466: }
7467: PetscCall(ISDestroy(&is_V_Sall));
7468: PetscCall(MatDestroy(&tmat));
7469: }
7470: /* destroy any change of basis context in sub_schurs */
7471: if (sub_schurs && sub_schurs->change) {
7472: PetscInt i;
7474: for (i = 0; i < sub_schurs->n_subs; i++) PetscCall(KSPDestroy(&sub_schurs->change[i]));
7475: PetscCall(PetscFree(sub_schurs->change));
7476: }
7477: }
7478: if (pcbddc->switch_static) { /* need to save the local change */
7479: pcbddc->switch_static_change = localChangeOfBasisMatrix;
7480: } else {
7481: PetscCall(MatDestroy(&localChangeOfBasisMatrix));
7482: }
7483: /* determine if any process has changed the pressures locally */
7484: pcbddc->change_interior = pcbddc->benign_have_null;
7485: } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */
7486: PetscCall(MatDestroy(&pcbddc->ConstraintMatrix));
7487: pcbddc->ConstraintMatrix = localChangeOfBasisMatrix;
7488: pcbddc->use_qr_single = qr_needed;
7489: }
7490: } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) {
7491: if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) {
7492: PetscCall(PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix));
7493: pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix;
7494: } else {
7495: Mat benign_global = NULL;
7496: if (pcbddc->benign_have_null) {
7497: Mat M;
7499: pcbddc->change_interior = PETSC_TRUE;
7500: PetscCall(VecCopy(matis->counter, pcis->vec1_N));
7501: PetscCall(VecReciprocal(pcis->vec1_N));
7502: PetscCall(MatDuplicate(pc->pmat, MAT_DO_NOT_COPY_VALUES, &benign_global));
7503: if (pcbddc->benign_change) {
7504: PetscCall(MatDuplicate(pcbddc->benign_change, MAT_COPY_VALUES, &M));
7505: PetscCall(MatDiagonalScale(M, pcis->vec1_N, NULL));
7506: } else {
7507: PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, pcis->n, pcis->n, 1, NULL, &M));
7508: PetscCall(MatDiagonalSet(M, pcis->vec1_N, INSERT_VALUES));
7509: }
7510: PetscCall(MatISSetLocalMat(benign_global, M));
7511: PetscCall(MatDestroy(&M));
7512: PetscCall(MatAssemblyBegin(benign_global, MAT_FINAL_ASSEMBLY));
7513: PetscCall(MatAssemblyEnd(benign_global, MAT_FINAL_ASSEMBLY));
7514: }
7515: if (pcbddc->user_ChangeOfBasisMatrix) {
7516: PetscCall(MatMatMult(pcbddc->user_ChangeOfBasisMatrix, benign_global, MAT_INITIAL_MATRIX, PETSC_DETERMINE, &pcbddc->ChangeOfBasisMatrix));
7517: PetscCall(MatDestroy(&benign_global));
7518: } else if (pcbddc->benign_have_null) {
7519: pcbddc->ChangeOfBasisMatrix = benign_global;
7520: }
7521: }
7522: if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */
7523: IS is_global;
7524: const PetscInt *gidxs;
7526: PetscCall(ISLocalToGlobalMappingGetIndices(matis->rmapping, &gidxs));
7527: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcis->n, gidxs, PETSC_COPY_VALUES, &is_global));
7528: PetscCall(ISLocalToGlobalMappingRestoreIndices(matis->rmapping, &gidxs));
7529: PetscCall(MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix, is_global, is_global, &pcbddc->switch_static_change));
7530: PetscCall(ISDestroy(&is_global));
7531: }
7532: }
7533: if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) PetscCall(VecDuplicate(pcis->vec1_global, &pcbddc->work_change));
7535: if (!pcbddc->fake_change) {
7536: /* add pressure dofs to set of primal nodes for numbering purposes */
7537: for (i = 0; i < pcbddc->benign_n; i++) {
7538: pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i];
7539: pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i];
7540: pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1;
7541: pcbddc->local_primal_size_cc++;
7542: pcbddc->local_primal_size++;
7543: }
7545: /* check if a new primal space has been introduced (also take into account benign trick) */
7546: pcbddc->new_primal_space_local = PETSC_TRUE;
7547: if (olocal_primal_size == pcbddc->local_primal_size) {
7548: PetscCall(PetscArraycmp(pcbddc->local_primal_ref_node, olocal_primal_ref_node, olocal_primal_size_cc, &pcbddc->new_primal_space_local));
7549: pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7550: if (!pcbddc->new_primal_space_local) {
7551: PetscCall(PetscArraycmp(pcbddc->local_primal_ref_mult, olocal_primal_ref_mult, olocal_primal_size_cc, &pcbddc->new_primal_space_local));
7552: pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local);
7553: }
7554: }
7555: /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */
7556: PetscCallMPI(MPIU_Allreduce(&pcbddc->new_primal_space_local, &pcbddc->new_primal_space, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
7557: }
7558: PetscCall(PetscFree2(olocal_primal_ref_node, olocal_primal_ref_mult));
7560: /* flush dbg viewer */
7561: if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
7563: /* free workspace */
7564: PetscCall(PetscBTDestroy(&qr_needed_idx));
7565: PetscCall(PetscBTDestroy(&change_basis));
7566: if (!pcbddc->adaptive_selection) {
7567: PetscCall(PetscFree3(constraints_idxs_ptr, constraints_data_ptr, constraints_n));
7568: PetscCall(PetscFree3(constraints_data, constraints_idxs, constraints_idxs_B));
7569: } else {
7570: PetscCall(PetscFree5(pcbddc->adaptive_constraints_n, pcbddc->adaptive_constraints_idxs_ptr, pcbddc->adaptive_constraints_data_ptr, pcbddc->adaptive_constraints_idxs, pcbddc->adaptive_constraints_data));
7571: PetscCall(PetscFree(constraints_n));
7572: PetscCall(PetscFree(constraints_idxs_B));
7573: }
7574: PetscFunctionReturn(PETSC_SUCCESS);
7575: }
7577: PetscErrorCode PCBDDCAnalyzeInterface(PC pc)
7578: {
7579: ISLocalToGlobalMapping map;
7580: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
7581: Mat_IS *matis = (Mat_IS *)pc->pmat->data;
7582: PetscInt i, N;
7583: PetscBool rcsr = PETSC_FALSE;
7585: PetscFunctionBegin;
7586: if (pcbddc->recompute_topography) {
7587: pcbddc->graphanalyzed = PETSC_FALSE;
7588: /* Reset previously computed graph */
7589: PetscCall(PCBDDCGraphReset(pcbddc->mat_graph));
7590: /* Init local Graph struct */
7591: PetscCall(MatGetSize(pc->pmat, &N, NULL));
7592: PetscCall(MatISGetLocalToGlobalMapping(pc->pmat, &map, NULL));
7593: PetscCall(PCBDDCGraphInit(pcbddc->mat_graph, map, N, pcbddc->graphmaxcount));
7595: if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) PetscCall(PCBDDCConsistencyCheckIS(pc, MPI_LOR, &pcbddc->user_primal_vertices_local));
7596: /* Check validity of the csr graph passed in by the user */
7597: PetscCheck(!pcbddc->mat_graph->nvtxs_csr || pcbddc->mat_graph->nvtxs_csr == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "Invalid size of local CSR graph! Found %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->nvtxs_csr,
7598: pcbddc->mat_graph->nvtxs);
7600: /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */
7601: if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) {
7602: PetscInt *xadj, *adjncy;
7603: PetscInt nvtxs;
7604: PetscBool flg_row;
7605: Mat A;
7607: PetscCall(PetscObjectReference((PetscObject)matis->A));
7608: A = matis->A;
7609: for (PetscInt i = 0; i < pcbddc->local_adj_square; i++) {
7610: Mat AtA;
7612: PetscCall(MatProductCreate(A, A, NULL, &AtA));
7613: PetscCall(MatSetOptionsPrefix(AtA, "pc_bddc_graph_"));
7614: PetscCall(MatProductSetType(AtA, MATPRODUCT_AtB));
7615: PetscCall(MatProductSetFromOptions(AtA));
7616: PetscCall(MatProductSymbolic(AtA));
7617: PetscCall(MatProductClear(AtA));
7618: /* we only need the sparsity, cheat and tell PETSc the matrix has been assembled */
7619: AtA->assembled = PETSC_TRUE;
7620: PetscCall(MatDestroy(&A));
7621: A = AtA;
7622: }
7623: PetscCall(MatGetRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
7624: if (flg_row) {
7625: PetscCall(PCBDDCSetLocalAdjacencyGraph(pc, nvtxs, xadj, adjncy, PETSC_COPY_VALUES));
7626: pcbddc->computed_rowadj = PETSC_TRUE;
7627: PetscCall(MatRestoreRowIJ(A, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, (const PetscInt **)&xadj, (const PetscInt **)&adjncy, &flg_row));
7628: rcsr = PETSC_TRUE;
7629: }
7630: PetscCall(MatDestroy(&A));
7631: }
7632: if (pcbddc->dbg_flag) PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
7634: if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) {
7635: PetscReal *lcoords;
7636: PetscInt n;
7637: MPI_Datatype dimrealtype;
7638: PetscMPIInt cdimi;
7640: /* TODO: support for blocked */
7641: PetscCheck(pcbddc->mat_graph->cnloc == pc->pmat->rmap->n, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc, pc->pmat->rmap->n);
7642: PetscCall(MatGetLocalSize(matis->A, &n, NULL));
7643: PetscCall(PetscMalloc1(pcbddc->mat_graph->cdim * n, &lcoords));
7644: PetscCall(PetscMPIIntCast(pcbddc->mat_graph->cdim, &cdimi));
7645: PetscCallMPI(MPI_Type_contiguous(cdimi, MPIU_REAL, &dimrealtype));
7646: PetscCallMPI(MPI_Type_commit(&dimrealtype));
7647: PetscCall(PetscSFBcastBegin(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE));
7648: PetscCall(PetscSFBcastEnd(matis->sf, dimrealtype, pcbddc->mat_graph->coords, lcoords, MPI_REPLACE));
7649: PetscCallMPI(MPI_Type_free(&dimrealtype));
7650: PetscCall(PetscFree(pcbddc->mat_graph->coords));
7652: pcbddc->mat_graph->coords = lcoords;
7653: pcbddc->mat_graph->cloc = PETSC_TRUE;
7654: pcbddc->mat_graph->cnloc = n;
7655: }
7656: PetscCheck(!pcbddc->mat_graph->cnloc || pcbddc->mat_graph->cnloc == pcbddc->mat_graph->nvtxs, PETSC_COMM_SELF, PETSC_ERR_USER, "Invalid number of local subdomain coordinates! Got %" PetscInt_FMT ", expected %" PetscInt_FMT, pcbddc->mat_graph->cnloc,
7657: pcbddc->mat_graph->nvtxs);
7658: pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && pcbddc->mat_graph->cdim && !pcbddc->corner_selected);
7660: /* attach info on disconnected subdomains if present */
7661: if (pcbddc->n_local_subs) {
7662: PetscInt *local_subs, n, totn;
7664: PetscCall(MatGetLocalSize(matis->A, &n, NULL));
7665: PetscCall(PetscMalloc1(n, &local_subs));
7666: for (i = 0; i < n; i++) local_subs[i] = pcbddc->n_local_subs;
7667: for (i = 0; i < pcbddc->n_local_subs; i++) {
7668: const PetscInt *idxs;
7669: PetscInt nl, j;
7671: PetscCall(ISGetLocalSize(pcbddc->local_subs[i], &nl));
7672: PetscCall(ISGetIndices(pcbddc->local_subs[i], &idxs));
7673: for (j = 0; j < nl; j++) local_subs[idxs[j]] = i;
7674: PetscCall(ISRestoreIndices(pcbddc->local_subs[i], &idxs));
7675: }
7676: for (i = 0, totn = 0; i < n; i++) totn = PetscMax(totn, local_subs[i]);
7677: pcbddc->mat_graph->n_local_subs = totn + 1;
7678: pcbddc->mat_graph->local_subs = local_subs;
7679: }
7681: /* Setup of Graph */
7682: PetscCall(PCBDDCGraphSetUp(pcbddc->mat_graph, pcbddc->vertex_size, pcbddc->NeumannBoundariesLocal, pcbddc->DirichletBoundariesLocal, pcbddc->n_ISForDofsLocal, pcbddc->ISForDofsLocal, pcbddc->user_primal_vertices_local));
7683: }
7685: if (!pcbddc->graphanalyzed) {
7686: /* Graph's connected components analysis */
7687: PetscCall(PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph));
7688: pcbddc->graphanalyzed = PETSC_TRUE;
7689: pcbddc->corner_selected = pcbddc->corner_selection;
7690: }
7691: if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0;
7692: PetscFunctionReturn(PETSC_SUCCESS);
7693: }
7695: PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt *nio, Vec vecs[])
7696: {
7697: PetscInt i, j, n;
7698: PetscScalar *alphas;
7699: PetscReal norm, *onorms;
7701: PetscFunctionBegin;
7702: n = *nio;
7703: if (!n) PetscFunctionReturn(PETSC_SUCCESS);
7704: PetscCall(PetscMalloc2(n, &alphas, n, &onorms));
7705: PetscCall(VecNormalize(vecs[0], &norm));
7706: if (norm < PETSC_SMALL) {
7707: onorms[0] = 0.0;
7708: PetscCall(VecSet(vecs[0], 0.0));
7709: } else {
7710: onorms[0] = norm;
7711: }
7713: for (i = 1; i < n; i++) {
7714: PetscCall(VecMDot(vecs[i], i, vecs, alphas));
7715: for (j = 0; j < i; j++) alphas[j] = PetscConj(-alphas[j]);
7716: PetscCall(VecMAXPY(vecs[i], i, alphas, vecs));
7717: PetscCall(VecNormalize(vecs[i], &norm));
7718: if (norm < PETSC_SMALL) {
7719: onorms[i] = 0.0;
7720: PetscCall(VecSet(vecs[i], 0.0));
7721: } else {
7722: onorms[i] = norm;
7723: }
7724: }
7725: /* push nonzero vectors at the beginning */
7726: for (i = 0; i < n; i++) {
7727: if (onorms[i] == 0.0) {
7728: for (j = i + 1; j < n; j++) {
7729: if (onorms[j] != 0.0) {
7730: PetscCall(VecCopy(vecs[j], vecs[i]));
7731: onorms[i] = onorms[j];
7732: onorms[j] = 0.0;
7733: break;
7734: }
7735: }
7736: }
7737: }
7738: for (i = 0, *nio = 0; i < n; i++) *nio += onorms[i] != 0.0 ? 1 : 0;
7739: PetscCall(PetscFree2(alphas, onorms));
7740: PetscFunctionReturn(PETSC_SUCCESS);
7741: }
7743: static PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS *is_sends, PetscBool *have_void)
7744: {
7745: ISLocalToGlobalMapping mapping;
7746: Mat A;
7747: PetscInt n_neighs, *neighs, *n_shared, **shared;
7748: PetscMPIInt size, rank, color;
7749: PetscInt *xadj, *adjncy;
7750: PetscInt *adjncy_wgt, *v_wgt, *ranks_send_to_idx;
7751: PetscInt im_active, active_procs, N, n, i, j, threshold = 2;
7752: PetscInt void_procs, *procs_candidates = NULL;
7753: PetscInt xadj_count, *count;
7754: PetscBool ismatis, use_vwgt = PETSC_FALSE;
7755: PetscSubcomm psubcomm;
7756: MPI_Comm subcomm;
7758: PetscFunctionBegin;
7760: PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis));
7761: PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME);
7764: PetscCheck(*n_subdomains > 0, PetscObjectComm((PetscObject)mat), PETSC_ERR_ARG_WRONG, "Invalid number of subdomains requested %" PetscInt_FMT, *n_subdomains);
7766: if (have_void) *have_void = PETSC_FALSE;
7767: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)mat), &size));
7768: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)mat), &rank));
7769: PetscCall(MatISGetLocalMat(mat, &A));
7770: PetscCall(MatGetLocalSize(A, &n, NULL));
7771: im_active = !!n;
7772: PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)mat)));
7773: void_procs = size - active_procs;
7774: /* get ranks of non-active processes in mat communicator */
7775: if (void_procs) {
7776: PetscInt ncand;
7778: if (have_void) *have_void = PETSC_TRUE;
7779: PetscCall(PetscMalloc1(size, &procs_candidates));
7780: PetscCallMPI(MPI_Allgather(&im_active, 1, MPIU_INT, procs_candidates, 1, MPIU_INT, PetscObjectComm((PetscObject)mat)));
7781: for (i = 0, ncand = 0; i < size; i++) {
7782: if (!procs_candidates[i]) procs_candidates[ncand++] = i;
7783: }
7784: /* force n_subdomains to be not greater that the number of non-active processes */
7785: *n_subdomains = PetscMin(void_procs, *n_subdomains);
7786: }
7788: /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix
7789: number of subdomains requested 1 -> send to rank-0 or first candidate in voids */
7790: PetscCall(MatGetSize(mat, &N, NULL));
7791: if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) {
7792: PetscInt issize, isidx, dest;
7793: PetscBool default_sub;
7795: if (*n_subdomains == 1) dest = 0;
7796: else dest = rank;
7797: if (im_active) {
7798: issize = 1;
7799: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7800: isidx = procs_candidates[dest];
7801: } else {
7802: isidx = dest;
7803: }
7804: } else {
7805: issize = 0;
7806: isidx = rank;
7807: }
7808: if (*n_subdomains != 1) *n_subdomains = active_procs;
7809: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), issize, &isidx, PETSC_COPY_VALUES, is_sends));
7810: default_sub = (PetscBool)(isidx == rank);
7811: PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &default_sub, 1, MPI_C_BOOL, MPI_LAND, PetscObjectComm((PetscObject)mat)));
7812: if (default_sub) PetscCall(PetscObjectSetName((PetscObject)*is_sends, "default subassembling"));
7813: PetscCall(PetscFree(procs_candidates));
7814: PetscFunctionReturn(PETSC_SUCCESS);
7815: }
7816: PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_use_vwgt", &use_vwgt, NULL));
7817: PetscCall(PetscOptionsGetInt(NULL, ((PetscObject)A)->prefix, "-mat_is_partitioning_threshold", &threshold, NULL));
7818: threshold = PetscMax(threshold, 2);
7820: /* Get info on mapping */
7821: PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL));
7822: PetscCall(ISLocalToGlobalMappingGetInfo(mapping, &n_neighs, &neighs, &n_shared, &shared));
7824: /* build local CSR graph of subdomains' connectivity */
7825: PetscCall(PetscMalloc1(2, &xadj));
7826: xadj[0] = 0;
7827: xadj[1] = PetscMax(n_neighs - 1, 0);
7828: PetscCall(PetscMalloc1(xadj[1], &adjncy));
7829: PetscCall(PetscMalloc1(xadj[1], &adjncy_wgt));
7830: PetscCall(PetscCalloc1(n, &count));
7831: for (i = 1; i < n_neighs; i++)
7832: for (j = 0; j < n_shared[i]; j++) count[shared[i][j]] += 1;
7834: xadj_count = 0;
7835: for (i = 1; i < n_neighs; i++) {
7836: for (j = 0; j < n_shared[i]; j++) {
7837: if (count[shared[i][j]] < threshold) {
7838: adjncy[xadj_count] = neighs[i];
7839: adjncy_wgt[xadj_count] = n_shared[i];
7840: xadj_count++;
7841: break;
7842: }
7843: }
7844: }
7845: xadj[1] = xadj_count;
7846: PetscCall(PetscFree(count));
7847: PetscCall(ISLocalToGlobalMappingRestoreInfo(mapping, &n_neighs, &neighs, &n_shared, &shared));
7848: PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt));
7850: PetscCall(PetscMalloc1(1, &ranks_send_to_idx));
7852: /* Restrict work on active processes only */
7853: PetscCall(PetscMPIIntCast(im_active, &color));
7854: if (void_procs) {
7855: PetscCall(PetscSubcommCreate(PetscObjectComm((PetscObject)mat), &psubcomm));
7856: PetscCall(PetscSubcommSetNumber(psubcomm, 2)); /* 2 groups, active process and not active processes */
7857: PetscCall(PetscSubcommSetTypeGeneral(psubcomm, color, rank));
7858: subcomm = PetscSubcommChild(psubcomm);
7859: } else {
7860: psubcomm = NULL;
7861: subcomm = PetscObjectComm((PetscObject)mat);
7862: }
7864: v_wgt = NULL;
7865: if (!color) {
7866: PetscCall(PetscFree(xadj));
7867: PetscCall(PetscFree(adjncy));
7868: PetscCall(PetscFree(adjncy_wgt));
7869: } else {
7870: Mat subdomain_adj;
7871: IS new_ranks, new_ranks_contig;
7872: MatPartitioning partitioner;
7873: PetscInt rstart, rend;
7874: PetscMPIInt irstart = 0, irend = 0;
7875: PetscInt *is_indices, *oldranks;
7876: PetscMPIInt size;
7877: PetscBool aggregate;
7879: PetscCallMPI(MPI_Comm_size(subcomm, &size));
7880: if (void_procs) {
7881: PetscInt prank = rank;
7882: PetscCall(PetscMalloc1(size, &oldranks));
7883: PetscCallMPI(MPI_Allgather(&prank, 1, MPIU_INT, oldranks, 1, MPIU_INT, subcomm));
7884: for (i = 0; i < xadj[1]; i++) PetscCall(PetscFindInt(adjncy[i], size, oldranks, &adjncy[i]));
7885: PetscCall(PetscSortIntWithArray(xadj[1], adjncy, adjncy_wgt));
7886: } else {
7887: oldranks = NULL;
7888: }
7889: aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE);
7890: if (aggregate) { /* TODO: all this part could be made more efficient */
7891: PetscInt lrows, row, ncols, *cols;
7892: PetscMPIInt nrank;
7893: PetscScalar *vals;
7895: PetscCallMPI(MPI_Comm_rank(subcomm, &nrank));
7896: lrows = 0;
7897: if (nrank < redprocs) {
7898: lrows = size / redprocs;
7899: if (nrank < size % redprocs) lrows++;
7900: }
7901: PetscCall(MatCreateAIJ(subcomm, lrows, lrows, size, size, 50, NULL, 50, NULL, &subdomain_adj));
7902: PetscCall(MatGetOwnershipRange(subdomain_adj, &rstart, &rend));
7903: PetscCall(PetscMPIIntCast(rstart, &irstart));
7904: PetscCall(PetscMPIIntCast(rend, &irend));
7905: PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_LOCATION_ERR, PETSC_FALSE));
7906: PetscCall(MatSetOption(subdomain_adj, MAT_NEW_NONZERO_ALLOCATION_ERR, PETSC_FALSE));
7907: row = nrank;
7908: ncols = xadj[1] - xadj[0];
7909: cols = adjncy;
7910: PetscCall(PetscMalloc1(ncols, &vals));
7911: for (i = 0; i < ncols; i++) vals[i] = adjncy_wgt[i];
7912: PetscCall(MatSetValues(subdomain_adj, 1, &row, ncols, cols, vals, INSERT_VALUES));
7913: PetscCall(MatAssemblyBegin(subdomain_adj, MAT_FINAL_ASSEMBLY));
7914: PetscCall(MatAssemblyEnd(subdomain_adj, MAT_FINAL_ASSEMBLY));
7915: PetscCall(PetscFree(xadj));
7916: PetscCall(PetscFree(adjncy));
7917: PetscCall(PetscFree(adjncy_wgt));
7918: PetscCall(PetscFree(vals));
7919: if (use_vwgt) {
7920: Vec v;
7921: const PetscScalar *array;
7922: PetscInt nl;
7924: PetscCall(MatCreateVecs(subdomain_adj, &v, NULL));
7925: PetscCall(VecSetValue(v, row, (PetscScalar)n, INSERT_VALUES));
7926: PetscCall(VecAssemblyBegin(v));
7927: PetscCall(VecAssemblyEnd(v));
7928: PetscCall(VecGetLocalSize(v, &nl));
7929: PetscCall(VecGetArrayRead(v, &array));
7930: PetscCall(PetscMalloc1(nl, &v_wgt));
7931: for (i = 0; i < nl; i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]);
7932: PetscCall(VecRestoreArrayRead(v, &array));
7933: PetscCall(VecDestroy(&v));
7934: }
7935: } else {
7936: PetscCall(MatCreateMPIAdj(subcomm, 1, size, xadj, adjncy, adjncy_wgt, &subdomain_adj));
7937: if (use_vwgt) {
7938: PetscCall(PetscMalloc1(1, &v_wgt));
7939: v_wgt[0] = n;
7940: }
7941: }
7942: /* PetscCall(MatView(subdomain_adj,0)); */
7944: /* Partition */
7945: PetscCall(MatPartitioningCreate(subcomm, &partitioner));
7946: #if defined(PETSC_HAVE_PTSCOTCH)
7947: PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPTSCOTCH));
7948: #elif defined(PETSC_HAVE_PARMETIS)
7949: PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGPARMETIS));
7950: #else
7951: PetscCall(MatPartitioningSetType(partitioner, MATPARTITIONINGAVERAGE));
7952: #endif
7953: PetscCall(MatPartitioningSetAdjacency(partitioner, subdomain_adj));
7954: if (v_wgt) PetscCall(MatPartitioningSetVertexWeights(partitioner, v_wgt));
7955: *n_subdomains = PetscMin(size, *n_subdomains);
7956: PetscCall(MatPartitioningSetNParts(partitioner, *n_subdomains));
7957: PetscCall(MatPartitioningSetFromOptions(partitioner));
7958: PetscCall(MatPartitioningApply(partitioner, &new_ranks));
7959: /* PetscCall(MatPartitioningView(partitioner,0)); */
7961: /* renumber new_ranks to avoid "holes" in new set of processors */
7962: PetscCall(ISRenumber(new_ranks, NULL, NULL, &new_ranks_contig));
7963: PetscCall(ISDestroy(&new_ranks));
7964: PetscCall(ISGetIndices(new_ranks_contig, (const PetscInt **)&is_indices));
7965: if (!aggregate) {
7966: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7967: PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7968: ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]];
7969: } else if (oldranks) {
7970: ranks_send_to_idx[0] = oldranks[is_indices[0]];
7971: } else {
7972: ranks_send_to_idx[0] = is_indices[0];
7973: }
7974: } else {
7975: PetscInt idx = 0;
7976: PetscMPIInt tag;
7977: MPI_Request *reqs;
7979: PetscCall(PetscObjectGetNewTag((PetscObject)subdomain_adj, &tag));
7980: PetscCall(PetscMalloc1(rend - rstart, &reqs));
7981: for (PetscMPIInt i = irstart; i < irend; i++) PetscCallMPI(MPIU_Isend(is_indices + i - rstart, 1, MPIU_INT, i, tag, subcomm, &reqs[i - rstart]));
7982: PetscCallMPI(MPIU_Recv(&idx, 1, MPIU_INT, MPI_ANY_SOURCE, tag, subcomm, MPI_STATUS_IGNORE));
7983: PetscCallMPI(MPI_Waitall(irend - irstart, reqs, MPI_STATUSES_IGNORE));
7984: PetscCall(PetscFree(reqs));
7985: if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */
7986: PetscAssert(oldranks, PETSC_COMM_SELF, PETSC_ERR_PLIB, "This should not happen");
7987: ranks_send_to_idx[0] = procs_candidates[oldranks[idx]];
7988: } else if (oldranks) {
7989: ranks_send_to_idx[0] = oldranks[idx];
7990: } else {
7991: ranks_send_to_idx[0] = idx;
7992: }
7993: }
7994: PetscCall(ISRestoreIndices(new_ranks_contig, (const PetscInt **)&is_indices));
7995: /* clean up */
7996: PetscCall(PetscFree(oldranks));
7997: PetscCall(ISDestroy(&new_ranks_contig));
7998: PetscCall(MatDestroy(&subdomain_adj));
7999: PetscCall(MatPartitioningDestroy(&partitioner));
8000: }
8001: PetscCall(PetscSubcommDestroy(&psubcomm));
8002: PetscCall(PetscFree(procs_candidates));
8004: /* assemble parallel IS for sends */
8005: i = 1;
8006: if (!color) i = 0;
8007: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)mat), i, ranks_send_to_idx, PETSC_OWN_POINTER, is_sends));
8008: PetscFunctionReturn(PETSC_SUCCESS);
8009: }
/*
  MatTypePrivate - Integer tags identifying the storage format of a local matrix.

  PCBDDCMatISSubassemble() stores the tag, cast to PetscInt, as the first entry
  of the index buffer it exchanges between processes, and compares the received
  tags to choose the type of the new local matrix; the numeric values must thus
  be kept stable.
*/
typedef enum {
  MATDENSE_PRIVATE = 0, /* dense storage */
  MATAIJ_PRIVATE   = 1, /* AIJ storage */
  MATBAIJ_PRIVATE  = 2, /* BAIJ storage */
  MATSBAIJ_PRIVATE = 3  /* SBAIJ storage */
} MatTypePrivate;
8018: static PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[])
8019: {
8020: Mat local_mat;
8021: IS is_sends_internal;
8022: PetscInt rows, cols, new_local_rows;
8023: PetscInt i, bs, buf_size_idxs, buf_size_idxs_is, buf_size_vals, buf_size_vecs;
8024: PetscBool ismatis, isdense, newisdense, destroy_mat;
8025: ISLocalToGlobalMapping l2gmap;
8026: PetscInt *l2gmap_indices;
8027: const PetscInt *is_indices;
8028: MatType new_local_type;
8029: /* buffers */
8030: PetscInt *ptr_idxs, *send_buffer_idxs, *recv_buffer_idxs;
8031: PetscInt *ptr_idxs_is, *send_buffer_idxs_is, *recv_buffer_idxs_is;
8032: PetscInt *recv_buffer_idxs_local;
8033: PetscScalar *ptr_vals, *recv_buffer_vals;
8034: const PetscScalar *send_buffer_vals;
8035: PetscScalar *ptr_vecs, *send_buffer_vecs, *recv_buffer_vecs;
8036: /* MPI */
8037: MPI_Comm comm, comm_n;
8038: PetscSubcomm subcomm;
8039: PetscMPIInt n_sends, n_recvs, size;
8040: PetscMPIInt *iflags, *ilengths_idxs, *ilengths_vals, *ilengths_idxs_is;
8041: PetscMPIInt *onodes, *onodes_is, *olengths_idxs, *olengths_idxs_is, *olengths_vals;
8042: PetscMPIInt len, tag_idxs, tag_idxs_is, tag_vals, tag_vecs, source_dest;
8043: MPI_Request *send_req_idxs, *send_req_idxs_is, *send_req_vals, *send_req_vecs;
8044: MPI_Request *recv_req_idxs, *recv_req_idxs_is, *recv_req_vals, *recv_req_vecs;
8046: PetscFunctionBegin;
8048: PetscCall(PetscObjectTypeCompare((PetscObject)mat, MATIS, &ismatis));
8049: PetscCheck(ismatis, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot use %s on a matrix object which is not of type MATIS", PETSC_FUNCTION_NAME);
8056: if (nvecs) {
8057: PetscCheck(nvecs <= 1, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Just 1 vector supported");
8059: }
8060: /* further checks */
8061: PetscCall(MatISGetLocalMat(mat, &local_mat));
8062: PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &isdense));
8063: PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE");
8065: PetscCall(MatGetSize(local_mat, &rows, &cols));
8066: PetscCheck(rows == cols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Local MATIS matrices should be square");
8067: if (reuse && *mat_n) {
8068: PetscInt mrows, mcols, mnrows, mncols;
8070: PetscCall(PetscObjectTypeCompare((PetscObject)*mat_n, MATIS, &ismatis));
8071: PetscCheck(ismatis, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_SUP, "Cannot reuse a matrix which is not of type MATIS");
8072: PetscCall(MatGetSize(mat, &mrows, &mcols));
8073: PetscCall(MatGetSize(*mat_n, &mnrows, &mncols));
8074: PetscCheck(mrows == mnrows, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of rows %" PetscInt_FMT " != %" PetscInt_FMT, mrows, mnrows);
8075: PetscCheck(mcols == mncols, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Cannot reuse matrix! Wrong number of cols %" PetscInt_FMT " != %" PetscInt_FMT, mcols, mncols);
8076: }
8077: PetscCall(MatGetBlockSize(local_mat, &bs));
8080: /* prepare IS for sending if not provided */
8081: if (!is_sends) {
8082: PetscCheck(n_subdomains, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "You should specify either an IS or a target number of subdomains");
8083: PetscCall(PCBDDCMatISGetSubassemblingPattern(mat, &n_subdomains, 0, &is_sends_internal, NULL));
8084: } else {
8085: PetscCall(PetscObjectReference((PetscObject)is_sends));
8086: is_sends_internal = is_sends;
8087: }
8089: /* get comm */
8090: PetscCall(PetscObjectGetComm((PetscObject)mat, &comm));
8092: /* compute number of sends */
8093: PetscCall(ISGetLocalSize(is_sends_internal, &i));
8094: PetscCall(PetscMPIIntCast(i, &n_sends));
8096: /* compute number of receives */
8097: PetscCallMPI(MPI_Comm_size(comm, &size));
8098: PetscCall(PetscMalloc1(size, &iflags));
8099: PetscCall(PetscArrayzero(iflags, size));
8100: PetscCall(ISGetIndices(is_sends_internal, &is_indices));
8101: for (i = 0; i < n_sends; i++) iflags[is_indices[i]] = 1;
8102: PetscCall(PetscGatherNumberOfMessages(comm, iflags, NULL, &n_recvs));
8103: PetscCall(PetscFree(iflags));
8105: /* restrict comm if requested */
8106: subcomm = NULL;
8107: destroy_mat = PETSC_FALSE;
8108: if (restrict_comm) {
8109: PetscMPIInt color, subcommsize;
8111: color = 0;
8112: if (restrict_full) {
8113: if (!n_recvs) color = 1; /* processes not receiving anything will not participate in new comm (full restriction) */
8114: } else {
8115: if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not participate in new comm */
8116: }
8117: PetscCallMPI(MPIU_Allreduce(&color, &subcommsize, 1, MPI_INT, MPI_SUM, comm));
8118: subcommsize = size - subcommsize;
8119: /* check if reuse has been requested */
8120: if (reuse) {
8121: if (*mat_n) {
8122: PetscMPIInt subcommsize2;
8123: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n), &subcommsize2));
8124: PetscCheck(subcommsize == subcommsize2, PetscObjectComm((PetscObject)*mat_n), PETSC_ERR_PLIB, "Cannot reuse matrix! wrong subcomm size %d != %d", subcommsize, subcommsize2);
8125: comm_n = PetscObjectComm((PetscObject)*mat_n);
8126: } else {
8127: comm_n = PETSC_COMM_SELF;
8128: }
8129: } else { /* MAT_INITIAL_MATRIX */
8130: PetscMPIInt rank;
8132: PetscCallMPI(MPI_Comm_rank(comm, &rank));
8133: PetscCall(PetscSubcommCreate(comm, &subcomm));
8134: PetscCall(PetscSubcommSetNumber(subcomm, 2));
8135: PetscCall(PetscSubcommSetTypeGeneral(subcomm, color, rank));
8136: comm_n = PetscSubcommChild(subcomm);
8137: }
8138: /* flag to destroy *mat_n if not significative */
8139: if (color) destroy_mat = PETSC_TRUE;
8140: } else {
8141: comm_n = comm;
8142: }
8144: /* prepare send/receive buffers */
8145: PetscCall(PetscMalloc1(size, &ilengths_idxs));
8146: PetscCall(PetscArrayzero(ilengths_idxs, size));
8147: PetscCall(PetscMalloc1(size, &ilengths_vals));
8148: PetscCall(PetscArrayzero(ilengths_vals, size));
8149: if (nis) PetscCall(PetscCalloc1(size, &ilengths_idxs_is));
8151: /* Get data from local matrices */
8152: PetscCheck(isdense, PetscObjectComm((PetscObject)mat), PETSC_ERR_SUP, "Subassembling of AIJ local matrices not yet implemented");
8153: /* TODO: See below some guidelines on how to prepare the local buffers */
8154: /*
8155: send_buffer_vals should contain the raw values of the local matrix
8156: send_buffer_idxs should contain:
8157: - MatType_PRIVATE type
8158: - PetscInt size_of_l2gmap
8159: - PetscInt global_row_indices[size_of_l2gmap]
8160: - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values
8161: */
8162: {
8163: ISLocalToGlobalMapping mapping;
8165: PetscCall(MatISGetLocalToGlobalMapping(mat, &mapping, NULL));
8166: PetscCall(MatDenseGetArrayRead(local_mat, &send_buffer_vals));
8167: PetscCall(ISLocalToGlobalMappingGetSize(mapping, &i));
8168: PetscCall(PetscMalloc1(i + 2, &send_buffer_idxs));
8169: send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE;
8170: send_buffer_idxs[1] = i;
8171: PetscCall(ISLocalToGlobalMappingGetIndices(mapping, (const PetscInt **)&ptr_idxs));
8172: PetscCall(PetscArraycpy(&send_buffer_idxs[2], ptr_idxs, i));
8173: PetscCall(ISLocalToGlobalMappingRestoreIndices(mapping, (const PetscInt **)&ptr_idxs));
8174: PetscCall(PetscMPIIntCast(i, &len));
8175: for (i = 0; i < n_sends; i++) {
8176: ilengths_vals[is_indices[i]] = len * len;
8177: ilengths_idxs[is_indices[i]] = len + 2;
8178: }
8179: }
8180: PetscCall(PetscGatherMessageLengths2(comm, n_sends, n_recvs, ilengths_idxs, ilengths_vals, &onodes, &olengths_idxs, &olengths_vals));
8181: /* additional is (if any) */
8182: if (nis) {
8183: PetscMPIInt psum;
8184: PetscInt j;
8185: for (j = 0, psum = 0; j < nis; j++) {
8186: PetscInt plen;
8187: PetscCall(ISGetLocalSize(isarray[j], &plen));
8188: PetscCall(PetscMPIIntCast(plen, &len));
8189: psum += len + 1; /* indices + length */
8190: }
8191: PetscCall(PetscMalloc1(psum, &send_buffer_idxs_is));
8192: for (j = 0, psum = 0; j < nis; j++) {
8193: PetscInt plen;
8194: const PetscInt *is_array_idxs;
8195: PetscCall(ISGetLocalSize(isarray[j], &plen));
8196: send_buffer_idxs_is[psum] = plen;
8197: PetscCall(ISGetIndices(isarray[j], &is_array_idxs));
8198: PetscCall(PetscArraycpy(&send_buffer_idxs_is[psum + 1], is_array_idxs, plen));
8199: PetscCall(ISRestoreIndices(isarray[j], &is_array_idxs));
8200: psum += plen + 1; /* indices + length */
8201: }
8202: for (i = 0; i < n_sends; i++) ilengths_idxs_is[is_indices[i]] = psum;
8203: PetscCall(PetscGatherMessageLengths(comm, n_sends, n_recvs, ilengths_idxs_is, &onodes_is, &olengths_idxs_is));
8204: }
8205: PetscCall(MatISRestoreLocalMat(mat, &local_mat));
8207: buf_size_idxs = 0;
8208: buf_size_vals = 0;
8209: buf_size_idxs_is = 0;
8210: buf_size_vecs = 0;
8211: for (i = 0; i < n_recvs; i++) {
8212: buf_size_idxs += olengths_idxs[i];
8213: buf_size_vals += olengths_vals[i];
8214: if (nis) buf_size_idxs_is += olengths_idxs_is[i];
8215: if (nvecs) buf_size_vecs += olengths_idxs[i];
8216: }
8217: PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs));
8218: PetscCall(PetscMalloc1(buf_size_vals, &recv_buffer_vals));
8219: PetscCall(PetscMalloc1(buf_size_idxs_is, &recv_buffer_idxs_is));
8220: PetscCall(PetscMalloc1(buf_size_vecs, &recv_buffer_vecs));
8222: /* get new tags for clean communications */
8223: PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs));
8224: PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vals));
8225: PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_idxs_is));
8226: PetscCall(PetscObjectGetNewTag((PetscObject)mat, &tag_vecs));
8228: /* allocate for requests */
8229: PetscCall(PetscMalloc1(n_sends, &send_req_idxs));
8230: PetscCall(PetscMalloc1(n_sends, &send_req_vals));
8231: PetscCall(PetscMalloc1(n_sends, &send_req_idxs_is));
8232: PetscCall(PetscMalloc1(n_sends, &send_req_vecs));
8233: PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs));
8234: PetscCall(PetscMalloc1(n_recvs, &recv_req_vals));
8235: PetscCall(PetscMalloc1(n_recvs, &recv_req_idxs_is));
8236: PetscCall(PetscMalloc1(n_recvs, &recv_req_vecs));
8238: /* communications */
8239: ptr_idxs = recv_buffer_idxs;
8240: ptr_vals = recv_buffer_vals;
8241: ptr_idxs_is = recv_buffer_idxs_is;
8242: ptr_vecs = recv_buffer_vecs;
8243: for (i = 0; i < n_recvs; i++) {
8244: PetscCallMPI(MPIU_Irecv(ptr_idxs, olengths_idxs[i], MPIU_INT, onodes[i], tag_idxs, comm, &recv_req_idxs[i]));
8245: PetscCallMPI(MPIU_Irecv(ptr_vals, olengths_vals[i], MPIU_SCALAR, onodes[i], tag_vals, comm, &recv_req_vals[i]));
8246: ptr_idxs += olengths_idxs[i];
8247: ptr_vals += olengths_vals[i];
8248: if (nis) {
8249: PetscCallMPI(MPIU_Irecv(ptr_idxs_is, olengths_idxs_is[i], MPIU_INT, onodes_is[i], tag_idxs_is, comm, &recv_req_idxs_is[i]));
8250: ptr_idxs_is += olengths_idxs_is[i];
8251: }
8252: if (nvecs) {
8253: PetscCallMPI(MPIU_Irecv(ptr_vecs, olengths_idxs[i] - 2, MPIU_SCALAR, onodes[i], tag_vecs, comm, &recv_req_vecs[i]));
8254: ptr_vecs += olengths_idxs[i] - 2;
8255: }
8256: }
8257: for (i = 0; i < n_sends; i++) {
8258: PetscCall(PetscMPIIntCast(is_indices[i], &source_dest));
8259: PetscCallMPI(MPIU_Isend(send_buffer_idxs, ilengths_idxs[source_dest], MPIU_INT, source_dest, tag_idxs, comm, &send_req_idxs[i]));
8260: PetscCallMPI(MPIU_Isend(send_buffer_vals, ilengths_vals[source_dest], MPIU_SCALAR, source_dest, tag_vals, comm, &send_req_vals[i]));
8261: if (nis) PetscCallMPI(MPIU_Isend(send_buffer_idxs_is, ilengths_idxs_is[source_dest], MPIU_INT, source_dest, tag_idxs_is, comm, &send_req_idxs_is[i]));
8262: if (nvecs) {
8263: PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs));
8264: PetscCallMPI(MPIU_Isend(send_buffer_vecs, ilengths_idxs[source_dest] - 2, MPIU_SCALAR, source_dest, tag_vecs, comm, &send_req_vecs[i]));
8265: }
8266: }
8267: PetscCall(ISRestoreIndices(is_sends_internal, &is_indices));
8268: PetscCall(ISDestroy(&is_sends_internal));
8270: /* assemble new l2g map */
8271: PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs, MPI_STATUSES_IGNORE));
8272: ptr_idxs = recv_buffer_idxs;
8273: new_local_rows = 0;
8274: for (i = 0; i < n_recvs; i++) {
8275: new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
8276: ptr_idxs += olengths_idxs[i];
8277: }
8278: PetscCall(PetscMalloc1(new_local_rows, &l2gmap_indices));
8279: ptr_idxs = recv_buffer_idxs;
8280: new_local_rows = 0;
8281: for (i = 0; i < n_recvs; i++) {
8282: PetscCall(PetscArraycpy(&l2gmap_indices[new_local_rows], ptr_idxs + 2, *(ptr_idxs + 1)));
8283: new_local_rows += *(ptr_idxs + 1); /* second element is the local size of the l2gmap */
8284: ptr_idxs += olengths_idxs[i];
8285: }
8286: PetscCall(PetscSortRemoveDupsInt(&new_local_rows, l2gmap_indices));
8287: PetscCall(ISLocalToGlobalMappingCreate(comm_n, 1, new_local_rows, l2gmap_indices, PETSC_COPY_VALUES, &l2gmap));
8288: PetscCall(PetscFree(l2gmap_indices));
8290: /* infer new local matrix type from received local matrices type */
8291: /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */
8292: /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */
8293: if (n_recvs) {
8294: MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0];
8295: ptr_idxs = recv_buffer_idxs;
8296: for (i = 0; i < n_recvs; i++) {
8297: if ((PetscInt)new_local_type_private != *ptr_idxs) {
8298: new_local_type_private = MATAIJ_PRIVATE;
8299: break;
8300: }
8301: ptr_idxs += olengths_idxs[i];
8302: }
8303: switch (new_local_type_private) {
8304: case MATDENSE_PRIVATE:
8305: new_local_type = MATSEQAIJ;
8306: bs = 1;
8307: break;
8308: case MATAIJ_PRIVATE:
8309: new_local_type = MATSEQAIJ;
8310: bs = 1;
8311: break;
8312: case MATBAIJ_PRIVATE:
8313: new_local_type = MATSEQBAIJ;
8314: break;
8315: case MATSBAIJ_PRIVATE:
8316: new_local_type = MATSEQSBAIJ;
8317: break;
8318: default:
8319: SETERRQ(comm, PETSC_ERR_SUP, "Unsupported private type %d in %s", new_local_type_private, PETSC_FUNCTION_NAME);
8320: }
8321: } else { /* by default, new_local_type is seqaij */
8322: new_local_type = MATSEQAIJ;
8323: bs = 1;
8324: }
8326: /* create MATIS object if needed */
8327: if (!reuse) {
8328: PetscCall(MatGetSize(mat, &rows, &cols));
8329: PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n));
8330: } else {
8331: /* it also destroys the local matrices */
8332: if (*mat_n) {
8333: PetscCall(MatSetLocalToGlobalMapping(*mat_n, l2gmap, l2gmap));
8334: } else { /* this is a fake object */
8335: PetscCall(MatCreateIS(comm_n, bs, PETSC_DECIDE, PETSC_DECIDE, rows, cols, l2gmap, l2gmap, mat_n));
8336: }
8337: }
8338: PetscCall(MatISGetLocalMat(*mat_n, &local_mat));
8339: PetscCall(MatSetType(local_mat, new_local_type));
8341: PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vals, MPI_STATUSES_IGNORE));
8343: /* Global to local map of received indices */
8344: PetscCall(PetscMalloc1(buf_size_idxs, &recv_buffer_idxs_local)); /* needed for values insertion */
8345: PetscCall(ISGlobalToLocalMappingApply(l2gmap, IS_GTOLM_MASK, buf_size_idxs, recv_buffer_idxs, &i, recv_buffer_idxs_local));
8346: PetscCall(ISLocalToGlobalMappingDestroy(&l2gmap));
8348: /* restore attributes -> type of incoming data and its size */
8349: buf_size_idxs = 0;
8350: for (i = 0; i < n_recvs; i++) {
8351: recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs];
8352: recv_buffer_idxs_local[buf_size_idxs + 1] = recv_buffer_idxs[buf_size_idxs + 1];
8353: buf_size_idxs += olengths_idxs[i];
8354: }
8355: PetscCall(PetscFree(recv_buffer_idxs));
8357: /* set preallocation */
8358: PetscCall(PetscObjectTypeCompare((PetscObject)local_mat, MATSEQDENSE, &newisdense));
8359: if (!newisdense) {
8360: PetscInt *new_local_nnz = NULL;
8362: ptr_idxs = recv_buffer_idxs_local;
8363: if (n_recvs) PetscCall(PetscCalloc1(new_local_rows, &new_local_nnz));
8364: for (i = 0; i < n_recvs; i++) {
8365: PetscInt j;
8366: if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */
8367: for (j = 0; j < *(ptr_idxs + 1); j++) new_local_nnz[*(ptr_idxs + 2 + j)] += *(ptr_idxs + 1);
8368: } else {
8369: /* TODO */
8370: }
8371: ptr_idxs += olengths_idxs[i];
8372: }
8373: if (new_local_nnz) {
8374: for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMin(new_local_nnz[i], new_local_rows);
8375: PetscCall(MatSeqAIJSetPreallocation(local_mat, 0, new_local_nnz));
8376: for (i = 0; i < new_local_rows; i++) new_local_nnz[i] /= bs;
8377: PetscCall(MatSeqBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz));
8378: for (i = 0; i < new_local_rows; i++) new_local_nnz[i] = PetscMax(new_local_nnz[i] - i, 0);
8379: PetscCall(MatSeqSBAIJSetPreallocation(local_mat, bs, 0, new_local_nnz));
8380: } else {
8381: PetscCall(MatSetUp(local_mat));
8382: }
8383: PetscCall(PetscFree(new_local_nnz));
8384: } else {
8385: PetscCall(MatSetUp(local_mat));
8386: }
8388: /* set values */
8389: ptr_vals = recv_buffer_vals;
8390: ptr_idxs = recv_buffer_idxs_local;
8391: for (i = 0; i < n_recvs; i++) {
8392: if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */
8393: PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_FALSE));
8394: PetscCall(MatSetValues(local_mat, *(ptr_idxs + 1), ptr_idxs + 2, *(ptr_idxs + 1), ptr_idxs + 2, ptr_vals, ADD_VALUES));
8395: PetscCall(MatAssemblyBegin(local_mat, MAT_FLUSH_ASSEMBLY));
8396: PetscCall(MatAssemblyEnd(local_mat, MAT_FLUSH_ASSEMBLY));
8397: PetscCall(MatSetOption(local_mat, MAT_ROW_ORIENTED, PETSC_TRUE));
8398: } else {
8399: /* TODO */
8400: }
8401: ptr_idxs += olengths_idxs[i];
8402: ptr_vals += olengths_vals[i];
8403: }
8404: PetscCall(MatAssemblyBegin(local_mat, MAT_FINAL_ASSEMBLY));
8405: PetscCall(MatAssemblyEnd(local_mat, MAT_FINAL_ASSEMBLY));
8406: PetscCall(MatISRestoreLocalMat(*mat_n, &local_mat));
8407: PetscCall(MatAssemblyBegin(*mat_n, MAT_FINAL_ASSEMBLY));
8408: PetscCall(MatAssemblyEnd(*mat_n, MAT_FINAL_ASSEMBLY));
8409: PetscCall(PetscFree(recv_buffer_vals));
8411: #if 0
8412: if (!restrict_comm) { /* check */
8413: Vec lvec,rvec;
8414: PetscReal infty_error;
8416: PetscCall(MatCreateVecs(mat,&rvec,&lvec));
8417: PetscCall(VecSetRandom(rvec,NULL));
8418: PetscCall(MatMult(mat,rvec,lvec));
8419: PetscCall(VecScale(lvec,-1.0));
8420: PetscCall(MatMultAdd(*mat_n,rvec,lvec,lvec));
8421: PetscCall(VecNorm(lvec,NORM_INFINITY,&infty_error));
8422: PetscCall(PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error));
8423: PetscCall(VecDestroy(&rvec));
8424: PetscCall(VecDestroy(&lvec));
8425: }
8426: #endif
8428: /* assemble new additional is (if any) */
8429: if (nis) {
8430: PetscInt **temp_idxs, *count_is, j, psum;
8432: PetscCallMPI(MPI_Waitall(n_recvs, recv_req_idxs_is, MPI_STATUSES_IGNORE));
8433: PetscCall(PetscCalloc1(nis, &count_is));
8434: ptr_idxs = recv_buffer_idxs_is;
8435: psum = 0;
8436: for (i = 0; i < n_recvs; i++) {
8437: for (j = 0; j < nis; j++) {
8438: PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
8439: count_is[j] += plen; /* increment counting of buffer for j-th IS */
8440: psum += plen;
8441: ptr_idxs += plen + 1; /* shift pointer to received data */
8442: }
8443: }
8444: PetscCall(PetscMalloc1(nis, &temp_idxs));
8445: PetscCall(PetscMalloc1(psum, &temp_idxs[0]));
8446: for (i = 1; i < nis; i++) temp_idxs[i] = PetscSafePointerPlusOffset(temp_idxs[i - 1], count_is[i - 1]);
8447: PetscCall(PetscArrayzero(count_is, nis));
8448: ptr_idxs = recv_buffer_idxs_is;
8449: for (i = 0; i < n_recvs; i++) {
8450: for (j = 0; j < nis; j++) {
8451: PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */
8452: PetscCall(PetscArraycpy(&temp_idxs[j][count_is[j]], ptr_idxs + 1, plen));
8453: count_is[j] += plen; /* increment starting point of buffer for j-th IS */
8454: ptr_idxs += plen + 1; /* shift pointer to received data */
8455: }
8456: }
8457: for (i = 0; i < nis; i++) {
8458: PetscCall(ISDestroy(&isarray[i]));
8459: PetscCall(PetscSortRemoveDupsInt(&count_is[i], temp_idxs[i]));
8460: PetscCall(ISCreateGeneral(comm_n, count_is[i], temp_idxs[i], PETSC_COPY_VALUES, &isarray[i]));
8461: }
8462: PetscCall(PetscFree(count_is));
8463: PetscCall(PetscFree(temp_idxs[0]));
8464: PetscCall(PetscFree(temp_idxs));
8465: }
8466: /* free workspace */
8467: PetscCall(PetscFree(recv_buffer_idxs_is));
8468: PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs, MPI_STATUSES_IGNORE));
8469: PetscCall(PetscFree(send_buffer_idxs));
8470: PetscCallMPI(MPI_Waitall(n_sends, send_req_vals, MPI_STATUSES_IGNORE));
8471: if (isdense) {
8472: PetscCall(MatISGetLocalMat(mat, &local_mat));
8473: PetscCall(MatDenseRestoreArrayRead(local_mat, &send_buffer_vals));
8474: PetscCall(MatISRestoreLocalMat(mat, &local_mat));
8475: } else {
8476: /* PetscCall(PetscFree(send_buffer_vals)); */
8477: }
8478: if (nis) {
8479: PetscCallMPI(MPI_Waitall(n_sends, send_req_idxs_is, MPI_STATUSES_IGNORE));
8480: PetscCall(PetscFree(send_buffer_idxs_is));
8481: }
8483: if (nvecs) {
8484: PetscCallMPI(MPI_Waitall(n_recvs, recv_req_vecs, MPI_STATUSES_IGNORE));
8485: PetscCallMPI(MPI_Waitall(n_sends, send_req_vecs, MPI_STATUSES_IGNORE));
8486: PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs));
8487: PetscCall(VecDestroy(&nnsp_vec[0]));
8488: PetscCall(VecCreate(comm_n, &nnsp_vec[0]));
8489: PetscCall(VecSetSizes(nnsp_vec[0], new_local_rows, PETSC_DECIDE));
8490: PetscCall(VecSetType(nnsp_vec[0], VECSTANDARD));
8491: /* set values */
8492: ptr_vals = recv_buffer_vecs;
8493: ptr_idxs = recv_buffer_idxs_local;
8494: PetscCall(VecGetArray(nnsp_vec[0], &send_buffer_vecs));
8495: for (i = 0; i < n_recvs; i++) {
8496: PetscInt j;
8497: for (j = 0; j < *(ptr_idxs + 1); j++) send_buffer_vecs[*(ptr_idxs + 2 + j)] += *(ptr_vals + j);
8498: ptr_idxs += olengths_idxs[i];
8499: ptr_vals += olengths_idxs[i] - 2;
8500: }
8501: PetscCall(VecRestoreArray(nnsp_vec[0], &send_buffer_vecs));
8502: PetscCall(VecAssemblyBegin(nnsp_vec[0]));
8503: PetscCall(VecAssemblyEnd(nnsp_vec[0]));
8504: }
8506: PetscCall(PetscFree(recv_buffer_vecs));
8507: PetscCall(PetscFree(recv_buffer_idxs_local));
8508: PetscCall(PetscFree(recv_req_idxs));
8509: PetscCall(PetscFree(recv_req_vals));
8510: PetscCall(PetscFree(recv_req_vecs));
8511: PetscCall(PetscFree(recv_req_idxs_is));
8512: PetscCall(PetscFree(send_req_idxs));
8513: PetscCall(PetscFree(send_req_vals));
8514: PetscCall(PetscFree(send_req_vecs));
8515: PetscCall(PetscFree(send_req_idxs_is));
8516: PetscCall(PetscFree(ilengths_vals));
8517: PetscCall(PetscFree(ilengths_idxs));
8518: PetscCall(PetscFree(olengths_vals));
8519: PetscCall(PetscFree(olengths_idxs));
8520: PetscCall(PetscFree(onodes));
8521: if (nis) {
8522: PetscCall(PetscFree(ilengths_idxs_is));
8523: PetscCall(PetscFree(olengths_idxs_is));
8524: PetscCall(PetscFree(onodes_is));
8525: }
8526: PetscCall(PetscSubcommDestroy(&subcomm));
8527: if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not participate */
8528: PetscCall(MatDestroy(mat_n));
8529: for (i = 0; i < nis; i++) PetscCall(ISDestroy(&isarray[i]));
8530: if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */
8531: PetscCall(VecDestroy(&nnsp_vec[0]));
8532: }
8533: *mat_n = NULL;
8534: }
8535: PetscFunctionReturn(PETSC_SUCCESS);
8536: }
8538: /* temporary hack into ksp private data structure */
8539: #include <petsc/private/kspimpl.h>
8541: PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc, Mat coarse_submat)
8542: {
8543: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
8544: PC_IS *pcis = (PC_IS *)pc->data;
8545: PCBDDCGraph graph = pcbddc->mat_graph;
8546: Mat coarse_mat, coarse_mat_is;
8547: Mat coarsedivudotp = NULL;
8548: Mat coarseG, t_coarse_mat_is;
8549: MatNullSpace CoarseNullSpace = NULL;
8550: ISLocalToGlobalMapping coarse_islg;
8551: IS coarse_is, *isarray, corners;
8552: PetscInt i, im_active = -1, active_procs = -1;
8553: PetscInt nis, nisdofs, nisneu, nisvert;
8554: PetscInt coarse_eqs_per_proc, coarsening_ratio;
8555: PC pc_temp;
8556: PCType coarse_pc_type;
8557: KSPType coarse_ksp_type;
8558: PetscBool multilevel_requested, multilevel_allowed;
8559: PetscBool coarse_reuse, multi_element = graph->multi_element;
8560: PetscInt ncoarse, nedcfield;
8561: PetscBool compute_vecs = PETSC_FALSE;
8562: PetscScalar *array;
8563: MatReuse coarse_mat_reuse;
8564: PetscBool restr, full_restr, have_void;
8565: PetscMPIInt size;
8567: PetscFunctionBegin;
8568: PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0));
8569: /* Assign global numbering to coarse dofs */
8570: if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */
8571: PetscInt ocoarse_size;
8572: compute_vecs = PETSC_TRUE;
8574: pcbddc->new_primal_space = PETSC_TRUE;
8575: ocoarse_size = pcbddc->coarse_size;
8576: PetscCall(PetscFree(pcbddc->global_primal_indices));
8577: PetscCall(PCBDDCComputePrimalNumbering(pc, &pcbddc->coarse_size, &pcbddc->global_primal_indices));
8578: /* see if we can avoid some work */
8579: if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */
8580: /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */
8581: if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) {
8582: PetscCall(KSPReset(pcbddc->coarse_ksp));
8583: coarse_reuse = PETSC_FALSE;
8584: } else { /* we can safely reuse already computed coarse matrix */
8585: coarse_reuse = PETSC_TRUE;
8586: }
8587: } else { /* there's no coarse ksp, so we need to create the coarse matrix too */
8588: coarse_reuse = PETSC_FALSE;
8589: }
8590: /* reset any subassembling information */
8591: if (!coarse_reuse || pcbddc->recompute_topography) PetscCall(ISDestroy(&pcbddc->coarse_subassembling));
8592: } else { /* primal space is unchanged, so we can reuse coarse matrix */
8593: coarse_reuse = PETSC_TRUE;
8594: }
8595: if (coarse_reuse && pcbddc->coarse_ksp) {
8596: PetscCall(KSPGetOperators(pcbddc->coarse_ksp, &coarse_mat, NULL));
8597: PetscCall(PetscObjectReference((PetscObject)coarse_mat));
8598: coarse_mat_reuse = MAT_REUSE_MATRIX;
8599: } else {
8600: coarse_mat = NULL;
8601: coarse_mat_reuse = MAT_INITIAL_MATRIX;
8602: }
8604: /* creates temporary l2gmap and IS for coarse indexes */
8605: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), pcbddc->local_primal_size, pcbddc->global_primal_indices, PETSC_COPY_VALUES, &coarse_is));
8606: PetscCall(ISLocalToGlobalMappingCreateIS(coarse_is, &coarse_islg));
8608: /* creates temporary MATIS object for coarse matrix */
8609: PetscCall(MatCreate(PetscObjectComm((PetscObject)pc), &t_coarse_mat_is));
8610: PetscCall(MatSetType(t_coarse_mat_is, MATIS));
8611: PetscCall(MatSetSizes(t_coarse_mat_is, PETSC_DECIDE, PETSC_DECIDE, pcbddc->coarse_size, pcbddc->coarse_size));
8612: PetscCall(MatISSetAllowRepeated(t_coarse_mat_is, multi_element));
8613: PetscCall(MatSetLocalToGlobalMapping(t_coarse_mat_is, coarse_islg, coarse_islg));
8614: PetscCall(MatISSetLocalMat(t_coarse_mat_is, coarse_submat));
8615: PetscCall(MatAssemblyBegin(t_coarse_mat_is, MAT_FINAL_ASSEMBLY));
8616: PetscCall(MatAssemblyEnd(t_coarse_mat_is, MAT_FINAL_ASSEMBLY));
8617: PetscCall(MatViewFromOptions(t_coarse_mat_is, (PetscObject)pc, "-pc_bddc_coarse_mat_is_view"));
8619: /* count "active" (i.e. with positive local size) and "void" processes */
8620: im_active = !!pcis->n;
8621: PetscCallMPI(MPIU_Allreduce(&im_active, &active_procs, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
8623: /* determine the number of processes participating in the coarse solver and compute the subassembling pattern */
8624: /* restr : whether we want to exclude senders (which are not receivers) from the subassembling pattern */
8625: /* full_restr : just use the receivers from the subassembling pattern */
8626: PetscCallMPI(MPI_Comm_size(PetscObjectComm((PetscObject)pc), &size));
8627: coarse_mat_is = NULL;
8628: multilevel_allowed = PETSC_FALSE;
8629: multilevel_requested = PETSC_FALSE;
8630: coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size, 1), pcbddc->coarse_eqs_per_proc);
8631: if (coarse_eqs_per_proc < 0 || size == 1) coarse_eqs_per_proc = PetscMax(pcbddc->coarse_size, 1);
8632: if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE;
8633: if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE;
8634: coarsening_ratio = multi_element ? 1 : pcbddc->coarsening_ratio;
8635: if (multilevel_requested) {
8636: ncoarse = active_procs / coarsening_ratio;
8637: restr = PETSC_FALSE;
8638: full_restr = PETSC_FALSE;
8639: } else {
8640: ncoarse = pcbddc->coarse_size / coarse_eqs_per_proc + !!(pcbddc->coarse_size % coarse_eqs_per_proc);
8641: restr = PETSC_TRUE;
8642: full_restr = PETSC_TRUE;
8643: }
8644: if (!pcbddc->coarse_size || (size == 1 && !multi_element)) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE;
8645: ncoarse = PetscMax(1, ncoarse);
8646: if (!pcbddc->coarse_subassembling) {
8647: if (coarsening_ratio > 1) {
8648: if (multilevel_requested) {
8649: PetscCall(PCBDDCMatISGetSubassemblingPattern(pc->pmat, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void));
8650: } else {
8651: PetscCall(PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is, &ncoarse, pcbddc->coarse_adj_red, &pcbddc->coarse_subassembling, &have_void));
8652: }
8653: } else {
8654: PetscMPIInt rank;
8656: PetscCallMPI(MPI_Comm_rank(PetscObjectComm((PetscObject)pc), &rank));
8657: have_void = (active_procs == size) ? PETSC_FALSE : PETSC_TRUE;
8658: PetscCall(ISCreateStride(PetscObjectComm((PetscObject)pc), 1, rank, 1, &pcbddc->coarse_subassembling));
8659: PetscCall(PetscObjectSetName((PetscObject)pcbddc->coarse_subassembling, "default subassembling"));
8660: }
8661: } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */
8662: PetscInt psum;
8663: if (pcbddc->coarse_ksp) psum = 1;
8664: else psum = 0;
8665: PetscCallMPI(MPIU_Allreduce(&psum, &ncoarse, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)pc)));
8666: have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE;
8667: }
8668: /* determine if we can go multilevel */
8669: if (multilevel_requested) {
8670: if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */
8671: else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */
8672: }
8673: if (multilevel_allowed && have_void) restr = PETSC_TRUE;
8675: /* dump subassembling pattern */
8676: if (pcbddc->dbg_flag && multilevel_allowed) PetscCall(ISView(pcbddc->coarse_subassembling, pcbddc->dbg_viewer));
8677: /* compute dofs splitting and neumann boundaries for coarse dofs */
8678: nedcfield = -1;
8679: corners = NULL;
8680: if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal || pcbddc->corner_selected)) { /* protects from unneeded computations */
8681: PetscInt *tidxs, *tidxs2, nout, tsize, i;
8682: const PetscInt *idxs;
8683: ISLocalToGlobalMapping tmap;
8685: /* create map between primal indices (in local representative ordering) and local primal numbering */
8686: PetscCall(ISLocalToGlobalMappingCreate(PETSC_COMM_SELF, 1, pcbddc->local_primal_size, pcbddc->primal_indices_local_idxs, PETSC_COPY_VALUES, &tmap));
8687: /* allocate space for temporary storage */
8688: PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs));
8689: PetscCall(PetscMalloc1(pcbddc->local_primal_size, &tidxs2));
8690: /* allocate for IS array */
8691: nisdofs = pcbddc->n_ISForDofsLocal;
8692: if (pcbddc->nedclocal) {
8693: if (pcbddc->nedfield > -1) {
8694: nedcfield = pcbddc->nedfield;
8695: } else {
8696: nedcfield = 0;
8697: PetscCheck(!nisdofs, PetscObjectComm((PetscObject)pc), PETSC_ERR_PLIB, "This should not happen (%" PetscInt_FMT ")", nisdofs);
8698: nisdofs = 1;
8699: }
8700: }
8701: nisneu = !!pcbddc->NeumannBoundariesLocal;
8702: nisvert = 0; /* nisvert is not used */
8703: nis = nisdofs + nisneu + nisvert;
8704: PetscCall(PetscMalloc1(nis, &isarray));
8705: /* dofs splitting */
8706: for (i = 0; i < nisdofs; i++) {
8707: /* PetscCall(ISView(pcbddc->ISForDofsLocal[i],0)); */
8708: if (nedcfield != i) {
8709: PetscCall(ISGetLocalSize(pcbddc->ISForDofsLocal[i], &tsize));
8710: PetscCall(ISGetIndices(pcbddc->ISForDofsLocal[i], &idxs));
8711: PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8712: PetscCall(ISRestoreIndices(pcbddc->ISForDofsLocal[i], &idxs));
8713: } else {
8714: PetscCall(ISGetLocalSize(pcbddc->nedclocal, &tsize));
8715: PetscCall(ISGetIndices(pcbddc->nedclocal, &idxs));
8716: PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8717: PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping coarse nedelec field! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout);
8718: PetscCall(ISRestoreIndices(pcbddc->nedclocal, &idxs));
8719: }
8720: PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8721: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[i]));
8722: /* PetscCall(ISView(isarray[i],0)); */
8723: }
8724: /* neumann boundaries */
8725: if (pcbddc->NeumannBoundariesLocal) {
8726: /* PetscCall(ISView(pcbddc->NeumannBoundariesLocal,0)); */
8727: PetscCall(ISGetLocalSize(pcbddc->NeumannBoundariesLocal, &tsize));
8728: PetscCall(ISGetIndices(pcbddc->NeumannBoundariesLocal, &idxs));
8729: PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8730: PetscCall(ISRestoreIndices(pcbddc->NeumannBoundariesLocal, &idxs));
8731: PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8732: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &isarray[nisdofs]));
8733: /* PetscCall(ISView(isarray[nisdofs],0)); */
8734: }
8735: /* coordinates */
8736: if (pcbddc->corner_selected) {
8737: PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners));
8738: PetscCall(ISGetLocalSize(corners, &tsize));
8739: PetscCall(ISGetIndices(corners, &idxs));
8740: PetscCall(ISGlobalToLocalMappingApply(tmap, IS_GTOLM_DROP, tsize, idxs, &nout, tidxs));
8741: PetscCheck(tsize == nout, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Failed when mapping corners! %" PetscInt_FMT " != %" PetscInt_FMT, tsize, nout);
8742: PetscCall(ISRestoreIndices(corners, &idxs));
8743: PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &corners));
8744: PetscCall(ISLocalToGlobalMappingApply(coarse_islg, nout, tidxs, tidxs2));
8745: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), nout, tidxs2, PETSC_COPY_VALUES, &corners));
8746: }
8747: PetscCall(PetscFree(tidxs));
8748: PetscCall(PetscFree(tidxs2));
8749: PetscCall(ISLocalToGlobalMappingDestroy(&tmap));
8750: } else {
8751: nis = 0;
8752: nisdofs = 0;
8753: nisneu = 0;
8754: nisvert = 0;
8755: isarray = NULL;
8756: }
8757: /* destroy no longer needed map */
8758: PetscCall(ISLocalToGlobalMappingDestroy(&coarse_islg));
8760: /* subassemble */
8761: if (multilevel_allowed) {
8762: Vec vp[1];
8763: PetscInt nvecs = 0;
8764: PetscBool reuse;
8766: vp[0] = NULL;
8767: /* XXX HDIV also */
8768: if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */
8769: PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &vp[0]));
8770: PetscCall(VecSetSizes(vp[0], pcbddc->local_primal_size, PETSC_DECIDE));
8771: PetscCall(VecSetType(vp[0], VECSTANDARD));
8772: nvecs = 1;
8774: if (pcbddc->divudotp) {
8775: Mat B, loc_divudotp;
8776: Vec v, p;
8777: IS dummy;
8778: PetscInt np;
8780: PetscCall(MatISGetLocalMat(pcbddc->divudotp, &loc_divudotp));
8781: PetscCall(MatGetSize(loc_divudotp, &np, NULL));
8782: PetscCall(ISCreateStride(PETSC_COMM_SELF, np, 0, 1, &dummy));
8783: PetscCall(MatCreateSubMatrix(loc_divudotp, dummy, pcis->is_B_local, MAT_INITIAL_MATRIX, &B));
8784: PetscCall(MatCreateVecs(B, &v, &p));
8785: PetscCall(VecSet(p, 1.));
8786: PetscCall(MatMultTranspose(B, p, v));
8787: PetscCall(VecDestroy(&p));
8788: PetscCall(MatDestroy(&B));
8789: PetscCall(VecGetArray(vp[0], &array));
8790: PetscCall(VecPlaceArray(pcbddc->vec1_P, array));
8791: PetscCall(MatMultTranspose(pcbddc->coarse_phi_B, v, pcbddc->vec1_P));
8792: PetscCall(VecResetArray(pcbddc->vec1_P));
8793: PetscCall(VecRestoreArray(vp[0], &array));
8794: PetscCall(ISDestroy(&dummy));
8795: PetscCall(VecDestroy(&v));
8796: }
8797: }
8798: if (coarse_mat) reuse = PETSC_TRUE;
8799: else reuse = PETSC_FALSE;
8800: if (multi_element) {
8801: PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is));
8802: coarse_mat_is = t_coarse_mat_is;
8803: } else {
8804: PetscCallMPI(MPIU_Allreduce(MPI_IN_PLACE, &reuse, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
8805: if (reuse) {
8806: PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_TRUE, &coarse_mat, nis, isarray, nvecs, vp));
8807: } else {
8808: PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, nis, isarray, nvecs, vp));
8809: }
8810: if (vp[0]) { /* vp[0] could have been placed on a different set of processes */
8811: PetscScalar *arraym;
8812: const PetscScalar *arrayv;
8813: PetscInt nl;
8814: PetscCall(VecGetLocalSize(vp[0], &nl));
8815: PetscCall(MatCreateSeqDense(PETSC_COMM_SELF, 1, nl, NULL, &coarsedivudotp));
8816: PetscCall(MatDenseGetArray(coarsedivudotp, &arraym));
8817: PetscCall(VecGetArrayRead(vp[0], &arrayv));
8818: PetscCall(PetscArraycpy(arraym, arrayv, nl));
8819: PetscCall(VecRestoreArrayRead(vp[0], &arrayv));
8820: PetscCall(MatDenseRestoreArray(coarsedivudotp, &arraym));
8821: PetscCall(VecDestroy(&vp[0]));
8822: } else {
8823: PetscCall(MatCreateSeqAIJ(PETSC_COMM_SELF, 0, 0, 1, NULL, &coarsedivudotp));
8824: }
8825: }
8826: } else {
8827: PetscBool default_sub;
8829: PetscCall(PetscStrcmp(((PetscObject)pcbddc->coarse_subassembling)->name, "default subassembling", &default_sub));
8830: if (!default_sub) PetscCall(PCBDDCMatISSubassemble(t_coarse_mat_is, pcbddc->coarse_subassembling, 0, restr, full_restr, PETSC_FALSE, &coarse_mat_is, 0, NULL, 0, NULL));
8831: else {
8832: PetscCall(PetscObjectReference((PetscObject)t_coarse_mat_is));
8833: coarse_mat_is = t_coarse_mat_is;
8834: }
8835: }
8836: if (coarse_mat_is || coarse_mat) {
8837: if (!multilevel_allowed) {
8838: PetscCall(MatConvert(coarse_mat_is, MATAIJ, coarse_mat_reuse, &coarse_mat));
8839: } else {
8840: /* if this matrix is present, it means we are not reusing the coarse matrix */
8841: if (coarse_mat_is) {
8842: PetscCheck(!coarse_mat, PetscObjectComm((PetscObject)coarse_mat_is), PETSC_ERR_PLIB, "This should not happen");
8843: PetscCall(PetscObjectReference((PetscObject)coarse_mat_is));
8844: coarse_mat = coarse_mat_is;
8845: }
8846: }
8847: }
8848: PetscCall(MatDestroy(&t_coarse_mat_is));
8849: PetscCall(MatDestroy(&coarse_mat_is));
8851: /* create local to global scatters for coarse problem */
8852: if (compute_vecs) {
8853: PetscInt lrows;
8854: PetscCall(VecDestroy(&pcbddc->coarse_vec));
8855: if (coarse_mat) {
8856: PetscCall(MatGetLocalSize(coarse_mat, &lrows, NULL));
8857: } else {
8858: lrows = 0;
8859: }
8860: PetscCall(VecCreate(PetscObjectComm((PetscObject)pc), &pcbddc->coarse_vec));
8861: PetscCall(VecSetSizes(pcbddc->coarse_vec, lrows, PETSC_DECIDE));
8862: PetscCall(VecSetType(pcbddc->coarse_vec, coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD));
8863: PetscCall(VecScatterDestroy(&pcbddc->coarse_loc_to_glob));
8864: PetscCall(VecScatterCreate(pcbddc->vec1_P, NULL, pcbddc->coarse_vec, coarse_is, &pcbddc->coarse_loc_to_glob));
8865: }
8866: PetscCall(ISDestroy(&coarse_is));
8868: /* set defaults for coarse KSP and PC */
8869: if (multilevel_allowed) {
8870: coarse_ksp_type = KSPRICHARDSON;
8871: coarse_pc_type = PCBDDC;
8872: } else {
8873: coarse_ksp_type = KSPPREONLY;
8874: coarse_pc_type = PCREDUNDANT;
8875: }
8877: /* print some info if requested */
8878: if (pcbddc->dbg_flag) {
8879: if (!multilevel_allowed) {
8880: PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
8881: if (multilevel_requested) {
8882: PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Not enough active processes on level %" PetscInt_FMT " (active processes %" PetscInt_FMT ", coarsening ratio %" PetscInt_FMT ")\n", pcbddc->current_level, active_procs, coarsening_ratio));
8883: } else if (pcbddc->max_levels) {
8884: PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Maximum number of requested levels reached (%" PetscInt_FMT ")\n", pcbddc->max_levels));
8885: }
8886: PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
8887: }
8888: }
8890: /* communicate coarse discrete gradient */
8891: coarseG = NULL;
8892: if (pcbddc->nedcG && multilevel_allowed) {
8893: MPI_Comm ccomm;
8894: if (coarse_mat) {
8895: ccomm = PetscObjectComm((PetscObject)coarse_mat);
8896: } else {
8897: ccomm = MPI_COMM_NULL;
8898: }
8899: PetscCall(MatMPIAIJRestrict(pcbddc->nedcG, ccomm, &coarseG));
8900: }
8902: /* create the coarse KSP object only once with defaults */
8903: if (coarse_mat) {
8904: PetscBool isredundant, isbddc, force, valid;
8905: PetscViewer dbg_viewer = NULL;
8906: PetscBool isset, issym, isher, isspd;
8908: if (pcbddc->dbg_flag) {
8909: dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat));
8910: PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * pcbddc->current_level));
8911: }
8912: if (!pcbddc->coarse_ksp) {
8913: char prefix[256], str_level[16];
8914: size_t len;
8916: PetscCall(KSPCreate(PetscObjectComm((PetscObject)coarse_mat), &pcbddc->coarse_ksp));
8917: PetscCall(KSPSetNestLevel(pcbddc->coarse_ksp, pc->kspnestlevel));
8918: PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, pc->erroriffailure));
8919: PetscCall(PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp, (PetscObject)pc, 1));
8920: PetscCall(KSPSetTolerances(pcbddc->coarse_ksp, PETSC_CURRENT, PETSC_CURRENT, PETSC_CURRENT, 1));
8921: PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat));
8922: PetscCall(KSPSetType(pcbddc->coarse_ksp, coarse_ksp_type));
8923: PetscCall(KSPSetNormType(pcbddc->coarse_ksp, KSP_NORM_NONE));
8924: PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8925: /* TODO is this logic correct? should check for coarse_mat type */
8926: PetscCall(PCSetType(pc_temp, coarse_pc_type));
8927: /* prefix */
8928: PetscCall(PetscStrncpy(prefix, "", sizeof(prefix)));
8929: PetscCall(PetscStrncpy(str_level, "", sizeof(str_level)));
8930: if (!pcbddc->current_level) {
8931: PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, sizeof(prefix)));
8932: PetscCall(PetscStrlcat(prefix, "pc_bddc_coarse_", sizeof(prefix)));
8933: } else {
8934: PetscCall(PetscStrlen(((PetscObject)pc)->prefix, &len));
8935: if (pcbddc->current_level > 1) len -= 3; /* remove "lX_" with X level number */
8936: if (pcbddc->current_level > 10) len -= 1; /* remove another char from level number */
8937: /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */
8938: PetscCall(PetscStrncpy(prefix, ((PetscObject)pc)->prefix, len + 1));
8939: PetscCall(PetscSNPrintf(str_level, sizeof(str_level), "l%" PetscInt_FMT "_", pcbddc->current_level));
8940: PetscCall(PetscStrlcat(prefix, str_level, sizeof(prefix)));
8941: }
8942: PetscCall(KSPSetOptionsPrefix(pcbddc->coarse_ksp, prefix));
8943: /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8944: PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1));
8945: PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio));
8946: PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels));
8947: /* allow user customization */
8948: PetscCall(KSPSetFromOptions(pcbddc->coarse_ksp));
8949: /* get some info after set from options */
8950: PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8951: /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8952: force = PETSC_FALSE;
8953: PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL));
8954: PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, ""));
8955: PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8956: if (multilevel_allowed && !force && !valid) {
8957: isbddc = PETSC_TRUE;
8958: PetscCall(PCSetType(pc_temp, PCBDDC));
8959: PetscCall(PCBDDCSetLevel(pc_temp, pcbddc->current_level + 1));
8960: PetscCall(PCBDDCSetCoarseningRatio(pc_temp, pcbddc->coarsening_ratio));
8961: PetscCall(PCBDDCSetLevels(pc_temp, pcbddc->max_levels));
8962: if (pc_temp->ops->setfromoptions) { /* need to setfromoptions again, skipping the pc_type */
8963: PetscObjectOptionsBegin((PetscObject)pc_temp);
8964: PetscCall((*pc_temp->ops->setfromoptions)(pc_temp, PetscOptionsObject));
8965: PetscCall(PetscObjectProcessOptionsHandlers((PetscObject)pc_temp, PetscOptionsObject));
8966: PetscOptionsEnd();
8967: pc_temp->setfromoptionscalled++;
8968: }
8969: }
8970: }
8971: /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */
8972: PetscCall(KSPGetPC(pcbddc->coarse_ksp, &pc_temp));
8973: if (nisdofs) {
8974: PetscCall(PCBDDCSetDofsSplitting(pc_temp, nisdofs, isarray));
8975: for (i = 0; i < nisdofs; i++) PetscCall(ISDestroy(&isarray[i]));
8976: }
8977: if (nisneu) {
8978: PetscCall(PCBDDCSetNeumannBoundaries(pc_temp, isarray[nisdofs]));
8979: PetscCall(ISDestroy(&isarray[nisdofs]));
8980: }
8981: if (nisvert) {
8982: PetscCall(PCBDDCSetPrimalVerticesIS(pc_temp, isarray[nis - 1]));
8983: PetscCall(ISDestroy(&isarray[nis - 1]));
8984: }
8985: if (coarseG) PetscCall(PCBDDCSetDiscreteGradient(pc_temp, coarseG, 1, nedcfield, PETSC_FALSE, PETSC_TRUE));
8987: /* get some info after set from options */
8988: PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
8990: /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */
8991: if (isbddc && !multilevel_allowed) PetscCall(PCSetType(pc_temp, coarse_pc_type));
8992: /* multilevel cannot be done with coarse PC different from BDDC, NN, HPDDM, unless forced to */
8993: force = PETSC_FALSE;
8994: PetscCall(PetscOptionsGetBool(NULL, ((PetscObject)pc_temp)->prefix, "-pc_type_forced", &force, NULL));
8995: PetscCall(PetscObjectTypeCompareAny((PetscObject)pc_temp, &valid, PCBDDC, PCNN, PCHPDDM, ""));
8996: if (multilevel_requested && multilevel_allowed && !valid && !force) PetscCall(PCSetType(pc_temp, PCBDDC));
8997: PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCREDUNDANT, &isredundant));
8998: if (isredundant) {
8999: KSP inner_ksp;
9000: PC inner_pc;
9002: PetscCall(PCRedundantGetKSP(pc_temp, &inner_ksp));
9003: PetscCall(KSPGetPC(inner_ksp, &inner_pc));
9004: }
9006: /* parameters which miss an API */
9007: PetscCall(PetscObjectTypeCompare((PetscObject)pc_temp, PCBDDC, &isbddc));
9008: if (isbddc) {
9009: PC_BDDC *pcbddc_coarse = (PC_BDDC *)pc_temp->data;
9011: pcbddc_coarse->detect_disconnected = PETSC_TRUE;
9012: pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc;
9013: pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit;
9014: pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null;
9015: if (pcbddc_coarse->benign_saddle_point) {
9016: Mat coarsedivudotp_is;
9017: ISLocalToGlobalMapping l2gmap, rl2g, cl2g;
9018: IS row, col;
9019: const PetscInt *gidxs;
9020: PetscInt n, st, M, N;
9022: PetscCall(MatGetSize(coarsedivudotp, &n, NULL));
9023: PetscCallMPI(MPI_Scan(&n, &st, 1, MPIU_INT, MPI_SUM, PetscObjectComm((PetscObject)coarse_mat)));
9024: st = st - n;
9025: PetscCall(ISCreateStride(PetscObjectComm((PetscObject)coarse_mat), 1, st, 1, &row));
9026: PetscCall(MatISGetLocalToGlobalMapping(coarse_mat, &l2gmap, NULL));
9027: PetscCall(ISLocalToGlobalMappingGetSize(l2gmap, &n));
9028: PetscCall(ISLocalToGlobalMappingGetIndices(l2gmap, &gidxs));
9029: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat), n, gidxs, PETSC_COPY_VALUES, &col));
9030: PetscCall(ISLocalToGlobalMappingRestoreIndices(l2gmap, &gidxs));
9031: PetscCall(ISLocalToGlobalMappingCreateIS(row, &rl2g));
9032: PetscCall(ISLocalToGlobalMappingCreateIS(col, &cl2g));
9033: PetscCall(ISGetSize(row, &M));
9034: PetscCall(MatGetSize(coarse_mat, &N, NULL));
9035: PetscCall(ISDestroy(&row));
9036: PetscCall(ISDestroy(&col));
9037: PetscCall(MatCreate(PetscObjectComm((PetscObject)coarse_mat), &coarsedivudotp_is));
9038: PetscCall(MatSetType(coarsedivudotp_is, MATIS));
9039: PetscCall(MatSetSizes(coarsedivudotp_is, PETSC_DECIDE, PETSC_DECIDE, M, N));
9040: PetscCall(MatSetLocalToGlobalMapping(coarsedivudotp_is, rl2g, cl2g));
9041: PetscCall(ISLocalToGlobalMappingDestroy(&rl2g));
9042: PetscCall(ISLocalToGlobalMappingDestroy(&cl2g));
9043: PetscCall(MatISSetLocalMat(coarsedivudotp_is, coarsedivudotp));
9044: PetscCall(MatDestroy(&coarsedivudotp));
9045: PetscCall(PCBDDCSetDivergenceMat(pc_temp, coarsedivudotp_is, PETSC_FALSE, NULL));
9046: PetscCall(MatDestroy(&coarsedivudotp_is));
9047: pcbddc_coarse->adaptive_userdefined = PETSC_TRUE;
9048: if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE;
9049: }
9050: }
9052: /* propagate symmetry info of coarse matrix */
9053: PetscCall(MatSetOption(coarse_mat, MAT_STRUCTURALLY_SYMMETRIC, PETSC_TRUE));
9054: PetscCall(MatIsSymmetricKnown(pc->pmat, &isset, &issym));
9055: if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SYMMETRIC, issym));
9056: PetscCall(MatIsHermitianKnown(pc->pmat, &isset, &isher));
9057: if (isset) PetscCall(MatSetOption(coarse_mat, MAT_HERMITIAN, isher));
9058: PetscCall(MatIsSPDKnown(pc->pmat, &isset, &isspd));
9059: if (isset) PetscCall(MatSetOption(coarse_mat, MAT_SPD, isspd));
9061: if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) PetscCall(MatSetOption(coarse_mat, MAT_SPD, PETSC_TRUE));
9062: /* set operators */
9063: PetscCall(MatViewFromOptions(coarse_mat, (PetscObject)pc, "-pc_bddc_coarse_mat_view"));
9064: PetscCall(MatSetOptionsPrefix(coarse_mat, ((PetscObject)pcbddc->coarse_ksp)->prefix));
9065: PetscCall(KSPSetOperators(pcbddc->coarse_ksp, coarse_mat, coarse_mat));
9066: if (pcbddc->dbg_flag) PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * pcbddc->current_level));
9067: }
9068: PetscCall(MatDestroy(&coarseG));
9069: PetscCall(PetscFree(isarray));
9070: #if 0
9071: {
9072: PetscViewer viewer;
9073: char filename[256];
9074: PetscCall(PetscSNPrintf(filename, PETSC_STATIC_ARRAY_LENGTH(filename), "coarse_mat_level%d.m",pcbddc->current_level));
9075: PetscCall(PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer));
9076: PetscCall(PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB));
9077: PetscCall(MatView(coarse_mat,viewer));
9078: PetscCall(PetscViewerPopFormat(viewer));
9079: PetscCall(PetscViewerDestroy(&viewer));
9080: }
9081: #endif
9083: if (corners) {
9084: Vec gv;
9085: IS is;
9086: const PetscInt *idxs;
9087: PetscInt i, d, N, n, cdim = pcbddc->mat_graph->cdim;
9088: PetscScalar *coords;
9090: PetscCheck(pcbddc->mat_graph->cloc, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Missing local coordinates");
9091: PetscCall(VecGetSize(pcbddc->coarse_vec, &N));
9092: PetscCall(VecGetLocalSize(pcbddc->coarse_vec, &n));
9093: PetscCall(VecCreate(PetscObjectComm((PetscObject)pcbddc->coarse_vec), &gv));
9094: PetscCall(VecSetBlockSize(gv, cdim));
9095: PetscCall(VecSetSizes(gv, n * cdim, N * cdim));
9096: PetscCall(VecSetType(gv, VECSTANDARD));
9097: PetscCall(VecSetFromOptions(gv));
9098: PetscCall(VecSet(gv, PETSC_MAX_REAL)); /* we only propagate coordinates from vertices constraints */
9100: PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is));
9101: PetscCall(ISGetLocalSize(is, &n));
9102: PetscCall(ISGetIndices(is, &idxs));
9103: PetscCall(PetscMalloc1(n * cdim, &coords));
9104: for (i = 0; i < n; i++) {
9105: for (d = 0; d < cdim; d++) coords[cdim * i + d] = pcbddc->mat_graph->coords[cdim * idxs[i] + d];
9106: }
9107: PetscCall(ISRestoreIndices(is, &idxs));
9108: PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &is));
9110: PetscCall(ISGetLocalSize(corners, &n));
9111: PetscCall(ISGetIndices(corners, &idxs));
9112: PetscCall(VecSetValuesBlocked(gv, n, idxs, coords, INSERT_VALUES));
9113: PetscCall(ISRestoreIndices(corners, &idxs));
9114: PetscCall(PetscFree(coords));
9115: PetscCall(VecAssemblyBegin(gv));
9116: PetscCall(VecAssemblyEnd(gv));
9117: PetscCall(VecGetArray(gv, &coords));
9118: if (pcbddc->coarse_ksp) {
9119: PC coarse_pc;
9120: PetscBool isbddc;
9122: PetscCall(KSPGetPC(pcbddc->coarse_ksp, &coarse_pc));
9123: PetscCall(PetscObjectTypeCompare((PetscObject)coarse_pc, PCBDDC, &isbddc));
9124: if (isbddc) { /* coarse coordinates have PETSC_MAX_REAL, specific for BDDC */
9125: PetscReal *realcoords;
9127: PetscCall(VecGetLocalSize(gv, &n));
9128: #if defined(PETSC_USE_COMPLEX)
9129: PetscCall(PetscMalloc1(n, &realcoords));
9130: for (i = 0; i < n; i++) realcoords[i] = PetscRealPart(coords[i]);
9131: #else
9132: realcoords = coords;
9133: #endif
9134: PetscCall(PCSetCoordinates(coarse_pc, cdim, n / cdim, realcoords));
9135: #if defined(PETSC_USE_COMPLEX)
9136: PetscCall(PetscFree(realcoords));
9137: #endif
9138: }
9139: }
9140: PetscCall(VecRestoreArray(gv, &coords));
9141: PetscCall(VecDestroy(&gv));
9142: }
9143: PetscCall(ISDestroy(&corners));
9145: if (pcbddc->coarse_ksp) {
9146: Vec crhs, csol;
9148: PetscCall(KSPGetSolution(pcbddc->coarse_ksp, &csol));
9149: PetscCall(KSPGetRhs(pcbddc->coarse_ksp, &crhs));
9150: if (!csol) PetscCall(MatCreateVecs(coarse_mat, &pcbddc->coarse_ksp->vec_sol, NULL));
9151: if (!crhs) PetscCall(MatCreateVecs(coarse_mat, NULL, &pcbddc->coarse_ksp->vec_rhs));
9152: }
9153: PetscCall(MatDestroy(&coarsedivudotp));
9155: /* compute null space for coarse solver if the benign trick has been requested */
9156: if (pcbddc->benign_null) {
9157: PetscCall(VecSet(pcbddc->vec1_P, 0.));
9158: for (i = 0; i < pcbddc->benign_n; i++) PetscCall(VecSetValue(pcbddc->vec1_P, pcbddc->local_primal_size - pcbddc->benign_n + i, 1.0, INSERT_VALUES));
9159: PetscCall(VecAssemblyBegin(pcbddc->vec1_P));
9160: PetscCall(VecAssemblyEnd(pcbddc->vec1_P));
9161: PetscCall(VecScatterBegin(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD));
9162: PetscCall(VecScatterEnd(pcbddc->coarse_loc_to_glob, pcbddc->vec1_P, pcbddc->coarse_vec, INSERT_VALUES, SCATTER_FORWARD));
9163: if (coarse_mat) {
9164: Vec nullv;
9165: PetscScalar *array, *array2;
9166: PetscInt nl;
9168: PetscCall(MatCreateVecs(coarse_mat, &nullv, NULL));
9169: PetscCall(VecGetLocalSize(nullv, &nl));
9170: PetscCall(VecGetArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array));
9171: PetscCall(VecGetArray(nullv, &array2));
9172: PetscCall(PetscArraycpy(array2, array, nl));
9173: PetscCall(VecRestoreArray(nullv, &array2));
9174: PetscCall(VecRestoreArrayRead(pcbddc->coarse_vec, (const PetscScalar **)&array));
9175: PetscCall(VecNormalize(nullv, NULL));
9176: PetscCall(MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat), PETSC_FALSE, 1, &nullv, &CoarseNullSpace));
9177: PetscCall(VecDestroy(&nullv));
9178: }
9179: }
9180: PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level], pc, 0, 0, 0));
9182: PetscCall(PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0));
9183: if (pcbddc->coarse_ksp) {
9184: PetscBool ispreonly;
9186: if (CoarseNullSpace) {
9187: PetscBool isnull;
9189: PetscCall(MatNullSpaceTest(CoarseNullSpace, coarse_mat, &isnull));
9190: if (isnull) PetscCall(MatSetNullSpace(coarse_mat, CoarseNullSpace));
9191: /* TODO: add local nullspaces (if any) */
9192: }
9193: /* setup coarse ksp */
9194: PetscCall(KSPSetUp(pcbddc->coarse_ksp));
9195: /* Check coarse problem if in debug mode or if solving with an iterative method */
9196: PetscCall(PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp, KSPPREONLY, &ispreonly));
9197: if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates)) {
9198: KSP check_ksp;
9199: KSPType check_ksp_type;
9200: PC check_pc;
9201: Vec check_vec, coarse_vec;
9202: PetscReal abs_infty_error, infty_error, lambda_min = 1.0, lambda_max = 1.0;
9203: PetscInt its;
9204: PetscBool compute_eigs;
9205: PetscReal *eigs_r, *eigs_c;
9206: PetscInt neigs;
9207: const char *prefix;
9209: /* Create ksp object suitable for estimation of extreme eigenvalues */
9210: PetscCall(KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp), &check_ksp));
9211: PetscCall(KSPSetNestLevel(check_ksp, pc->kspnestlevel));
9212: PetscCall(PetscObjectIncrementTabLevel((PetscObject)check_ksp, (PetscObject)pcbddc->coarse_ksp, 0));
9213: PetscCall(KSPSetErrorIfNotConverged(pcbddc->coarse_ksp, PETSC_FALSE));
9214: PetscCall(KSPSetOperators(check_ksp, coarse_mat, coarse_mat));
9215: PetscCall(KSPSetTolerances(check_ksp, 1.e-12, 1.e-12, PETSC_CURRENT, pcbddc->coarse_size));
9216: /* prevent from setup unneeded object */
9217: PetscCall(KSPGetPC(check_ksp, &check_pc));
9218: PetscCall(PCSetType(check_pc, PCNONE));
9219: if (ispreonly) {
9220: check_ksp_type = KSPPREONLY;
9221: compute_eigs = PETSC_FALSE;
9222: } else {
9223: check_ksp_type = KSPGMRES;
9224: compute_eigs = PETSC_TRUE;
9225: }
9226: PetscCall(KSPSetType(check_ksp, check_ksp_type));
9227: PetscCall(KSPSetComputeSingularValues(check_ksp, compute_eigs));
9228: PetscCall(KSPSetComputeEigenvalues(check_ksp, compute_eigs));
9229: PetscCall(KSPGMRESSetRestart(check_ksp, pcbddc->coarse_size + 1));
9230: PetscCall(KSPGetOptionsPrefix(pcbddc->coarse_ksp, &prefix));
9231: PetscCall(KSPSetOptionsPrefix(check_ksp, prefix));
9232: PetscCall(KSPAppendOptionsPrefix(check_ksp, "check_"));
9233: PetscCall(KSPSetFromOptions(check_ksp));
9234: PetscCall(KSPSetUp(check_ksp));
9235: PetscCall(KSPGetPC(pcbddc->coarse_ksp, &check_pc));
9236: PetscCall(KSPSetPC(check_ksp, check_pc));
9237: /* create random vec */
9238: PetscCall(MatCreateVecs(coarse_mat, &coarse_vec, &check_vec));
9239: PetscCall(VecSetRandom(check_vec, NULL));
9240: PetscCall(MatMult(coarse_mat, check_vec, coarse_vec));
9241: /* solve coarse problem */
9242: PetscCall(KSPSolve(check_ksp, coarse_vec, coarse_vec));
9243: PetscCall(KSPCheckSolve(check_ksp, pc, coarse_vec));
9244: /* set eigenvalue estimation if preonly has not been requested */
9245: if (compute_eigs) {
9246: PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_r));
9247: PetscCall(PetscMalloc1(pcbddc->coarse_size + 1, &eigs_c));
9248: PetscCall(KSPComputeEigenvalues(check_ksp, pcbddc->coarse_size + 1, eigs_r, eigs_c, &neigs));
9249: if (neigs) {
9250: lambda_max = eigs_r[neigs - 1];
9251: lambda_min = eigs_r[0];
9252: if (pcbddc->use_coarse_estimates) {
9253: if (lambda_max >= lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */
9254: PetscCall(KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp, lambda_max + PETSC_SMALL, lambda_min));
9255: PetscCall(KSPRichardsonSetScale(pcbddc->coarse_ksp, 2.0 / (lambda_max + lambda_min)));
9256: }
9257: }
9258: }
9259: }
9261: /* check coarse problem residual error */
9262: if (pcbddc->dbg_flag) {
9263: PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp));
9264: PetscCall(PetscViewerASCIIAddTab(dbg_viewer, 2 * (pcbddc->current_level + 1)));
9265: PetscCall(VecAXPY(check_vec, -1.0, coarse_vec));
9266: PetscCall(VecNorm(check_vec, NORM_INFINITY, &infty_error));
9267: PetscCall(MatMult(coarse_mat, check_vec, coarse_vec));
9268: PetscCall(VecNorm(coarse_vec, NORM_INFINITY, &abs_infty_error));
9269: PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem details (use estimates %d)\n", pcbddc->use_coarse_estimates));
9270: PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)pcbddc->coarse_ksp, dbg_viewer));
9271: PetscCall(PetscObjectPrintClassNamePrefixType((PetscObject)check_pc, dbg_viewer));
9272: PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem exact infty_error : %1.6e\n", (double)infty_error));
9273: PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem residual infty_error: %1.6e\n", (double)abs_infty_error));
9274: if (CoarseNullSpace) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem is singular\n"));
9275: if (compute_eigs) {
9276: PetscReal lambda_max_s, lambda_min_s;
9277: KSPConvergedReason reason;
9278: PetscCall(KSPGetType(check_ksp, &check_ksp_type));
9279: PetscCall(KSPGetIterationNumber(check_ksp, &its));
9280: PetscCall(KSPGetConvergedReason(check_ksp, &reason));
9281: PetscCall(KSPComputeExtremeSingularValues(check_ksp, &lambda_max_s, &lambda_min_s));
9282: PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "Coarse problem eigenvalues (estimated with %" PetscInt_FMT " iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n", its, check_ksp_type, reason, (double)lambda_min, (double)lambda_max, (double)lambda_min_s, (double)lambda_max_s));
9283: for (i = 0; i < neigs; i++) PetscCall(PetscViewerASCIIPrintf(dbg_viewer, "%1.6e %1.6ei\n", (double)eigs_r[i], (double)eigs_c[i]));
9284: }
9285: PetscCall(PetscViewerFlush(dbg_viewer));
9286: PetscCall(PetscViewerASCIISubtractTab(dbg_viewer, 2 * (pcbddc->current_level + 1)));
9287: }
9288: PetscCall(VecDestroy(&check_vec));
9289: PetscCall(VecDestroy(&coarse_vec));
9290: PetscCall(KSPDestroy(&check_ksp));
9291: if (compute_eigs) {
9292: PetscCall(PetscFree(eigs_r));
9293: PetscCall(PetscFree(eigs_c));
9294: }
9295: }
9296: }
9297: PetscCall(MatNullSpaceDestroy(&CoarseNullSpace));
9298: /* print additional info */
9299: if (pcbddc->dbg_flag) {
9300: /* waits until all processes reaches this point */
9301: PetscCall(PetscBarrier((PetscObject)pc));
9302: PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Coarse solver setup completed at level %" PetscInt_FMT "\n", pcbddc->current_level));
9303: PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9304: }
9306: /* free memory */
9307: PetscCall(MatDestroy(&coarse_mat));
9308: PetscCall(PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level], pc, 0, 0, 0));
9309: PetscFunctionReturn(PETSC_SUCCESS);
9310: }
9312: PetscErrorCode PCBDDCComputePrimalNumbering(PC pc, PetscInt *coarse_size_n, PetscInt **local_primal_indices_n)
9313: {
9314: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
9315: PC_IS *pcis = (PC_IS *)pc->data;
9316: IS subset, subset_mult, subset_n;
9317: PetscInt local_size, coarse_size = 0;
9318: PetscInt *local_primal_indices = NULL;
9319: const PetscInt *t_local_primal_indices;
9321: PetscFunctionBegin;
9322: /* Compute global number of coarse dofs */
9323: PetscCheck(!pcbddc->local_primal_size || pcbddc->local_primal_ref_node, PETSC_COMM_SELF, PETSC_ERR_PLIB, "BDDC ConstraintsSetUp should be called first");
9324: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_node, PETSC_COPY_VALUES, &subset_n));
9325: PetscCall(ISLocalToGlobalMappingApplyIS(pcis->mapping, subset_n, &subset));
9326: PetscCall(ISDestroy(&subset_n));
9327: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddc->local_primal_size_cc, pcbddc->local_primal_ref_mult, PETSC_COPY_VALUES, &subset_mult));
9328: PetscCall(ISRenumber(subset, subset_mult, &coarse_size, &subset_n));
9329: PetscCall(ISDestroy(&subset));
9330: PetscCall(ISDestroy(&subset_mult));
9331: PetscCall(ISGetLocalSize(subset_n, &local_size));
9332: PetscCheck(local_size == pcbddc->local_primal_size, PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local primal indices computed %" PetscInt_FMT " != %" PetscInt_FMT, local_size, pcbddc->local_primal_size);
9333: PetscCall(PetscMalloc1(local_size, &local_primal_indices));
9334: PetscCall(ISGetIndices(subset_n, &t_local_primal_indices));
9335: PetscCall(PetscArraycpy(local_primal_indices, t_local_primal_indices, local_size));
9336: PetscCall(ISRestoreIndices(subset_n, &t_local_primal_indices));
9337: PetscCall(ISDestroy(&subset_n));
9339: if (pcbddc->dbg_flag) {
9340: PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9341: PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "--------------------------------------------------\n"));
9342: PetscCall(PetscViewerASCIIPrintf(pcbddc->dbg_viewer, "Size of coarse problem is %" PetscInt_FMT "\n", coarse_size));
9343: PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9344: }
9346: /* get back data */
9347: *coarse_size_n = coarse_size;
9348: *local_primal_indices_n = local_primal_indices;
9349: PetscFunctionReturn(PETSC_SUCCESS);
9350: }
9352: PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx, Vec gwork, Vec lwork, IS globalis, IS *localis)
9353: {
9354: IS localis_t;
9355: PetscInt i, lsize, *idxs, n;
9356: PetscScalar *vals;
9358: PetscFunctionBegin;
9359: /* get indices in local ordering exploiting local to global map */
9360: PetscCall(ISGetLocalSize(globalis, &lsize));
9361: PetscCall(PetscMalloc1(lsize, &vals));
9362: for (i = 0; i < lsize; i++) vals[i] = 1.0;
9363: PetscCall(ISGetIndices(globalis, (const PetscInt **)&idxs));
9364: PetscCall(VecSet(gwork, 0.0));
9365: PetscCall(VecSet(lwork, 0.0));
9366: if (idxs) { /* multilevel guard */
9367: PetscCall(VecSetOption(gwork, VEC_IGNORE_NEGATIVE_INDICES, PETSC_TRUE));
9368: PetscCall(VecSetValues(gwork, lsize, idxs, vals, INSERT_VALUES));
9369: }
9370: PetscCall(VecAssemblyBegin(gwork));
9371: PetscCall(ISRestoreIndices(globalis, (const PetscInt **)&idxs));
9372: PetscCall(PetscFree(vals));
9373: PetscCall(VecAssemblyEnd(gwork));
9374: /* now compute set in local ordering */
9375: PetscCall(VecScatterBegin(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD));
9376: PetscCall(VecScatterEnd(g2l_ctx, gwork, lwork, INSERT_VALUES, SCATTER_FORWARD));
9377: PetscCall(VecGetArrayRead(lwork, (const PetscScalar **)&vals));
9378: PetscCall(VecGetSize(lwork, &n));
9379: for (i = 0, lsize = 0; i < n; i++) {
9380: if (PetscRealPart(vals[i]) > 0.5) lsize++;
9381: }
9382: PetscCall(PetscMalloc1(lsize, &idxs));
9383: for (i = 0, lsize = 0; i < n; i++) {
9384: if (PetscRealPart(vals[i]) > 0.5) idxs[lsize++] = i;
9385: }
9386: PetscCall(VecRestoreArrayRead(lwork, (const PetscScalar **)&vals));
9387: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)gwork), lsize, idxs, PETSC_OWN_POINTER, &localis_t));
9388: *localis = localis_t;
9389: PetscFunctionReturn(PETSC_SUCCESS);
9390: }
9392: PetscErrorCode PCBDDCComputeFakeChange(PC pc, PetscBool constraints, PCBDDCGraph graph, PCBDDCSubSchurs schurs, Mat *change, IS *change_primal, IS *change_primal_mult, PetscBool *change_with_qr)
9393: {
9394: PC_IS *pcis = (PC_IS *)pc->data;
9395: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
9396: PC_IS *pcisf;
9397: PC_BDDC *pcbddcf;
9398: PC pcf;
9400: PetscFunctionBegin;
9401: PetscCall(PCCreate(PetscObjectComm((PetscObject)pc), &pcf));
9402: PetscCall(PCSetOperators(pcf, pc->mat, pc->pmat));
9403: PetscCall(PCSetType(pcf, PCBDDC));
9405: pcisf = (PC_IS *)pcf->data;
9406: pcbddcf = (PC_BDDC *)pcf->data;
9408: pcisf->is_B_local = pcis->is_B_local;
9409: pcisf->vec1_N = pcis->vec1_N;
9410: pcisf->BtoNmap = pcis->BtoNmap;
9411: pcisf->n = pcis->n;
9412: pcisf->n_B = pcis->n_B;
9414: PetscCall(PetscFree(pcbddcf->mat_graph));
9415: PetscCall(PetscFree(pcbddcf->sub_schurs));
9416: pcbddcf->mat_graph = graph ? graph : pcbddc->mat_graph;
9417: pcbddcf->sub_schurs = schurs;
9418: pcbddcf->adaptive_selection = schurs ? PETSC_TRUE : PETSC_FALSE;
9419: pcbddcf->adaptive_threshold[0] = pcbddc->adaptive_threshold[0];
9420: pcbddcf->adaptive_threshold[1] = pcbddc->adaptive_threshold[1];
9421: pcbddcf->adaptive_nmin = pcbddc->adaptive_nmin;
9422: pcbddcf->adaptive_nmax = pcbddc->adaptive_nmax;
9423: pcbddcf->use_faces = PETSC_TRUE;
9424: pcbddcf->use_change_of_basis = (PetscBool)!constraints;
9425: pcbddcf->use_change_on_faces = (PetscBool)!constraints;
9426: pcbddcf->use_qr_single = (PetscBool)!constraints;
9427: pcbddcf->fake_change = PETSC_TRUE;
9428: pcbddcf->dbg_flag = pcbddc->dbg_flag;
9430: PetscCall(PCBDDCAdaptiveSelection(pcf));
9431: PetscCall(PCBDDCConstraintsSetUp(pcf));
9433: *change = pcbddcf->ConstraintMatrix;
9434: if (change_primal) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_node, PETSC_COPY_VALUES, change_primal));
9435: if (change_primal_mult) PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc->pmat), pcbddcf->local_primal_size_cc, pcbddcf->local_primal_ref_mult, PETSC_COPY_VALUES, change_primal_mult));
9436: if (change_with_qr) *change_with_qr = pcbddcf->use_qr_single;
9438: if (schurs) pcbddcf->sub_schurs = NULL;
9439: pcbddcf->ConstraintMatrix = NULL;
9440: pcbddcf->mat_graph = NULL;
9441: pcisf->is_B_local = NULL;
9442: pcisf->vec1_N = NULL;
9443: pcisf->BtoNmap = NULL;
9444: PetscCall(PCDestroy(&pcf));
9445: PetscFunctionReturn(PETSC_SUCCESS);
9446: }
9448: PetscErrorCode PCBDDCSetUpSubSchurs(PC pc)
9449: {
9450: PC_IS *pcis = (PC_IS *)pc->data;
9451: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
9452: PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs;
9453: Mat S_j;
9454: PetscInt *used_xadj, *used_adjncy;
9455: PetscBool free_used_adj;
9457: PetscFunctionBegin;
9458: PetscCall(PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0));
9459: /* decide the adjacency to be used for determining internal problems for local schur on subsets */
9460: free_used_adj = PETSC_FALSE;
9461: if (pcbddc->sub_schurs_layers == -1) {
9462: used_xadj = NULL;
9463: used_adjncy = NULL;
9464: } else {
9465: if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) {
9466: used_xadj = pcbddc->mat_graph->xadj;
9467: used_adjncy = pcbddc->mat_graph->adjncy;
9468: } else if (pcbddc->computed_rowadj) {
9469: used_xadj = pcbddc->mat_graph->xadj;
9470: used_adjncy = pcbddc->mat_graph->adjncy;
9471: } else {
9472: PetscBool flg_row = PETSC_FALSE;
9473: const PetscInt *xadj, *adjncy;
9474: PetscInt nvtxs;
9476: PetscCall(MatGetRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row));
9477: if (flg_row) {
9478: PetscCall(PetscMalloc2(nvtxs + 1, &used_xadj, xadj[nvtxs], &used_adjncy));
9479: PetscCall(PetscArraycpy(used_xadj, xadj, nvtxs + 1));
9480: PetscCall(PetscArraycpy(used_adjncy, adjncy, xadj[nvtxs]));
9481: free_used_adj = PETSC_TRUE;
9482: } else {
9483: pcbddc->sub_schurs_layers = -1;
9484: used_xadj = NULL;
9485: used_adjncy = NULL;
9486: }
9487: PetscCall(MatRestoreRowIJ(pcbddc->local_mat, 0, PETSC_TRUE, PETSC_FALSE, &nvtxs, &xadj, &adjncy, &flg_row));
9488: }
9489: }
9491: /* setup sub_schurs data */
9492: PetscCall(MatCreateSchurComplement(pcis->A_II, pcis->pA_II, pcis->A_IB, pcis->A_BI, pcis->A_BB, &S_j));
9493: if (!sub_schurs->schur_explicit) {
9494: /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */
9495: PetscCall(MatSchurComplementSetKSP(S_j, pcbddc->ksp_D));
9496: PetscCall(PCBDDCSubSchursSetUp(sub_schurs, NULL, S_j, PETSC_FALSE, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, NULL, pcbddc->adaptive_selection, PETSC_FALSE, PETSC_FALSE, 0, NULL, NULL, NULL, NULL));
9497: } else {
9498: Mat change = NULL;
9499: Vec scaling = NULL;
9500: IS change_primal = NULL, iP;
9501: PetscInt benign_n;
9502: PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis;
9503: PetscBool need_change = PETSC_FALSE;
9504: PetscBool discrete_harmonic = PETSC_FALSE;
9506: if (!pcbddc->use_vertices && reuse_solvers) {
9507: PetscInt n_vertices;
9509: PetscCall(ISGetLocalSize(sub_schurs->is_vertices, &n_vertices));
9510: reuse_solvers = (PetscBool)!n_vertices;
9511: }
9512: if (!pcbddc->benign_change_explicit) {
9513: benign_n = pcbddc->benign_n;
9514: } else {
9515: benign_n = 0;
9516: }
9517: /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc.
9518: We need a global reduction to avoid possible deadlocks.
9519: We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */
9520: if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) {
9521: PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change);
9522: PetscCallMPI(MPIU_Allreduce(&have_loc_change, &need_change, 1, MPI_C_BOOL, MPI_LOR, PetscObjectComm((PetscObject)pc)));
9523: need_change = (PetscBool)(!need_change);
9524: }
9525: /* If the user defines additional constraints, we import them here */
9526: if (need_change) {
9527: PetscCheck(!pcbddc->sub_schurs_rebuild, PETSC_COMM_SELF, PETSC_ERR_SUP, "Cannot compute change of basis with a different graph");
9528: PetscCall(PCBDDCComputeFakeChange(pc, PETSC_FALSE, NULL, NULL, &change, &change_primal, NULL, &sub_schurs->change_with_qr));
9529: }
9530: if (!pcbddc->use_deluxe_scaling) scaling = pcis->D;
9532: PetscCall(PetscObjectQuery((PetscObject)pc, "__KSPFETIDP_iP", (PetscObject *)&iP));
9533: if (iP) {
9534: PetscOptionsBegin(PetscObjectComm((PetscObject)iP), sub_schurs->prefix, "BDDC sub_schurs options", "PC");
9535: PetscCall(PetscOptionsBool("-sub_schurs_discrete_harmonic", NULL, NULL, discrete_harmonic, &discrete_harmonic, NULL));
9536: PetscOptionsEnd();
9537: }
9538: if (discrete_harmonic) {
9539: Mat A;
9540: PetscCall(MatDuplicate(pcbddc->local_mat, MAT_COPY_VALUES, &A));
9541: PetscCall(MatZeroRowsColumnsIS(A, iP, 1.0, NULL, NULL));
9542: PetscCall(PetscObjectCompose((PetscObject)A, "__KSPFETIDP_iP", (PetscObject)iP));
9543: PetscCall(PCBDDCSubSchursSetUp(sub_schurs, A, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n, pcbddc->benign_p0_lidx,
9544: pcbddc->benign_zerodiag_subs, change, change_primal));
9545: PetscCall(MatDestroy(&A));
9546: } else {
9547: PetscCall(PCBDDCSubSchursSetUp(sub_schurs, pcbddc->local_mat, S_j, pcbddc->sub_schurs_exact_schur, used_xadj, used_adjncy, pcbddc->sub_schurs_layers, scaling, pcbddc->adaptive_selection, reuse_solvers, pcbddc->benign_saddle_point, benign_n,
9548: pcbddc->benign_p0_lidx, pcbddc->benign_zerodiag_subs, change, change_primal));
9549: }
9550: PetscCall(MatDestroy(&change));
9551: PetscCall(ISDestroy(&change_primal));
9552: }
9553: PetscCall(MatDestroy(&S_j));
9555: /* free adjacency */
9556: if (free_used_adj) PetscCall(PetscFree2(used_xadj, used_adjncy));
9557: PetscCall(PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level], pc, 0, 0, 0));
9558: PetscFunctionReturn(PETSC_SUCCESS);
9559: }
9561: PetscErrorCode PCBDDCInitSubSchurs(PC pc)
9562: {
9563: PC_IS *pcis = (PC_IS *)pc->data;
9564: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
9565: PCBDDCGraph graph;
9567: PetscFunctionBegin;
9568: /* attach interface graph for determining subsets */
9569: if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */
9570: IS verticesIS, verticescomm;
9571: PetscInt vsize, *idxs;
9573: PetscCall(PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS));
9574: PetscCall(ISGetSize(verticesIS, &vsize));
9575: PetscCall(ISGetIndices(verticesIS, (const PetscInt **)&idxs));
9576: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), vsize, idxs, PETSC_COPY_VALUES, &verticescomm));
9577: PetscCall(ISRestoreIndices(verticesIS, (const PetscInt **)&idxs));
9578: PetscCall(PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph, NULL, NULL, NULL, NULL, &verticesIS));
9579: PetscCall(PCBDDCGraphCreate(&graph));
9580: PetscCall(PCBDDCGraphInit(graph, pcbddc->mat_graph->l2gmap, pcbddc->mat_graph->nvtxs_global, pcbddc->graphmaxcount));
9581: PetscCall(PCBDDCGraphSetUp(graph, pcbddc->mat_graph->custom_minimal_size, NULL, pcbddc->DirichletBoundariesLocal, 0, NULL, verticescomm));
9582: PetscCall(ISDestroy(&verticescomm));
9583: PetscCall(PCBDDCGraphComputeConnectedComponents(graph));
9584: } else {
9585: graph = pcbddc->mat_graph;
9586: }
9587: /* print some info */
9588: if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) {
9589: IS vertices;
9590: PetscInt nv, nedges, nfaces;
9591: PetscCall(PCBDDCGraphASCIIView(graph, pcbddc->dbg_flag, pcbddc->dbg_viewer));
9592: PetscCall(PCBDDCGraphGetCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices));
9593: PetscCall(ISGetSize(vertices, &nv));
9594: PetscCall(PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer));
9595: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "--------------------------------------------------------------\n"));
9596: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate vertices (%d)\n", PetscGlobalRank, nv, pcbddc->use_vertices));
9597: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate edges (%d)\n", PetscGlobalRank, nedges, pcbddc->use_edges));
9598: PetscCall(PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer, "Subdomain %04d got %02" PetscInt_FMT " local candidate faces (%d)\n", PetscGlobalRank, nfaces, pcbddc->use_faces));
9599: PetscCall(PetscViewerFlush(pcbddc->dbg_viewer));
9600: PetscCall(PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer));
9601: PetscCall(PCBDDCGraphRestoreCandidatesIS(graph, &nfaces, NULL, &nedges, NULL, &vertices));
9602: }
9604: /* sub_schurs init */
9605: if (!pcbddc->sub_schurs) PetscCall(PCBDDCSubSchursCreate(&pcbddc->sub_schurs));
9606: PetscCall(PCBDDCSubSchursInit(pcbddc->sub_schurs, ((PetscObject)pc)->prefix, pcis->is_I_local, pcis->is_B_local, graph, pcis->BtoNmap, pcbddc->sub_schurs_rebuild, PETSC_FALSE));
9608: /* free graph struct */
9609: if (pcbddc->sub_schurs_rebuild) PetscCall(PCBDDCGraphDestroy(&graph));
9610: PetscFunctionReturn(PETSC_SUCCESS);
9611: }
9613: static PetscErrorCode PCBDDCViewGlobalIS(PC pc, IS is, PetscViewer viewer)
9614: {
9615: Mat_IS *matis = (Mat_IS *)pc->pmat->data;
9616: PetscInt n = pc->pmat->rmap->n, ln, ni, st;
9617: const PetscInt *idxs;
9618: IS gis;
9620: PetscFunctionBegin;
9621: if (!is) PetscFunctionReturn(PETSC_SUCCESS);
9622: PetscCall(MatGetOwnershipRange(pc->pmat, &st, NULL));
9623: PetscCall(MatGetLocalSize(matis->A, NULL, &ln));
9624: PetscCall(PetscArrayzero(matis->sf_leafdata, ln));
9625: PetscCall(PetscArrayzero(matis->sf_rootdata, n));
9626: PetscCall(ISGetLocalSize(is, &ni));
9627: PetscCall(ISGetIndices(is, &idxs));
9628: for (PetscInt i = 0; i < ni; i++) {
9629: if (idxs[i] < 0 || idxs[i] >= ln) continue;
9630: matis->sf_leafdata[idxs[i]] = 1;
9631: }
9632: PetscCall(ISRestoreIndices(is, &idxs));
9633: PetscCall(PetscSFReduceBegin(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
9634: PetscCall(PetscSFReduceEnd(matis->sf, MPIU_INT, matis->sf_leafdata, matis->sf_rootdata, MPI_SUM));
9635: ln = 0;
9636: for (PetscInt i = 0; i < n; i++) {
9637: if (matis->sf_rootdata[i]) matis->sf_rootdata[ln++] = i + st;
9638: }
9639: PetscCall(ISCreateGeneral(PetscObjectComm((PetscObject)pc), ln, matis->sf_rootdata, PETSC_USE_POINTER, &gis));
9640: PetscCall(ISView(gis, viewer));
9641: PetscCall(ISDestroy(&gis));
9642: PetscFunctionReturn(PETSC_SUCCESS);
9643: }
9645: PetscErrorCode PCBDDCLoadOrViewCustomization(PC pc, PetscBool load, const char *outfile)
9646: {
9647: PetscInt header[11];
9648: PC_BDDC *pcbddc = (PC_BDDC *)pc->data;
9649: PetscViewer viewer;
9650: MPI_Comm comm = PetscObjectComm((PetscObject)pc);
9652: PetscFunctionBegin;
9653: PetscCall(PetscViewerBinaryOpen(comm, outfile ? outfile : "bddc_dump.dat", load ? FILE_MODE_READ : FILE_MODE_WRITE, &viewer));
9654: if (load) {
9655: IS is;
9656: Mat A;
9658: PetscCall(PetscViewerBinaryRead(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), NULL, PETSC_INT));
9659: PetscCheck(header[0] == 0 || header[0] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9660: PetscCheck(header[1] == 0 || header[1] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9661: PetscCheck(header[2] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9662: PetscCheck(header[3] == 0 || header[3] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9663: PetscCheck(header[4] == 0 || header[4] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9664: PetscCheck(header[5] >= 0, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9665: PetscCheck(header[7] == 0 || header[7] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9666: PetscCheck(header[8] == 0 || header[8] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9667: PetscCheck(header[9] == 0 || header[9] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9668: PetscCheck(header[10] == 0 || header[10] == 1, PETSC_COMM_SELF, PETSC_ERR_FILE_UNEXPECTED, "Not a BDDC dump next in file");
9669: if (header[0]) {
9670: PetscCall(ISCreate(comm, &is));
9671: PetscCall(ISLoad(is, viewer));
9672: PetscCall(PCBDDCSetDirichletBoundaries(pc, is));
9673: PetscCall(ISDestroy(&is));
9674: }
9675: if (header[1]) {
9676: PetscCall(ISCreate(comm, &is));
9677: PetscCall(ISLoad(is, viewer));
9678: PetscCall(PCBDDCSetNeumannBoundaries(pc, is));
9679: PetscCall(ISDestroy(&is));
9680: }
9681: if (header[2]) {
9682: IS *isarray;
9684: PetscCall(PetscMalloc1(header[2], &isarray));
9685: for (PetscInt i = 0; i < header[2]; i++) {
9686: PetscCall(ISCreate(comm, &isarray[i]));
9687: PetscCall(ISLoad(isarray[i], viewer));
9688: }
9689: PetscCall(PCBDDCSetDofsSplitting(pc, header[2], isarray));
9690: for (PetscInt i = 0; i < header[2]; i++) PetscCall(ISDestroy(&isarray[i]));
9691: PetscCall(PetscFree(isarray));
9692: }
9693: if (header[3]) {
9694: PetscCall(ISCreate(comm, &is));
9695: PetscCall(ISLoad(is, viewer));
9696: PetscCall(PCBDDCSetPrimalVerticesIS(pc, is));
9697: PetscCall(ISDestroy(&is));
9698: }
9699: if (header[4]) {
9700: PetscCall(MatCreate(comm, &A));
9701: PetscCall(MatSetType(A, MATAIJ));
9702: PetscCall(MatLoad(A, viewer));
9703: PetscCall(PCBDDCSetDiscreteGradient(pc, A, header[5], header[6], (PetscBool)header[7], (PetscBool)header[8]));
9704: PetscCall(MatDestroy(&A));
9705: }
9706: if (header[9]) {
9707: PetscCall(MatCreate(comm, &A));
9708: PetscCall(MatSetType(A, MATIS));
9709: PetscCall(MatLoad(A, viewer));
9710: PetscCall(PCBDDCSetDivergenceMat(pc, A, (PetscBool)header[10], NULL));
9711: PetscCall(MatDestroy(&A));
9712: }
9713: } else {
9714: header[0] = (PetscInt)!!pcbddc->DirichletBoundariesLocal;
9715: header[1] = (PetscInt)!!pcbddc->NeumannBoundariesLocal;
9716: header[2] = pcbddc->n_ISForDofsLocal;
9717: header[3] = (PetscInt)!!pcbddc->user_primal_vertices_local;
9718: header[4] = (PetscInt)!!pcbddc->discretegradient;
9719: header[5] = pcbddc->nedorder;
9720: header[6] = pcbddc->nedfield;
9721: header[7] = (PetscInt)pcbddc->nedglobal;
9722: header[8] = (PetscInt)pcbddc->conforming;
9723: header[9] = (PetscInt)!!pcbddc->divudotp;
9724: header[10] = (PetscInt)pcbddc->divudotp_trans;
9725: if (header[4]) header[3] = 0;
9727: PetscCall(PetscViewerBinaryWrite(viewer, header, PETSC_STATIC_ARRAY_LENGTH(header), PETSC_INT));
9728: PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->DirichletBoundariesLocal, viewer));
9729: PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->NeumannBoundariesLocal, viewer));
9730: for (PetscInt i = 0; i < header[2]; i++) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->ISForDofsLocal[i], viewer));
9731: if (header[3]) PetscCall(PCBDDCViewGlobalIS(pc, pcbddc->user_primal_vertices_local, viewer));
9732: if (header[4]) PetscCall(MatView(pcbddc->discretegradient, viewer));
9733: if (header[9]) PetscCall(MatView(pcbddc->divudotp, viewer));
9734: }
9735: PetscCall(PetscViewerDestroy(&viewer));
9736: PetscFunctionReturn(PETSC_SUCCESS);
9737: }
9739: #include <../src/mat/impls/aij/mpi/mpiaij.h>
9740: static PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B)
9741: {
9742: Mat At;
9743: IS rows;
9744: PetscInt rst, ren;
9745: PetscLayout rmap;
9747: PetscFunctionBegin;
9748: rst = ren = 0;
9749: if (ccomm != MPI_COMM_NULL) {
9750: PetscCall(PetscLayoutCreate(ccomm, &rmap));
9751: PetscCall(PetscLayoutSetSize(rmap, A->rmap->N));
9752: PetscCall(PetscLayoutSetBlockSize(rmap, 1));
9753: PetscCall(PetscLayoutSetUp(rmap));
9754: PetscCall(PetscLayoutGetRange(rmap, &rst, &ren));
9755: }
9756: PetscCall(ISCreateStride(PetscObjectComm((PetscObject)A), ren - rst, rst, 1, &rows));
9757: PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, &At));
9758: PetscCall(ISDestroy(&rows));
9760: if (ccomm != MPI_COMM_NULL) {
9761: Mat_MPIAIJ *a, *b;
9762: IS from, to;
9763: Vec gvec;
9764: PetscInt lsize;
9766: PetscCall(MatCreate(ccomm, B));
9767: PetscCall(MatSetSizes(*B, ren - rst, PETSC_DECIDE, PETSC_DECIDE, At->cmap->N));
9768: PetscCall(MatSetType(*B, MATAIJ));
9769: PetscCall(PetscLayoutDestroy(&(*B)->rmap));
9770: PetscCall(PetscLayoutSetUp((*B)->cmap));
9771: a = (Mat_MPIAIJ *)At->data;
9772: b = (Mat_MPIAIJ *)(*B)->data;
9773: PetscCallMPI(MPI_Comm_size(ccomm, &b->size));
9774: PetscCallMPI(MPI_Comm_rank(ccomm, &b->rank));
9775: PetscCall(PetscObjectReference((PetscObject)a->A));
9776: PetscCall(PetscObjectReference((PetscObject)a->B));
9777: b->A = a->A;
9778: b->B = a->B;
9780: b->donotstash = a->donotstash;
9781: b->roworiented = a->roworiented;
9782: b->rowindices = NULL;
9783: b->rowvalues = NULL;
9784: b->getrowactive = PETSC_FALSE;
9786: (*B)->rmap = rmap;
9787: (*B)->factortype = A->factortype;
9788: (*B)->assembled = PETSC_TRUE;
9789: (*B)->insertmode = NOT_SET_VALUES;
9790: (*B)->preallocated = PETSC_TRUE;
9792: if (a->colmap) {
9793: #if defined(PETSC_USE_CTABLE)
9794: PetscCall(PetscHMapIDuplicate(a->colmap, &b->colmap));
9795: #else
9796: PetscCall(PetscMalloc1(At->cmap->N, &b->colmap));
9797: PetscCall(PetscArraycpy(b->colmap, a->colmap, At->cmap->N));
9798: #endif
9799: } else b->colmap = NULL;
9800: if (a->garray) {
9801: PetscInt len;
9802: len = a->B->cmap->n;
9803: PetscCall(PetscMalloc1(len + 1, &b->garray));
9804: if (len) PetscCall(PetscArraycpy(b->garray, a->garray, len));
9805: } else b->garray = NULL;
9807: PetscCall(PetscObjectReference((PetscObject)a->lvec));
9808: b->lvec = a->lvec;
9810: /* cannot use VecScatterCopy */
9811: PetscCall(VecGetLocalSize(b->lvec, &lsize));
9812: PetscCall(ISCreateGeneral(ccomm, lsize, b->garray, PETSC_USE_POINTER, &from));
9813: PetscCall(ISCreateStride(PETSC_COMM_SELF, lsize, 0, 1, &to));
9814: PetscCall(MatCreateVecs(*B, &gvec, NULL));
9815: PetscCall(VecScatterCreate(gvec, from, b->lvec, to, &b->Mvctx));
9816: PetscCall(ISDestroy(&from));
9817: PetscCall(ISDestroy(&to));
9818: PetscCall(VecDestroy(&gvec));
9819: }
9820: PetscCall(MatDestroy(&At));
9821: PetscFunctionReturn(PETSC_SUCCESS);
9822: }
9824: /* same as MatCreateSubMatrix(A, rows, NULL,...) but allows repeated rows */
9825: static PetscErrorCode MatAIJExtractRows(Mat A, IS rows, Mat *sA)
9826: {
9827: PetscBool isaij;
9828: MPI_Comm comm;
9830: PetscFunctionBegin;
9831: PetscCall(PetscObjectGetComm((PetscObject)A, &comm));
9832: PetscCall(PetscObjectBaseTypeCompareAny((PetscObject)A, &isaij, MATSEQAIJ, MATMPIAIJ, ""));
9833: PetscCheck(isaij, comm, PETSC_ERR_SUP, "Not implemented");
9834: PetscCall(PetscObjectBaseTypeCompare((PetscObject)A, MATSEQAIJ, &isaij));
9835: if (isaij) { /* SeqAIJ supports repeated rows */
9836: PetscCall(MatCreateSubMatrix(A, rows, NULL, MAT_INITIAL_MATRIX, sA));
9837: } else {
9838: Mat A_loc;
9839: Mat_SeqAIJ *da;
9840: PetscSF sf;
9841: PetscInt ni, *di, *dj, m = A->rmap->n, c, *ldata, *rdata;
9842: PetscScalar *daa;
9843: const PetscInt *idxs;
9844: const PetscSFNode *iremotes;
9845: PetscSFNode *remotes;
9847: /* SF for incoming rows */
9848: PetscCall(PetscSFCreate(comm, &sf));
9849: PetscCall(ISGetLocalSize(rows, &ni));
9850: PetscCall(ISGetIndices(rows, &idxs));
9851: PetscCall(PetscSFSetGraphLayout(sf, A->rmap, ni, NULL, PETSC_USE_POINTER, idxs));
9852: PetscCall(ISRestoreIndices(rows, &idxs));
9854: PetscCall(MatMPIAIJGetLocalMat(A, MAT_INITIAL_MATRIX, &A_loc));
9855: da = (Mat_SeqAIJ *)A_loc->data;
9856: PetscCall(PetscMalloc2(2 * ni, &ldata, 2 * m, &rdata));
9857: for (PetscInt i = 0; i < m; i++) {
9858: rdata[2 * i + 0] = da->i[i + 1] - da->i[i];
9859: rdata[2 * i + 1] = da->i[i];
9860: }
9861: PetscCall(PetscSFBcastBegin(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE));
9862: PetscCall(PetscSFBcastEnd(sf, MPIU_2INT, rdata, ldata, MPI_REPLACE));
9863: PetscCall(PetscMalloc1(ni + 1, &di));
9864: di[0] = 0;
9865: for (PetscInt i = 0; i < ni; i++) di[i + 1] = di[i] + ldata[2 * i + 0];
9866: PetscCall(PetscMalloc1(di[ni], &dj));
9867: PetscCall(PetscMalloc1(di[ni], &daa));
9868: PetscCall(PetscMalloc1(di[ni], &remotes));
9870: PetscCall(PetscSFGetGraph(sf, NULL, NULL, NULL, &iremotes));
9872: /* SF graph for nonzeros */
9873: c = 0;
9874: for (PetscInt i = 0; i < ni; i++) {
9875: const PetscInt rank = iremotes[i].rank;
9876: const PetscInt rsize = ldata[2 * i];
9877: for (PetscInt j = 0; j < rsize; j++) {
9878: remotes[c].rank = rank;
9879: remotes[c].index = ldata[2 * i + 1] + j;
9880: c++;
9881: }
9882: }
9883: PetscCheck(c == di[ni], PETSC_COMM_SELF, PETSC_ERR_PLIB, "Invalid number of local nonzeros %" PetscInt_FMT " != %" PetscInt_FMT, c, di[ni]);
9884: PetscCall(PetscSFSetGraph(sf, da->i[m], di[ni], NULL, PETSC_USE_POINTER, remotes, PETSC_USE_POINTER));
9885: PetscCall(PetscSFBcastBegin(sf, MPIU_INT, da->j, dj, MPI_REPLACE));
9886: PetscCall(PetscSFBcastEnd(sf, MPIU_INT, da->j, dj, MPI_REPLACE));
9887: PetscCall(PetscSFBcastBegin(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE));
9888: PetscCall(PetscSFBcastEnd(sf, MPIU_SCALAR, da->a, daa, MPI_REPLACE));
9890: PetscCall(MatCreateMPIAIJWithArrays(comm, ni, A->cmap->n, PETSC_DECIDE, A->cmap->N, di, dj, daa, sA));
9891: PetscCall(MatDestroy(&A_loc));
9892: PetscCall(PetscSFDestroy(&sf));
9893: PetscCall(PetscFree(di));
9894: PetscCall(PetscFree(dj));
9895: PetscCall(PetscFree(daa));
9896: PetscCall(PetscFree(remotes));
9897: PetscCall(PetscFree2(ldata, rdata));
9898: }
9899: PetscFunctionReturn(PETSC_SUCCESS);
9900: }